Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Op asset specificity #3841

Merged
merged 24 commits into from
Jan 6, 2025
Merged
Show file tree
Hide file tree
Changes from 20 commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
18cd5f3
load operaitons that are versioned with set assets resolves #3837
jsnoble Nov 20, 2024
b6f3c03
more operations fixes, throw if multiple processors are found
jsnoble Nov 22, 2024
2983a65
fix collisions tests, remove long deprecated terasliceOpPath
jsnoble Nov 25, 2024
3375fb5
add tests for asset versioning with naming collisions
jsnoble Nov 25, 2024
fdf7cfd
fix tests
jsnoble Nov 25, 2024
4dad7d9
remove double await
jsnoble Nov 25, 2024
9e53448
fix testHarness to work with new operation loading
jsnoble Nov 26, 2024
99af228
Merge branch 'master' into op-asset-specificity
jsnoble Nov 26, 2024
eedf446
release: (minor) [email protected]
jsnoble Nov 26, 2024
89890f1
merge master
jsnoble Dec 2, 2024
975b7d4
release: (minor) [email protected]
jsnoble Dec 2, 2024
00be786
make versioning backwards compatible with validateJobs functions on s…
jsnoble Dec 2, 2024
bd3b203
remove duplicate jobValidation checks
jsnoble Dec 3, 2024
1ee5c6b
fix lint issues
jsnoble Dec 3, 2024
2b38e98
fix auto api name creation issue, add more tests
jsnoble Dec 6, 2024
d6603e4
Merge branch 'master' into op-asset-specificity
jsnoble Dec 6, 2024
3b33182
bump: (patch) @terascope/[email protected], @terascope/[email protected]
jsnoble Dec 6, 2024
7029a33
merge master
jsnoble Dec 6, 2024
a41b8dd
fix tests and lint errors
jsnoble Dec 6, 2024
9e6fcab
fix test
jsnoble Dec 6, 2024
d8c6811
merge from master
jsnoble Jan 2, 2025
d0057fb
fix bug with asset apis names and docs
jsnoble Jan 3, 2025
ccac491
fix docs and put check in place when to alter api names
jsnoble Jan 6, 2025
435f3f0
fix docs and typos
jsnoble Jan 6, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions e2e/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -43,9 +43,9 @@
"ms": "~2.1.3"
},
"devDependencies": {
"@terascope/types": "~1.3.1",
"@terascope/types": "~1.3.2",
"bunyan": "~1.8.15",
"elasticsearch-store": "~1.4.2",
"elasticsearch-store": "~1.4.3",
"fs-extra": "~11.2.0",
"ms": "~2.1.3",
"nanoid": "~5.0.8",
Expand Down
73 changes: 71 additions & 2 deletions e2e/test/cases/assets/simple-spec.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import 'jest-extended';
import fs from 'node:fs';
import os from 'os';
import path from 'path';
import os from 'node:os';
import path from 'node:path';
import decompress from 'decompress';
import archiver from 'archiver';
import {
Expand Down Expand Up @@ -72,6 +72,24 @@ describe('assets', () => {
{ blocking: true }
);

/*
jsnoble marked this conversation as resolved.
Show resolved Hide resolved
{
"name": "ex1",
"version": "0.0.1",
"node_version": 18,
"platform": false,
"arch": false
}

{
"name": "ex1",
"version": "0.1.1",
"node_version": 18,
"platform": false,
"arch": false
}

*/
const newerAssetPath = 'test/fixtures/assets/example_asset_1updated.zip';
const fileStream = fs.createReadStream(newerAssetPath);
// the asset on this job already points to 'ex1' so it should use the latest available asset
Expand Down Expand Up @@ -134,6 +152,57 @@ describe('assets', () => {

await ex.stop({ blocking: true });
});

it('will throw if there are naming conflicts', async () => {
const jobSpec = terasliceHarness.newJob('generator-asset');
// Set resource constraints on workers within CI
if (TEST_PLATFORM === 'kubernetes' || TEST_PLATFORM === 'kubernetesV2') {
jobSpec.resources_requests_cpu = 0.1;
}
// the previous test confirms the newer version will be used by default
// now we test to see if we can select the older version
jsnoble marked this conversation as resolved.
Show resolved Hide resolved
jobSpec.assets = ['ex1:0.0.1', 'ex1:0.1.1', 'standard', 'elasticsearch'];

await expect(terasliceHarness.submitAndStart(jobSpec)).rejects.toThrow();
});

it('will not throw if there are naming conflicts but you use asset identifiers', async () => {
const jobSpec = terasliceHarness.newJob('generator-asset');
// Set resource constraints on workers within CI
if (TEST_PLATFORM === 'kubernetes' || TEST_PLATFORM === 'kubernetesV2') {
jobSpec.resources_requests_cpu = 0.1;
}
// the previous test confirms the newer version will be used by default
// now we test to see if we can select the older version
jsnoble marked this conversation as resolved.
Show resolved Hide resolved
jobSpec.assets = ['ex1:0.0.1', 'ex1:0.1.1', 'standard', 'elasticsearch'];
jobSpec.operations = jobSpec.operations.map((op) => {
if (op._op === 'drop_property') {
return {
...op,
_op: 'drop_property@ex1:0.1.1'
};
}
return op;
});
const { workers } = jobSpec;

const assetResponse = await terasliceHarness.teraslice.assets.getAsset('ex1', '0.0.1');
const assetId = assetResponse[0].id;

const ex = await terasliceHarness.submitAndStart(jobSpec);

const waitResponse = await terasliceHarness.forWorkersJoined(
ex.id(),
workers as number,
25
);
expect(waitResponse).toEqual(workers);

const execution = await ex.config();
expect(execution.assets[0]).toEqual(assetId);

await ex.stop({ blocking: true });
});
});

describe('s3 asset storage', () => {
Expand Down
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
{
"name": "teraslice-workspace",
"displayName": "Teraslice",
"version": "2.9.1",
"version": "2.10.0",
"private": true,
"homepage": "https://github.com/terascope/teraslice",
"bugs": {
Expand Down
10 changes: 5 additions & 5 deletions packages/data-mate/package.json
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
{
"name": "@terascope/data-mate",
"displayName": "Data-Mate",
"version": "1.4.2",
"version": "1.4.3",
"description": "Library of data validations/transformations",
"homepage": "https://github.com/terascope/teraslice/tree/master/packages/data-mate#readme",
"repository": {
Expand Down Expand Up @@ -30,9 +30,9 @@
"test:watch": "ts-scripts test --watch . --"
},
"dependencies": {
"@terascope/data-types": "~1.4.1",
"@terascope/types": "~1.3.1",
"@terascope/utils": "~1.4.1",
"@terascope/data-types": "~1.4.2",
"@terascope/types": "~1.3.2",
"@terascope/utils": "~1.4.2",
"@types/validator": "~13.12.2",
"awesome-phonenumber": "~7.2.0",
"date-fns": "~4.1.0",
Expand All @@ -46,7 +46,7 @@
"uuid": "~11.0.3",
"valid-url": "~1.0.9",
"validator": "~13.12.0",
"xlucene-parser": "~1.4.2"
"xlucene-parser": "~1.4.3"
},
"devDependencies": {
"@types/ip6addr": "~0.2.6",
Expand Down
6 changes: 3 additions & 3 deletions packages/data-types/package.json
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
{
"name": "@terascope/data-types",
"displayName": "Data Types",
"version": "1.4.1",
"version": "1.4.2",
"description": "A library for defining the data structures and mapping",
"homepage": "https://github.com/terascope/teraslice/tree/master/packages/data-types#readme",
"bugs": {
Expand All @@ -27,8 +27,8 @@
"test:watch": "ts-scripts test --watch . --"
},
"dependencies": {
"@terascope/types": "~1.3.1",
"@terascope/utils": "~1.4.1",
"@terascope/types": "~1.3.2",
"@terascope/utils": "~1.4.2",
"graphql": "~16.9.0",
"lodash": "~4.17.21",
"yargs": "~17.7.2"
Expand Down
8 changes: 4 additions & 4 deletions packages/elasticsearch-api/package.json
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
{
"name": "@terascope/elasticsearch-api",
"displayName": "Elasticsearch API",
"version": "4.4.1",
"version": "4.4.2",
"description": "Elasticsearch client api used across multiple services, handles retries and exponential backoff",
"homepage": "https://github.com/terascope/teraslice/tree/master/packages/elasticsearch-api#readme",
"bugs": {
Expand All @@ -24,16 +24,16 @@
"test:watch": "TEST_RESTRAINED_ELASTICSEARCH='true' ts-scripts test --watch . --"
},
"dependencies": {
"@terascope/types": "~1.3.1",
"@terascope/utils": "~1.4.1",
"@terascope/types": "~1.3.2",
"@terascope/utils": "~1.4.2",
"bluebird": "~3.7.2",
"setimmediate": "~1.0.5"
},
"devDependencies": {
"@opensearch-project/opensearch": "~1.2.0",
"@types/elasticsearch": "~5.0.43",
"elasticsearch": "~15.4.1",
"elasticsearch-store": "~1.4.2",
"elasticsearch-store": "~1.4.3",
"elasticsearch6": "npm:@elastic/elasticsearch@~6.8.0",
"elasticsearch7": "npm:@elastic/elasticsearch@~7.17.0",
"elasticsearch8": "npm:@elastic/elasticsearch@~8.15.0"
Expand Down
12 changes: 6 additions & 6 deletions packages/elasticsearch-store/package.json
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
{
"name": "elasticsearch-store",
"displayName": "Elasticsearch Store",
"version": "1.4.2",
"version": "1.4.3",
"description": "An API for managing an elasticsearch index, with versioning and migration support.",
"homepage": "https://github.com/terascope/teraslice/tree/master/packages/elasticsearch-store#readme",
"bugs": {
Expand Down Expand Up @@ -30,10 +30,10 @@
"test:watch": "ts-scripts test --watch . --"
},
"dependencies": {
"@terascope/data-mate": "~1.4.2",
"@terascope/data-types": "~1.4.1",
"@terascope/types": "~1.3.1",
"@terascope/utils": "~1.4.1",
"@terascope/data-mate": "~1.4.3",
"@terascope/data-types": "~1.4.2",
"@terascope/types": "~1.3.2",
"@terascope/utils": "~1.4.2",
"ajv": "~8.17.1",
"ajv-formats": "~3.0.1",
"elasticsearch6": "npm:@elastic/elasticsearch@~6.8.0",
Expand All @@ -43,7 +43,7 @@
"opensearch2": "npm:@opensearch-project/opensearch@~2.12.0",
"setimmediate": "~1.0.5",
"uuid": "~11.0.3",
"xlucene-translator": "~1.4.2"
"xlucene-translator": "~1.4.3"
},
"devDependencies": {
"@types/uuid": "~10.0.0"
Expand Down
6 changes: 3 additions & 3 deletions packages/job-components/package.json
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
{
"name": "@terascope/job-components",
"displayName": "Job Components",
"version": "1.6.2",
"version": "1.6.3",
"description": "A teraslice library for validating jobs schemas, registering apis, and defining and running new Job APIs",
"homepage": "https://github.com/terascope/teraslice/tree/master/packages/job-components#readme",
"bugs": {
Expand Down Expand Up @@ -32,8 +32,8 @@
"test:watch": "ts-scripts test --watch . --"
},
"dependencies": {
"@terascope/types": "~1.3.1",
"@terascope/utils": "~1.4.1",
"@terascope/types": "~1.3.2",
"@terascope/utils": "~1.4.2",
"convict": "~6.2.4",
"convict-format-with-moment": "~6.2.0",
"convict-format-with-validator": "~6.2.0",
Expand Down
2 changes: 1 addition & 1 deletion packages/job-components/src/execution-context/base.ts
Original file line number Diff line number Diff line change
Expand Up @@ -43,8 +43,8 @@ export default class BaseExecutionContext<T extends OperationLifeCycle> {
this.events.on('execution:add-to-lifecycle', this._handlers['execution:add-to-lifecycle']);

const executionConfig = cloneDeep(config.executionConfig);

this._loader = new OperationLoader({
terasliceOpPath: config.terasliceOpPath,
assetPath: config.context.sysconfig.teraslice.assets_directory,
});

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@ import { APICore, OperationAPIType } from '../operations/index.js';
export interface ExecutionContextConfig {
context: Context;
executionConfig: ExecutionConfig;
terasliceOpPath?: string;
assetIds?: string[];
}

Expand Down
55 changes: 39 additions & 16 deletions packages/job-components/src/job-validator.ts
Original file line number Diff line number Diff line change
@@ -1,12 +1,10 @@
import convict from 'convict';
import { cloneDeep, pMap } from '@terascope/utils';
import { Teraslice } from '@terascope/types';
import {
Context, OpConfig, ValidatedJobConfig
} from './interfaces';
import { Context, OpConfig, ValidatedJobConfig } from './interfaces';
import { validateJobConfig } from './config-validators.js';
import { jobSchema } from './job-schemas.js';
import { OperationLoader } from './operation-loader/index.js';
import { OperationLoader, parseName } from './operation-loader/index.js';
import { registerApis } from './register-apis.js';
import { OperationAPIConstructor, OperationModule } from './operations/index.js';

Expand All @@ -15,11 +13,11 @@ export class JobValidator {
private readonly context: Context;
private readonly opLoader: OperationLoader;

constructor(context: Context, options: { terasliceOpPath?: string } = {}) {
constructor(context: Context) {
this.context = context;
this.opLoader = new OperationLoader({
terasliceOpPath: options.terasliceOpPath,
assetPath: context.sysconfig.teraslice.assets_directory,
validate_name_collisions: true
});
this.schema = jobSchema(context);
}
Expand All @@ -34,9 +32,14 @@ export class JobValidator {
const apis: Record<string, OperationAPIConstructor> = {};

type ValidateJobFn = (job: ValidatedJobConfig) => void;
let validateJobFns: ValidateJobFn[] = [];

const handleModule = (opConfig: OpConfig, op: OperationModule) => {
const validateJobFns: ValidateJobFn[] = [];
const validateApisFns: ValidateJobFn[] = [];

const handleModule = (
opConfig: OpConfig,
op: OperationModule,
index: number
) => {
const { Schema, API } = op;

if (API != null) {
Expand All @@ -47,7 +50,15 @@ export class JobValidator {

validateJobFns.push((job) => {
if (!schema.validateJob) return;

const originalName = opConfig._op;
const { name } = parseName(originalName);

// for backwards compatible checks, alter name so it can be found
job.operations[index]._op = name;
schema.validateJob(job);
// revert name back to original
job.operations[index]._op = originalName;
});

return schema.validate(opConfig);
Expand All @@ -57,35 +68,47 @@ export class JobValidator {
if (index === 0) {
return handleModule(
opConfig,
await this.opLoader.loadReader(opConfig._op, assetIds)
await this.opLoader.loadReader(opConfig._op, assetIds),
index
);
}

return handleModule(
opConfig,
await this.opLoader.loadProcessor(opConfig._op, assetIds)
await this.opLoader.loadProcessor(opConfig._op, assetIds),
index
);
});

// this needs to happen first because it can add apis to the job
// though usage of the ensureAPIFromConfig api that called inside
jsnoble marked this conversation as resolved.
Show resolved Hide resolved
// many validateJob schema methods
validateJobFns.forEach((fn) => {
fn(jobConfig);
});

validateJobFns = [];

jobConfig.apis = await pMap(jobConfig.apis, async (apiConfig) => {
jobConfig.apis = await pMap(jobConfig.apis, async (apiConfig, index) => {
const { Schema } = await this.opLoader.loadAPI(apiConfig._name, assetIds);
const schema = new Schema(this.context, 'api');

validateJobFns.push((job) => {
validateApisFns.push((job) => {
if (!schema.validateJob) return;

const originalName = apiConfig._name;
const { name } = parseName(originalName);

// for backwards compatible checks, alter name so it can be found
job.apis[index]._name = name;
schema.validateJob(job);
// revert name back to original
jsnoble marked this conversation as resolved.
Show resolved Hide resolved
job.apis[index]._name = originalName;
});

return schema.validate(apiConfig);
});

validateJobFns.forEach((fn) => {
// this can mutate the job
validateApisFns.forEach((fn) => {
fn(jobConfig);
});

Expand Down
1 change: 1 addition & 0 deletions packages/job-components/src/operation-loader/index.ts
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
export * from './interfaces.js';
export * from './loader.js';
export * from './utils.js';
Loading
Loading