Skip to content

Commit

Permalink
Convert execution controller IP to a k8s service in kubernetesV2 back…
Browse files Browse the repository at this point in the history
…end (#3718)

This PR makes the following changes:

- Makes V2 backend compatible with `--dev` option 
- Bumps `teraslice` from `v2.1.0` to `v2.2.0`
- Replaces the execution controller's `pod_ip` host name with a static k8s service host name
- This allows workers to reconnect to an `execution_controller` pod
when an execution controller fails and a new pod is created
- _**NOTE:**_ These changes only apply when the `clustering_type` is set
to `kubernetesV2`


Ref to issue #3696
  • Loading branch information
sotojn authored Aug 16, 2024
1 parent b7d7ce6 commit 71ebd9b
Show file tree
Hide file tree
Showing 6 changed files with 115 additions and 31 deletions.
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
{
"name": "teraslice-workspace",
"displayName": "Teraslice",
"version": "2.1.0",
"version": "2.2.0",
"private": true,
"homepage": "https://github.com/terascope/teraslice",
"bugs": {
Expand Down
2 changes: 1 addition & 1 deletion packages/teraslice/package.json
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
{
"name": "teraslice",
"displayName": "Teraslice",
"version": "2.1.0",
"version": "2.2.0",
"description": "Distributed computing platform for processing JSON data",
"homepage": "https://github.com/terascope/teraslice#readme",
"bugs": {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,25 @@ export class KubernetesClusterBackendV2 {
this.logger.debug(exJob, 'execution allocating slicer');

const jobResult = await this.k8s.post(exJob, 'job') as K8sClient.V1Job;

// I need to add these here to create the ex service resource
// @ts-expect-error
execution.k8sName = jobResult.metadata.name;
// @ts-expect-error
execution.k8sUid = jobResult.metadata.uid;

const exServiceResource = new K8sResource(
'services',
'execution_controller',
this.context.sysconfig.teraslice,
execution,
this.logger
);

const exService = exServiceResource.resource;

const serviceResult = await this.k8s.post(exService, 'service') as K8sClient.V1Service;

this.logger.debug(jobResult, 'k8s slicer job submitted');

let controllerLabel: string;
Expand All @@ -131,6 +150,8 @@ export class KubernetesClusterBackendV2 {

const controllerUid = jobResult.spec?.selector?.matchLabels?.[controllerLabel];

// Right now this is waiting for the selected pod to come up in a "running"
// state. It may be better to check for a readiness probe instead
const pod = await this.k8s.waitForSelectedPod(
`${controllerLabel}=${controllerUid}`,
undefined,
Expand All @@ -141,8 +162,11 @@ export class KubernetesClusterBackendV2 {
const error = new Error('pod.status.podIP must be defined');
return Promise.reject(error);
}
this.logger.debug(`Slicer is using IP: ${pod.status.podIP}`);
execution.slicer_hostname = `${pod.status.podIP}`;
const exServiceName = serviceResult.metadata?.name;
const exServiceHostName = `${exServiceName}.${this.k8s.defaultNamespace}`;
this.logger.debug(`Slicer is using host name: ${exServiceHostName}`);

execution.slicer_hostname = `${exServiceHostName}`;

return execution;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -75,36 +75,60 @@ export class K8sResource {
this.templateConfig = this._makeConfig();
this.resource = this.templateGenerator(this.templateConfig);

this._setJobLabels();

// Apply job `targets` setting as k8s nodeAffinity
// We assume that multiple targets require both to match ...
// NOTE: If you specify multiple `matchExpressions` associated with
// `nodeSelectorTerms`, then the pod can be scheduled onto a node
// only if *all* `matchExpressions` can be satisfied.
this._setTargets();
this._setResources();
this._setVolumes();
this._setAssetsVolume();
this._setImagePullSecret();
this._setEphemeralStorage();
this._setExternalPorts();
this._setPriorityClassName();
if (resourceType !== 'services') {
this._setJobLabels();

// Apply job `targets` setting as k8s nodeAffinity
// We assume that multiple targets require both to match ...
// NOTE: If you specify multiple `matchExpressions` associated with
// `nodeSelectorTerms`, then the pod can be scheduled onto a node
// only if *all* `matchExpressions` can be satisfied.
this._setTargets();
this._setResources();
this._setVolumes();
if (process.env.MOUNT_LOCAL_TERASLICE !== undefined) {
this._mountLocalTeraslice(resourceName);
}
this._setEnvVariables();
this._setAssetsVolume();
this._setImagePullSecret();
this._setEphemeralStorage();
this._setExternalPorts();
this._setPriorityClassName();

if (resourceName === 'worker') {
this._setWorkerAntiAffinity();
}

if (resourceName === 'worker') {
this._setWorkerAntiAffinity();
}
// Execution controller targets are required nodeAffinities, if
// required job targets are also supplied, then *all* of the matches
// will have to be satisfied for the job to be scheduled. This also
// adds tolerations for any specified targets
if (resourceName === 'execution_controller') {
this._setExecutionControllerTargets();
}

// Execution controller targets are required nodeAffinities, if
// required job targets are also supplied, then *all* of the matches
// will have to be satisfied for the job to be scheduled. This also
// adds tolerations for any specified targets
if (resourceName === 'execution_controller') {
this._setExecutionControllerTargets();
if (this.terasliceConfig.kubernetes_overrides_enabled) {
this._mergePodSpecOverlay();
}
}
}

/**
 * Placeholder hook for adding environment variables to the generated
 * resource. The body is intentionally empty, so calling it has no effect.
 */
_setEnvVariables() {
/// TODO: Use this later when we need to set env vars in workers/ex controllers
}

if (this.terasliceConfig.kubernetes_overrides_enabled) {
this._mergePodSpecOverlay();
/**
 * Appends development volume mounts, described by the
 * `MOUNT_LOCAL_TERASLICE` environment variable, to the generated resource.
 *
 * The variable must hold a JSON object. Its optional `volumeMounts` array is
 * appended to the first container's `volumeMounts`, and its optional
 * `volumes` array is appended to the pod spec's `volumes`. When the resource
 * is an execution controller, the first container's `args` are also replaced
 * with `yarn node service.js`.
 *
 * @param contextType - name of the resource being generated; only
 *   `'execution_controller'` triggers the `args` override
 * @throws Error when the variable is set but is not valid JSON, is not a
 *   JSON object, or carries a non-array `volumeMounts` / `volumes` value
 */
_mountLocalTeraslice(contextType: string): void {
    const rawMounts = process.env.MOUNT_LOCAL_TERASLICE;

    // Nothing to mount when the variable is absent.
    if (rawMounts === undefined) {
        return;
    }

    let parsed: unknown;
    try {
        parsed = JSON.parse(rawMounts);
    } catch (err) {
        // Re-throw with the variable name so a bad value is easy to trace.
        const reason = err instanceof Error ? err.message : String(err);
        throw new Error(`MOUNT_LOCAL_TERASLICE must contain valid JSON: ${reason}`);
    }

    if (parsed === null || typeof parsed !== 'object') {
        throw new Error('MOUNT_LOCAL_TERASLICE must be a JSON object');
    }

    // A missing key is treated as "no entries"; any other non-array value is
    // rejected, because spreading e.g. a string would push single characters.
    const { volumeMounts = [], volumes = [] } = parsed as {
        volumeMounts?: unknown;
        volumes?: unknown;
    };
    if (!Array.isArray(volumeMounts) || !Array.isArray(volumes)) {
        throw new Error(
            'MOUNT_LOCAL_TERASLICE "volumeMounts" and "volumes" must be arrays when provided'
        );
    }

    const podSpec = this.resource.spec.template.spec;
    const mainContainer = podSpec.containers[0];

    mainContainer.volumeMounts.push(...volumeMounts);
    podSpec.volumes.push(...volumes);

    if (contextType === 'execution_controller') {
        mainContainer.args = [
            'yarn',
            'node',
            'service.js'
        ];
    }
}

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
{
"kind": "Service",
"apiVersion": "v1",
"metadata": {
"name": "svc-{{name}}",
"namespace": "{{namespace}}",
"ownerReferences": [
{
"apiVersion": "batch/v1",
"controller": false,
"blockOwnerDeletion": false,
"kind": "Job",
"name": "{{exName}}",
"uid": "{{exUid}}"
}
],
"labels": {
"app.kubernetes.io/name": "teraslice",
"app.kubernetes.io/component": "execution_controller",
"teraslice.terascope.io/exId": "{{exId}}",
"teraslice.terascope.io/jobId": "{{jobId}}"
}
},
"spec": {
"selector": {
"app.kubernetes.io/component": "execution_controller",
"teraslice.terascope.io/exId": "{{exId}}"
},
"ports": [
{
"port": 45680,
"targetPort": 45680
}
]
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -198,7 +198,7 @@ export class ExecutionController {
if (this.metrics != null) {
await this.metrics.initialize();
}

/// We set this to true later down the line. Not sure why
this.isInitialized = true;

this.server.onClientOnline((workerId) => {
Expand Down

0 comments on commit 71ebd9b

Please sign in to comment.