pipelines/frontend/server/k8s-helper.ts

130 lines
4.0 KiB
TypeScript

// Copyright 2018 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// @ts-ignore
import { Config, Core_v1Api } from '@kubernetes/client-node';
import * as fs from 'fs';
import * as Utils from './utils';
// A k8s Pod should find its own namespace written at this path. Whether the
// file exists is also used as the signal that we are running in the cluster.
const namespaceFilePath = '/var/run/secrets/kubernetes.io/serviceaccount/namespace';
export const isInCluster = fs.existsSync(namespaceFilePath);
// Outside of a cluster the namespace stays empty and no API client is created.
const namespace = isInCluster ? fs.readFileSync(namespaceFilePath, 'utf-8') : '';
const k8sV1Client: Core_v1Api | null = isInCluster ? Config.defaultClient() : null;
/**
 * Starts a Tensorboard pod for the given logdir in the current namespace.
 * Does nothing when getTensorboardAddress already reports an address for that
 * logdir. Throws if the kubernetes client is not available.
 */
export async function newTensorboardPod(logdir: string): Promise<void> {
  if (!k8sV1Client) {
    throw new Error('Cannot access kubernetes API');
  }

  const existingAddress = await getTensorboardAddress(logdir);
  if (existingAddress) {
    return;
  }

  // TODO: take the configuration below to a separate file
  const tensorboardContainer = {
    args: ['tensorboard', '--logdir', logdir],
    image: 'tensorflow/tensorflow',
    name: 'tensorflow',
    ports: [{ containerPort: 6006 }],
    env: [{
      name: 'GOOGLE_APPLICATION_CREDENTIALS',
      value: '/secret/gcp-credentials/user-gcp-sa.json'
    }],
    volumeMounts: [{
      mountPath: '/secret/gcp-credentials',
      name: 'gcp-credentials',
    }],
  };
  // Secret-backed volume mounted by the container above.
  const credentialsVolume = {
    name: 'gcp-credentials',
    secret: { secretName: 'user-gcp-sa' },
  };
  const podManifest = {
    kind: 'Pod',
    metadata: { generateName: 'tensorboard-' },
    spec: {
      containers: [tensorboardContainer],
      volumes: [credentialsVolume],
    },
  };

  await k8sV1Client.createNamespacedPod(namespace, podManifest as any);
}
/**
 * Looks in the current namespace for a running, non-terminating pod that has a
 * container whose args are exactly `tensorboard --logdir <logdir>`. Returns
 * `http://<podIP>:6006` for the first such pod, or an empty string when there
 * is none or it has no pod IP yet. Throws if the kubernetes client is not
 * available.
 */
export async function getTensorboardAddress(logdir: string): Promise<string> {
  if (!k8sV1Client) {
    throw new Error('Cannot access kubernetes API');
  }

  const response = await k8sV1Client.listNamespacedPod(namespace);
  const expectedArgs = ['tensorboard', '--logdir', logdir];

  for (const candidate of response.body.items) {
    if (candidate.status.phase !== 'Running') {
      continue;
    }
    // Terminating/terminated pods have this set
    if (candidate.metadata.deletionTimestamp) {
      continue;
    }
    const servesLogdir = candidate.spec.containers.some(
      (container) => Utils.equalArrays(container.args, expectedArgs));
    if (!servesLogdir) {
      continue;
    }
    // Only the first matching pod is considered, even if it has no IP.
    const podIP = candidate.status.podIP;
    return podIP ? `http://${podIP}:6006` : '';
  }

  return '';
}
/**
 * Polls every second for a running Tensorboard pod with the given logdir.
 *
 * @param logdir the log directory the Tensorboard pod was started with.
 * @param timeout maximum time to keep polling, in milliseconds.
 * @returns a promise resolving to the URI-encoded address of the pod once
 *     one is found. It rejects with the string 'Timed out waiting for
 *     tensorboard' when the timeout expires, or with the underlying error if
 *     looking up the pod fails.
 */
export function waitForTensorboard(logdir: string, timeout: number): Promise<string> {
  const start = Date.now();
  return new Promise((resolve, reject) => {
    const timer = setInterval(async () => {
      if (Date.now() - start > timeout) {
        // Stop polling; otherwise the interval keeps hitting the API forever.
        clearInterval(timer);
        // Kept as a plain string so existing callers see the same value.
        reject('Timed out waiting for tensorboard');
        return;
      }
      try {
        const tensorboardAddress = await getTensorboardAddress(logdir);
        if (tensorboardAddress) {
          clearInterval(timer);
          resolve(encodeURIComponent(tensorboardAddress));
        }
      } catch (err) {
        // Without this, a failed lookup would surface as an unhandled
        // rejection inside the timer callback instead of reaching the caller.
        clearInterval(timer);
        reject(err);
      }
    }, 1000);
  });
}
/**
 * Reads the logs of the 'main' container of the given pod in the current
 * namespace.
 *
 * @param podName name of the pod whose logs should be read.
 * @returns a promise resolving to the log text, or an empty string when the
 *     API response has no body.
 * @throws (as a rejection) an Error when the kubernetes client is not
 *     available, or when the API call fails. For API failures the message is
 *     the JSON-serialized error body when there is one, otherwise the error's
 *     own message.
 */
export async function getPodLogs(podName: string): Promise<string> {
  // Being async, this now rejects rather than throwing synchronously, which
  // matches the other helpers in this file.
  if (!k8sV1Client) {
    throw new Error('Cannot access kubernetes API');
  }

  let response: any;
  try {
    response = await (k8sV1Client.readNamespacedPodLog(podName, namespace, 'main') as any);
  } catch (error) {
    const failure = error as any;
    // Errors without a `body` (e.g. connection failures) previously produced
    // an Error with an empty message; fall back to the error's own message.
    const hasBody = failure != null && failure.body !== undefined;
    const message = hasBody
      ? JSON.stringify(failure.body)
      : String(failure && failure.message ? failure.message : failure);
    throw new Error(message);
  }

  // Kept outside the try block so conversion errors are not rewrapped.
  return (response && response.body) ? response.body.toString() : '';
}