Merge branch 'master' into patch-1

govind cs 2019-01-21 09:49:12 +05:30 committed by GitHub
commit 225a7e9f90
262 changed files with 127607 additions and 164021 deletions

View File

@ -13,7 +13,7 @@ ignore=third_party
# Add files or directories matching the regex patterns to the blacklist. The
# regex matches against base names, not paths.
ignore-patterns=prediction_pb2.py,prediction_pb2_grpc.py,mnist_DDP.py,mnistddpserving.py
ignore-patterns=object_detection_grpc_client.py,prediction_pb2.py,prediction_pb2_grpc.py,mnist_DDP.py,mnistddpserving.py
# Pickle collected data for later comparisons.
persistent=no

View File

@ -7,7 +7,6 @@ acting as a starting point for new users and a reference guide for experienced u
This repository is home to the following types of examples and demos:
* [End-to-end](#end-to-end)
* [Component-focused](#component-focused)
* [Application-specific](#application-specific)
* [Demos](#demos)
## End-to-end
@ -81,11 +80,6 @@ This example covers the following concepts:
1. GCS and GKE
1. Serving with Seldon Core
## Application-specific
1.
## Demos
Demos are for showing Kubeflow or one of its components publicly, with the

View File

@ -1,4 +1,8 @@
IMG = gcr.io/kubeflow-examples/code-search
# Makefile for building images
#
# To override variables do
# make ${TARGET} ${VAR}=${VALUE}
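# For example, to push to your own registry (illustrative project name, not
# part of this repo): make build-gcb IMG=gcr.io/my-project/code-search TAG=v1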
IMG ?= gcr.io/kubeflow-examples/code-search
# List any changed files. We only include files in the notebooks directory
# because that is the code in the docker image.
@ -19,6 +23,15 @@ all: build
TF_VERSION=1.11.0
# Whether to use cached images with GCB
USE_IMAGE_CACHE ?= true
echo:
@echo IMG=$(IMG)
@echo GIT_VERSION=$(GIT_VERSION)
@echo TAG=$(TAG)
# To build without the cache set the environment variable
# export DOCKER_BUILD_OPTS=--no-cache
build-cpu:
@ -48,13 +61,17 @@ build-dataflow:
build: build-cpu build-gpu build-dataflow
# Build using GCB. This is useful if we are on a slow internet connection
# and don't want to pull
build-gcb:
# Build the GCB workflow
build-gcb-spec:
rm -rf ./build
mkdir -p build
jsonnet ./docker/t2t/build.jsonnet --ext-str gitVersion=$(GIT_VERSION) --ext-str tag=$(TAG) \
jsonnet ./docker/t2t/build.jsonnet --ext-str imageBase=$(IMG) \
--ext-str gitVersion=$(GIT_VERSION) --ext-str tag=$(TAG) \
--ext-str useImageCache=$(USE_IMAGE_CACHE) \
> ./build/build.json
# Build using GCB. This is useful if we are on a slow internet connection
# and don't want to pull
build-gcb: build-gcb-spec
cp -r ./docker ./build/
cp -r ./src ./build/
rm -rf ./build/src/code_search/dataflow/cli/test_data

View File

@ -26,6 +26,16 @@ We are using the following project
```
ks12 apply cs_demo -c search-index-server
```
1. Copy the GCP service account to the namespace where the servers run
* The serving piece runs in a different namespace from Kubeflow
* We need to copy the GCP service account to that namespace because the pod will try to mount it.
```
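# SECRET_FILE is a placeholder; any local scratch path works, for example:
SECRET_FILE=$(mktemp)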
kubectl -n kubeflow get secret user-gcp-sa -o json | jq -r '.data["user-gcp-sa.json"]' | base64 -d > ${SECRET_FILE}
kubectl -n cs-web-app create secret generic user-gcp-sa --from-file=user-gcp-sa.json=${SECRET_FILE}
```
# Install Argo CD
```

View File

@ -0,0 +1,145 @@
"""Count lines of code in different types of file.
This has nothing to do with actually running code search.
The sole purpose of this script is to collect data for the presentation to
illustrate the point that most effort isn't spent on ML.
"""
import argparse
import csv
import logging
import os
import re
import sys
import tempfile
# Mapping from categories to regexes to include
# These are applied to the full path.
MATCH_RES = {
"dataflow": [re.compile(r".*dataflow.*\.py")],
"packaging (e.g dockerfile)": [
re.compile(".*Dockerfile.*"),
re.compile(r"code_search/src/.*requirements.*\.txt")],
"cloud config": [re.compile(".*gcp_config.*")],
"k8s & kubeflow config": [
re.compile(r".*/cs-demo-1103/ks_app/components/.*"),
re.compile(r".*/cs-demo-1103/k8s_specs/.*")],
"model": [
re.compile(r".*t2t/.*\.py")
],
"serving k8s config": [
re.compile(r".*/ks-web-app/components/.*"),
],
"batch k8s config": [
re.compile(r".*/kubeflow/components/.*"),
],
"serving code": [
re.compile(r".*/code_search/nmslib/.*\.py"),
re.compile(r".*/ui.*\.js$"),
],
}
# Regexes matching files to exclude
NAME_EXCLUDES = [
re.compile(r".*\.pyc"),
re.compile(r"__init__\.py"),
]
class Results(object):
def __init__(self):
self.files = []
self.loc = 0
def add_file(self, full_path):
self.files.append(full_path)
with open(full_path) as hf:
lines = hf.readlines()
self.loc += len(lines)
@property
def num_files(self):
return len(self.files)
def classify_files(root_dir):
"""Return lists of files in different categories
Args:
root_dir: Root directory to begin searching in
Returns:
categories: Dictionary mapping a category to list of files.
"""
categories = {}
for k in MATCH_RES.iterkeys():
categories[k] = Results()
for root, _, files in os.walk(root_dir):
for name in files:
full_path = os.path.join(root, name)
exclude = False
for m in NAME_EXCLUDES:
if m.match(name):
exclude = True
break
if exclude:
continue
for k, patterns in MATCH_RES.iteritems():
for p in patterns:
if p.match(full_path):
categories[k].add_file(full_path)
break
return categories
def main():
logging.basicConfig(level=logging.INFO,
format=('%(levelname)s|%(asctime)s'
'|%(pathname)s|%(lineno)d| %(message)s'),
datefmt='%Y-%m-%dT%H:%M:%S',
)
logging.getLogger().setLevel(logging.INFO)
parser = argparse.ArgumentParser(
description="Create a CSV file containing # of PRs by company.")
parser.add_argument(
"--output",
default="",
type=str,
help="The file to write.")
args = parser.parse_args()
if not args.output:
with tempfile.NamedTemporaryFile(prefix="tmpCS_demo_code_stats", dir=None,
suffix=".csv",
delete=True) as hf:
args.output = hf.name
logging.info("--output not specified; defaulting to %s", args.output)
root_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
logging.info("root_dir")
categories = classify_files(root_dir)
for k, v in categories.iteritems():
for path in v.files:
print(k, path)
logging.info("Writing output to %s", args.output)
with open(args.output, "w") as hf:
writer = csv.writer(hf)
std_writer = csv.writer(sys.stdout)
row = ["category", "number of files", "lines of code"]
writer.writerow(row)
std_writer.writerow(row)
for k, v in categories.iteritems():
row = [k, v.num_files, v.loc]
writer.writerow(row)
std_writer.writerow(row)
if __name__ == "__main__":
main()

View File

@ -348,7 +348,7 @@ spec:
- --status-processors
- "20"
- --operation-processors
- "10"
- "10"
image: argoproj/argocd-application-controller:v0.10.6
name: application-controller
serviceAccountName: application-controller
@ -402,6 +402,7 @@ spec:
- /shared/app
- --repo-server
- argocd-repo-server:8081
- --insecure
image: argoproj/argocd-server:v0.10.6
name: argocd-server
readinessProbe:

View File

@ -0,0 +1,29 @@
# Ambassador is only monitoring namespace kubeflow
# so we define another service inside the kubeflow namespace
# to do the mapping.
apiVersion: v1
kind: Service
metadata:
annotations:
getambassador.io/config: |-
---
apiVersion: ambassador/v0
kind: Mapping
name: argo-cd-mapping
prefix: /argocd/
rewrite: /argocd/
service: argocd-server.argocd
name: argocd-mapping
namespace: kubeflow
spec:
type: ExternalName
externalName: argocd-server.argocd.svc.cluster.local
ports:
- name: http
port: 80
protocol: TCP
targetPort: 8080
- name: https
port: 443
protocol: TCP
targetPort: 8080

View File

@ -40,7 +40,7 @@
useJupyterLabAsDefault: "false",
},
centraldashboard: {
image: "gcr.io/kubeflow-images-public/centraldashboard:v0.3.0",
image: "gcr.io/kubeflow-images-public/centraldashboard:v0.3.4",
name: "centraldashboard",
},
"tf-job-operator": {

View File

@ -267,6 +267,7 @@
],
resources: [
"pods",
"pods/log",
"services",
],
verbs: [

View File

@ -17,8 +17,8 @@ spec:
# with the web app components is checked in.
# repoURL: https://github.com/kubeflow/examples.git
# targetRevision: HEAD
repoURL: https://github.com/jlewi/examples.git
targetRevision: cs_demo_argo_cd
repoURL: https://github.com/kubeflow/examples.git
targetRevision: master
syncPolicy:
automated:
prune: True

View File

@ -20,7 +20,7 @@ The test can be run as follows
```
cd code_search/src
python3 -m code_searcch.t2t.similarity_transformer_export_test
python3 -m code_search.t2t.similarity_transformer_test
```
The test just runs the relevant T2T steps and verifies they succeed. No additional
checks are executed.

View File

@ -17,26 +17,26 @@ component="search-index-creator"
usage() {
echo "Usage: launch_search_index_creator_job.sh
--workflowId=<workflow id invoking the container>
--cluster=<cluster to deploy job to>
--functionEmbeddingsDir=<input function embedding dir>
--indexFile=<index file>
--lookupFile=<lookup file>
--functionEmbeddingsDir=<input function embedding dir>
--timeout=<timeout>
--namespace=<kubernetes namespace>
--cluster=<cluster to deploy job to>"
--timeout=<timeout>
--workflowId=<workflow id invoking the container>"
}
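# Example invocation (illustrative values; the cluster, index and lookup paths
# mirror the cs-demo environment used elsewhere in this repo):
#   launch_search_index_creator_job.sh \
#     --cluster=cs-demo-1103 --namespace=kubeflow --workflowId=manual-001 \
#     --functionEmbeddingsDir=gs://code-search-demo/pipeline/manual-001/code_embeddings \
#     --indexFile=gs://code-search-demo/20181104/code-embeddings-index/embeddings.index \
#     --lookupFile=gs://code-search-demo/20181104/code-embeddings-index/embedding-to-info.csv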
# List of required parameters
names=(workflowId indexFile lookupFile dataDir namespace cluster)
names=(cluster functionEmbeddingsDir indexFile lookupFile namespace workflowId)
source "${DIR}/parse_arguments.sh"
source "${DIR}/initialize_kubectl.sh"
# Apply parameters
ks param set ${component} functionEmbeddingsDir ${functionEmbeddingsDir} --env ${ksEnvName}
ks param set ${component} indexFile ${indexFile} --env ${ksEnvName}
ks param set ${component} jobNameSuffix ${workflowId} --env ${ksEnvName}
ks param set ${component} lookupFile ${lookupFile} --env ${ksEnvName}
ks param set ${component} indexFile ${indexFile} --env ${ksEnvName}
ks show ${ksEnvName} -c "${component}"
ks apply ${ksEnvName} -c "${component}"

View File

@ -21,37 +21,36 @@ workerMachineType=n1-highcpu-32
usage() {
echo "Usage: submit_code_embeddings_job.sh
--workflowId=<workflow id invoking the container>
--modelDir=<directory contains the model>
--dataDir=<data dir>
--functionEmbeddingsDir=<output function embedding dir>
--tokenPairsBQTable=<input token pairs BQ table>
--cluster=<cluster to deploy job to>
--dataDir=<data dir containing the pre generated vocabulary file>
--functionEmbeddingsBQTable=<output function embedding BQ table>
--functionEmbeddingsDir=<output function embedding dir>
--modelDir=<directory contains the model>
--namespace=<kubernetes namespace>
--numWorkers=<num of workers>
--project=<project>
--timeout=<timeout>
--workerMachineType=<worker machine type>
--workingDir=<working dir>
--cluster=<cluster to deploy job to>
--namespace=<kubernetes namespace>"
--workflowId=<workflow id invoking the container>
--workingDir=<working dir>"
}
# List of required parameters
names=(dataDir modelDir functionEmbeddingsDir tokenPairsBQTable functionEmbeddingsBQTable workingDir workflowId cluster namespace)
names=(cluster dataDir functionEmbeddingsBQTable functionEmbeddingsDir modelDir namespace project workflowId workingDir)
source "${DIR}/parse_arguments.sh"
source "${DIR}/initialize_kubectl.sh"
# Apply parameters
ks param set ${component} jobNameSuffix ${workflowId} --env ${ksEnvName}
ks param set ${component} dataDir ${dataDir} --env ${ksEnvName}
ks param set ${component} functionEmbeddingsDir ${functionEmbeddingsDir} --env ${ksEnvName}
ks param set ${component} tokenPairsBQTable ${tokenPairsBQTable} --env ${ksEnvName}
ks param set ${component} functionEmbeddingsBQTable ${functionEmbeddingsBQTable} --env ${ksEnvName}
ks param set ${component} functionEmbeddingsDir ${functionEmbeddingsDir} --env ${ksEnvName}
ks param set ${component} jobNameSuffix ${workflowId} --env ${ksEnvName}
ks param set ${component} modelDir ${modelDir} --env ${ksEnvName}
ks param set ${component} project ${project} --env ${ksEnvName}
ks param set ${component} workingDir ${workingDir} --env ${ksEnvName}
ks param set ${component} numWorkers ${numWorkers} --env ${ksEnvName}
ks param set ${component} project ${project} --env ${ksEnvName}
ks param set ${component} workerMachineType ${workerMachineType} --env ${ksEnvName}
ks param set ${component} workingDir ${workingDir} --env ${ksEnvName}
ks show ${ksEnvName} -c "${component}"
ks apply ${ksEnvName} -c "${component}"

View File

@ -8,22 +8,23 @@ set -ex
DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" > /dev/null && pwd)"
branch=master
# Ksonnet Environment name. Always use pipeline
ksEnvName="pipeline"
usage() {
echo "Usage: update_index.sh
--baseGitRepo=<base git repo name>
--baseBranch=<base branch>
--appDir=<ksonnet app dir>
--baseBranch=<base branch>
--baseGitRepo=<base git repo name>
--botEmail=<email account of the bot that send the PR>
--forkGitRepo=<github repo with Argo CD hooked up>
--env=<ksonnet environment>
--indexFile=<index file>
--lookupFile=<lookup file>
--workflowId=<workflow id invoking the container>
--botEmail=<email account of the bot that send the PR>"
--workflowId=<workflow id invoking the container>"
}
# List of required parameters
names=(baseGitRepo baseBranch appDir forkGitRepo env indexFile lookupFile workflowId botEmail)
names=(appDir baseBranch baseGitRepo botEmail forkGitRepo indexFile lookupFile workflowId)
source "${DIR}/parse_arguments.sh"
@ -44,8 +45,8 @@ git fetch upstream
git merge upstream/${baseBranch} master
git checkout -b ${workflowId}
ks param set --env=${env} search-index-server indexFile ${indexFile}
ks param set --env=${env} search-index-server lookupFile ${lookupFile}
ks param set --env=${ksEnvName} search-index-server indexFile ${indexFile}
ks param set --env=${ksEnvName} search-index-server lookupFile ${lookupFile}
git add . && git commit -m "Update the lookup and index file."
FILE=$(mktemp tmp.create_pull_request.XXXX)

View File

@ -3,10 +3,18 @@ ARG BASE_IMAGE_TAG=1.8.0
FROM tensorflow/tensorflow:$BASE_IMAGE_TAG
RUN pip --no-cache-dir install oauth2client~=4.1.0 &&\
apt-get update && apt-get install -y jq git &&\
apt-get update && apt-get install -y jq git python3-pip &&\
rm -rf /var/lib/apt/lists/*
RUN pip --no-cache-dir install tensor2tensor~=1.10.0 tensorflow-hub~=0.1.1
RUN pip --no-cache-dir install \
tensor2tensor~=1.10.0 \
tensorflow-hub~=0.1.1 \
six
RUN pip3 --no-cache-dir install \
tensor2tensor~=1.10.0 \
tensorflow-hub~=0.1.1 \
six
ADD src/code_search /app/code_search
ADD src /src

View File

@ -2,84 +2,94 @@
// use latest as a cache so that rebuilds are fast
// https://cloud.google.com/cloud-build/docs/speeding-up-builds#using_a_cached_docker_image
{
"steps": [
{
"id": "pull-cpu",
"name": "gcr.io/cloud-builders/docker",
"args": ["pull", "gcr.io/kubeflow-examples/code-search:latest"],
"waitFor": ["-"],
},
{
"id": "build-cpu",
"name": "gcr.io/cloud-builders/docker",
"args": ["build", "-t", "gcr.io/kubeflow-examples/code-search:" + std.extVar("tag"),
"--label=git-versions=" + std.extVar("gitVersion"),
"--build-arg", "BASE_IMAGE_TAG=1.11.0",
"--file=docker/t2t/Dockerfile",
"--cache-from=gcr.io/kubeflow-examples/code-search:latest",
"."],
"waitFor": ["pull-cpu"],
},
{
"id": "tag-cpu",
"name": "gcr.io/cloud-builders/docker",
"args": ["tag", "gcr.io/kubeflow-examples/code-search:" + std.extVar("tag"),
"gcr.io/kubeflow-examples/code-search:latest",],
"waitFor": ["build-cpu"],
},
{
"id": "pull-gpu",
"name": "gcr.io/cloud-builders/docker",
"args": ["pull", "gcr.io/kubeflow-examples/code-search-gpu:latest"],
"waitFor": ["-"],
},
{
"id": "build-gpu",
"name": "gcr.io/cloud-builders/docker",
"args": ["build", "-t", "gcr.io/kubeflow-examples/code-search-gpu:" + std.extVar("tag"),
"--label=git-versions=" + std.extVar("gitVersion"),
"--build-arg", "BASE_IMAGE_TAG=1.11.0-gpu",
"--file=docker/t2t/Dockerfile",
"--cache-from=gcr.io/kubeflow-examples/code-search-gpu:latest",
"."],
"waitFor": ["pull-gpu"],
},
{
"id": "tag-gpu",
"name": "gcr.io/cloud-builders/docker",
"args": ["tag", "gcr.io/kubeflow-examples/code-search-gpu:" + std.extVar("tag"),
"gcr.io/kubeflow-examples/code-search-gpu:latest",],
"waitFor": ["build-gpu"],
},
{
"id": "pull-dataflow",
"name": "gcr.io/cloud-builders/docker",
"args": ["pull", "gcr.io/kubeflow-examples/code-search-dataflow:latest"],
"waitFor": ["-"],
},
{
"id": "build-dataflow",
"name": "gcr.io/cloud-builders/docker",
"args": ["build", "-t", "gcr.io/kubeflow-examples/code-search-dataflow:" + std.extVar("tag"),
"--label=git-versions=" + std.extVar("gitVersion"),
"--file=docker/t2t/Dockerfile.dataflow",
"--cache-from=gcr.io/kubeflow-examples/code-search-dataflow:latest",
"."],
"waitFor": ["pull-dataflow"],
},
{
"id": "tag-dataflow",
"name": "gcr.io/cloud-builders/docker",
"args": ["tag", "gcr.io/kubeflow-examples/code-search-dataflow:" + std.extVar("tag"),
"gcr.io/kubeflow-examples/code-search-dataflow:latest",],
"waitFor": ["build-dataflow"],
},
],
"images": ["gcr.io/kubeflow-examples/code-search:" + std.extVar("tag"),
"gcr.io/kubeflow-examples/code-search:latest",
"gcr.io/kubeflow-examples/code-search-gpu:" + std.extVar("tag"),
"gcr.io/kubeflow-examples/code-search-gpu:latest",
"gcr.io/kubeflow-examples/code-search-dataflow:" + std.extVar("tag"),
"gcr.io/kubeflow-examples/code-search-dataflow:latest"],
}
// Convert non-boolean types like string or number to a boolean.
// This is primarily intended for dealing with parameters that should be booleans.
local toBool = function(x) {
result::
if std.type(x) == "boolean" then
x
else if std.type(x) == "string" then
std.asciiUpper(x) == "TRUE"
else if std.type(x) == "number" then
x != 0
else
false,
}.result,
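// For example: toBool("True") == true, toBool(0) == false, toBool("yes") == false.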
local useImageCache = toBool(std.extVar("useImageCache")),
// A template for defining the steps for building each image.
local subGraphTemplate = {
// following variables must be set
name: null,
dockerFile: null,
buildArg: null,
local template = self,
local pullStep = if useImageCache then [
{
id: "pull-" + template.name,
name: "gcr.io/cloud-builders/docker",
args: ["pull", std.extVar("imageBase") + "/" + template.name + ":latest"],
waitFor: ["-"],
},
] else [],
local image = std.extVar("imageBase") + "/" + template.name + ":" + std.extVar("tag"),
local imageLatest = std.extVar("imageBase") + "/" + template.name + ":latest",
images: [image, imageLatest],
steps: pullStep +
[
{
local buildArgList = if template.buildArg != null then ["--build-arg", template.buildArg] else [],
local cacheList = if useImageCache then ["--cache-from=" + imageLatest] else [],
id: "build-" + template.name,
name: "gcr.io/cloud-builders/docker",
args: [
"build",
"-t",
image,
"--label=git-versions=" + std.extVar("gitVersion"),
]
+ buildArgList
+ [
"--file=" + template.dockerFile,
]
+ cacheList + ["."],
waitFor: if useImageCache then ["pull-" + template.name] else ["-"],
},
{
id: "tag-" + template.name,
name: "gcr.io/cloud-builders/docker",
args: ["tag", image, imageLatest],
waitFor: ["build-" + template.name],
},
],
},
local cpuSteps = subGraphTemplate {
name: "t2t-cpu",
dockerFile: "docker/t2t/Dockerfile",
buildArg: "BASE_IMAGE_TAG=1.11.0",
},
local gpuSteps = subGraphTemplate {
name: "t2t-gpu",
dockerFile: "docker/t2t/Dockerfile",
buildArg: "BASE_IMAGE_TAG=1.11.0-gpu",
},
local dataflowSteps = subGraphTemplate {
name: "dataflow",
dockerFile: "docker/t2t/Dockerfile.dataflow",
},
steps: cpuSteps.steps + gpuSteps.steps + dataflowSteps.steps,
images: cpuSteps.images + gpuSteps.images + dataflowSteps.images,
}

View File

@ -1,17 +1,21 @@
local params = std.extVar("__ksonnet/params");
local globals = import "globals.libsonnet";
local params = std.extVar('__ksonnet/params');
local globals = import 'globals.libsonnet';
local envParams = params + {
components +: {
// Insert component parameter overrides here. Ex:
// guestbook +: {
// name: "guestbook-dev",
// replicas: params.global.replicas,
// },
components+: {
"search-index-server"+: {
dataDir: 'gs://code-search-demo/models/20181107-dist-sync-gpu',
indexFile: 'gs://code-search-demo/20181104/code-embeddings-index/embeddings.index',
lookupFile: 'gs://code-search-demo/20181104/code-embeddings-index/embedding-to-info.csv',
},
"query-embed-server"+: {
modelBasePath: 'gs://code-search-demo/models/20181107-dist-sync-gpu/export/',
},
},
};
{
components: {
[x]: envParams.components[x] + globals, for x in std.objectFields(envParams.components)
[x]: envParams.components[x] + globals
for x in std.objectFields(envParams.components)
},
}
}

View File

@ -18,6 +18,7 @@
//tokenPairsBQTable: self.project,
//functionEmbeddingsBQTable: "someothervalue",
tokenPairsBQTable: self.project + ":" + self.bqDataset + ".token_pairs",
failedTokenizeBQTable: self.project + ":" + self.bqDataset + ".failed_tokenize",
jobNameSuffix: "20181201-1530",
bqSuffix: std.strReplace(self.jobNameSuffix, "-", "_"),
functionEmbeddingsBQTable: self.project + ":" + self.bqDataset + ".code_embeddings_" + self.bqSuffix,
@ -34,7 +35,5 @@
name: "pipeline",
problem: "kf_github_function_docstring",
project: "code-search-demo",
bqDataset: "code_search",
tokenPairsBQTable: self.project + ":" + self.bqDataset + ".token_pairs",
},
}

View File

@ -15,7 +15,7 @@
// are not picked up by the individual components.
// Need to see if we can find a way to fix this.
local imageTag = "v20181201-ae61193-dirty-d11191",
local imageTag = "v20181204-ee47a49-dirty-f4045c",
"t2t-job": {
jobType: "trainer",
@ -121,7 +121,6 @@
workingDir: $.components["t2t-code-search"].workingDir,
dataDir: self.workingDir + "/data",
functionEmbeddingsDir: self.workingDir + "/code_embeddings",
tokenPairsBQTable: "",
functionEmbeddingsBQTable: "",
},

View File

@ -22,6 +22,7 @@ local jobSpec = {
},
spec: {
replicas: 1,
backoffLimit: 0,
template: {
metadata: {
labels: {

View File

@ -13,6 +13,7 @@
},
spec: {
replicas: 1,
backoffLimit: 0,
template: {
metadata: {
labels: {
@ -32,7 +33,6 @@
"code_search.dataflow.cli.create_function_embeddings",
"--runner=DataflowRunner",
"--project=" + params.project,
"--token_pairs_table=" + params.tokenPairsBQTable,
"--function_embeddings_table=" + params.functionEmbeddingsBQTable,
"--output_dir=" + params.functionEmbeddingsDir,
"--data_dir=" + params.dataDir,

View File

@ -33,7 +33,7 @@ local jobSpec = {
"python2",
"-m",
"code_search.dataflow.cli.preprocess_github_dataset",
"--runner=DataflowRunner",
"--runner=DataflowRunner",
"--project=" + params.project,
"--target_dataset=" + params.targetDataset,
"--data_dir=" + params.dataDir,
@ -50,7 +50,7 @@ local jobSpec = {
value: "/secret/gcp-credentials/user-gcp-sa.json",
},
],
workingDir: "/src",
workingDir: "/src",
volumeMounts: [
{
mountPath: "/secret/gcp-credentials",

View File

@ -0,0 +1,198 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Install Kubeflow Pipelines SDK"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Collecting https://storage.googleapis.com/ml-pipeline/release/0.1.3/kfp.tar.gz\n",
"\u001b[?25l Downloading https://storage.googleapis.com/ml-pipeline/release/0.1.3/kfp.tar.gz (69kB)\n",
"\u001b[K 100% |████████████████████████████████| 71kB 4.0MB/s eta 0:00:01\n",
"\u001b[?25hRequirement already satisfied, skipping upgrade: urllib3>=1.15 in /opt/conda/lib/python3.6/site-packages (from kfp==0.1) (1.22)\n",
"Requirement already satisfied, skipping upgrade: six>=1.10 in /opt/conda/lib/python3.6/site-packages (from kfp==0.1) (1.11.0)\n",
"Requirement already satisfied, skipping upgrade: certifi in /opt/conda/lib/python3.6/site-packages (from kfp==0.1) (2018.11.29)\n",
"Requirement already satisfied, skipping upgrade: python-dateutil in /opt/conda/lib/python3.6/site-packages (from kfp==0.1) (2.7.5)\n",
"Requirement already satisfied, skipping upgrade: PyYAML in /opt/conda/lib/python3.6/site-packages (from kfp==0.1) (3.13)\n",
"Requirement already satisfied, skipping upgrade: google-cloud-storage==1.13.0 in /opt/conda/lib/python3.6/site-packages (from kfp==0.1) (1.13.0)\n",
"Collecting kubernetes==8.0.0 (from kfp==0.1)\n",
"\u001b[?25l Downloading https://files.pythonhosted.org/packages/6c/44/f8286fb7a25a4ff29a4dec1b5baa49571eedc2b2edf6ec4b51e4b511ac0f/kubernetes-8.0.0-py2.py3-none-any.whl (1.3MB)\n",
"\u001b[K 100% |████████████████████████████████| 1.4MB 14.9MB/s ta 0:00:01\n",
"\u001b[?25hRequirement already satisfied, skipping upgrade: google-resumable-media>=0.3.1 in /opt/conda/lib/python3.6/site-packages (from google-cloud-storage==1.13.0->kfp==0.1) (0.3.1)\n",
"Requirement already satisfied, skipping upgrade: google-api-core<2.0.0dev,>=0.1.1 in /opt/conda/lib/python3.6/site-packages (from google-cloud-storage==1.13.0->kfp==0.1) (1.6.0)\n",
"Requirement already satisfied, skipping upgrade: google-cloud-core<0.29dev,>=0.28.0 in /opt/conda/lib/python3.6/site-packages (from google-cloud-storage==1.13.0->kfp==0.1) (0.28.1)\n",
"Requirement already satisfied, skipping upgrade: requests in /opt/conda/lib/python3.6/site-packages (from kubernetes==8.0.0->kfp==0.1) (2.18.4)\n",
"Requirement already satisfied, skipping upgrade: google-auth>=1.0.1 in /opt/conda/lib/python3.6/site-packages (from kubernetes==8.0.0->kfp==0.1) (1.6.1)\n",
"Requirement already satisfied, skipping upgrade: setuptools>=21.0.0 in /opt/conda/lib/python3.6/site-packages (from kubernetes==8.0.0->kfp==0.1) (38.4.0)\n",
"Requirement already satisfied, skipping upgrade: websocket-client!=0.40.0,!=0.41.*,!=0.42.*,>=0.32.0 in /opt/conda/lib/python3.6/site-packages (from kubernetes==8.0.0->kfp==0.1) (0.54.0)\n",
"Requirement already satisfied, skipping upgrade: requests-oauthlib in /opt/conda/lib/python3.6/site-packages (from kubernetes==8.0.0->kfp==0.1) (1.0.0)\n",
"Collecting adal>=1.0.2 (from kubernetes==8.0.0->kfp==0.1)\n",
"\u001b[?25l Downloading https://files.pythonhosted.org/packages/2d/2f/14882b8dae0977e85577abde3065c141fb94dbb242adfb80e21797e4f7c9/adal-1.2.0-py2.py3-none-any.whl (52kB)\n",
"\u001b[K 100% |████████████████████████████████| 61kB 20.2MB/s ta 0:00:01\n",
"\u001b[?25hRequirement already satisfied, skipping upgrade: pytz in /opt/conda/lib/python3.6/site-packages (from google-api-core<2.0.0dev,>=0.1.1->google-cloud-storage==1.13.0->kfp==0.1) (2018.7)\n",
"Requirement already satisfied, skipping upgrade: protobuf>=3.4.0 in /opt/conda/lib/python3.6/site-packages (from google-api-core<2.0.0dev,>=0.1.1->google-cloud-storage==1.13.0->kfp==0.1) (3.6.1)\n",
"Requirement already satisfied, skipping upgrade: googleapis-common-protos!=1.5.4,<2.0dev,>=1.5.3 in /opt/conda/lib/python3.6/site-packages (from google-api-core<2.0.0dev,>=0.1.1->google-cloud-storage==1.13.0->kfp==0.1) (1.5.5)\n",
"Requirement already satisfied, skipping upgrade: chardet<3.1.0,>=3.0.2 in /opt/conda/lib/python3.6/site-packages (from requests->kubernetes==8.0.0->kfp==0.1) (3.0.4)\n",
"Requirement already satisfied, skipping upgrade: idna<2.7,>=2.5 in /opt/conda/lib/python3.6/site-packages (from requests->kubernetes==8.0.0->kfp==0.1) (2.6)\n",
"Requirement already satisfied, skipping upgrade: cachetools>=2.0.0 in /opt/conda/lib/python3.6/site-packages (from google-auth>=1.0.1->kubernetes==8.0.0->kfp==0.1) (3.0.0)\n",
"Requirement already satisfied, skipping upgrade: pyasn1-modules>=0.2.1 in /opt/conda/lib/python3.6/site-packages (from google-auth>=1.0.1->kubernetes==8.0.0->kfp==0.1) (0.2.2)\n",
"Requirement already satisfied, skipping upgrade: rsa>=3.1.4 in /opt/conda/lib/python3.6/site-packages (from google-auth>=1.0.1->kubernetes==8.0.0->kfp==0.1) (4.0)\n",
"Requirement already satisfied, skipping upgrade: oauthlib>=0.6.2 in /opt/conda/lib/python3.6/site-packages (from requests-oauthlib->kubernetes==8.0.0->kfp==0.1) (2.1.0)\n",
"Collecting PyJWT>=1.0.0 (from adal>=1.0.2->kubernetes==8.0.0->kfp==0.1)\n",
" Downloading https://files.pythonhosted.org/packages/87/8b/6a9f14b5f781697e51259d81657e6048fd31a113229cf346880bb7545565/PyJWT-1.7.1-py2.py3-none-any.whl\n",
"Requirement already satisfied, skipping upgrade: cryptography>=1.1.0 in /opt/conda/lib/python3.6/site-packages (from adal>=1.0.2->kubernetes==8.0.0->kfp==0.1) (2.1.4)\n",
"Requirement already satisfied, skipping upgrade: pyasn1<0.5.0,>=0.4.1 in /opt/conda/lib/python3.6/site-packages (from pyasn1-modules>=0.2.1->google-auth>=1.0.1->kubernetes==8.0.0->kfp==0.1) (0.4.4)\n",
"Requirement already satisfied, skipping upgrade: asn1crypto>=0.21.0 in /opt/conda/lib/python3.6/site-packages (from cryptography>=1.1.0->adal>=1.0.2->kubernetes==8.0.0->kfp==0.1) (0.24.0)\n",
"Requirement already satisfied, skipping upgrade: cffi>=1.7 in /opt/conda/lib/python3.6/site-packages (from cryptography>=1.1.0->adal>=1.0.2->kubernetes==8.0.0->kfp==0.1) (1.11.4)\n",
"Requirement already satisfied, skipping upgrade: pycparser in /opt/conda/lib/python3.6/site-packages (from cffi>=1.7->cryptography>=1.1.0->adal>=1.0.2->kubernetes==8.0.0->kfp==0.1) (2.18)\n",
"Building wheels for collected packages: kfp\n",
" Running setup.py bdist_wheel for kfp ... \u001b[?25ldone\n",
"\u001b[?25h Stored in directory: /tmp/pip-ephem-wheel-cache-yt23ripq/wheels/00/ab/c6/a055a8d8730d1b5b508b0d9ac42f12b531b1bbf575b31efe73\n",
"Successfully built kfp\n",
"\u001b[31mfairing 0.0.3 has requirement kubernetes==6.0.0, but you'll have kubernetes 8.0.0 which is incompatible.\u001b[0m\n",
"Installing collected packages: PyJWT, adal, kubernetes, kfp\n",
" Found existing installation: kubernetes 6.0.0\n",
" Uninstalling kubernetes-6.0.0:\n",
" Successfully uninstalled kubernetes-6.0.0\n",
" Found existing installation: kfp 0.1\n",
" Uninstalling kfp-0.1:\n",
" Successfully uninstalled kfp-0.1\n",
"Successfully installed PyJWT-1.7.1 adal-1.2.0 kfp-0.1 kubernetes-8.0.0\n"
]
}
],
"source": [
"!pip3 install https://storage.googleapis.com/ml-pipeline/release/0.1.3/kfp.tar.gz --upgrade"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Import required library"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"import kfp\n",
"from kfp import compiler\n",
"from kubernetes import client as k8s_client"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Import pipeline definition"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"import index_update_pipeline"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Create an experiment first"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Found experiment with name: jlewi-notebook-test\n"
]
}
],
"source": [
"client = kfp.Client()\n",
"name = 'jlewi-notebook-test'\n",
"\n",
"exp = None\n",
"experiments = client.list_experiments()\n",
"for e in experiments.experiments:\n",
" if e.name == name:\n",
" print(\"Found experiment with name: %s\" % name)\n",
" exp = e\n",
" break\n",
" \n",
"if not exp: \n",
" print(\"Creating new experiment\")\n",
" exp = client.create_experiment(name=name)"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"Run link <a href=\"/pipeline/#/runs/details/dd14a369-fcb0-11e8-a5c5-42010a8e0036\" target=\"_blank\" >here</a>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Compile it into a tar package.\n",
"compiler.Compiler().compile(index_update_pipeline.function_embedding_update, 'function_embedding_update.tar.gz')\n",
"\n",
"# Submit a run.\n",
"# inputs - experiment id, run name, tarball file\n",
"run = client.run_pipeline(exp.id, 'code-search-function-embedding', 'function_embedding_update.tar.gz')"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.4"
}
},
"nbformat": 4,
"nbformat_minor": 2
}

View File

@ -1,127 +1,93 @@
from typing import Dict
# Example Pipeline to update code search UI configuration
# To compile, use Kubeflow Pipelines V0.1.3 SDK or above.
import uuid
from kubernetes import client as k8s_client
import kfp.dsl as dsl
import kfp.gcp as gcp
# disable max arg lint check
# pylint: disable=R0913
def default_gcp_op(name: str, image: str, command: str = None,
arguments: str = None, file_inputs: Dict[dsl.PipelineParam, str] = None,
file_outputs: Dict[str, str] = None, is_exit_handler=False):
"""An operator that mounts the default GCP service account to the container.
The user-gcp-sa secret is created as part of the kubeflow deployment and
stores the access token for the kubeflow user service account.
With this service account, the container has access to a range of GCP APIs.
This service account is automatically created as part of the kubeflow deployment.
For the list of GCP APIs this service account can access, check
https://github.com/kubeflow/kubeflow/blob/7b0db0d92d65c0746ac52b000cbc290dac7c62b1/deployment/gke/deployment_manager_configs/iam_bindings_template.yaml#L18
If you want to call the GCP APIs in a different project, grant the kf-user
service account access permission.
"""
return (
dsl.ContainerOp(
name,
image,
command,
arguments,
file_inputs,
file_outputs,
is_exit_handler,
)
.add_volume(
k8s_client.V1Volume(
name='gcp-credentials',
secret=k8s_client.V1SecretVolumeSource(
secret_name='user-gcp-sa'
)
)
)
.add_volume_mount(
k8s_client.V1VolumeMount(
mount_path='/secret/gcp-credentials',
name='gcp-credentials',
)
)
.add_env_variable(
k8s_client.V1EnvVar(
name='GOOGLE_APPLICATION_CREDENTIALS',
value='/secret/gcp-credentials/user-gcp-sa.json'
)
)
)
def dataflow_function_embedding_op(
project: 'GcpProject', cluster_name: str, token_pairs_bq_table: str,
function_embeddings_bq_table: str, data_dir: 'GcsUri',
function_embeddings_dir: str, saved_model_dir: 'GcsUri', workflow_id: str,
worker_machine_type: str, num_workers: int, working_dir: str, namespace: str):
return default_gcp_op(
cluster_name: str,
function_embeddings_bq_table: str,
function_embeddings_dir: str,
namespace: str,
num_workers: int,
project: 'GcpProject',
saved_model_dir: 'GcsUri',
worker_machine_type: str,
workflow_id: str,
working_dir: str,):
return dsl.ContainerOp(
name='dataflow_function_embedding',
image='gcr.io/kubeflow-examples/code-search/ks:v20181202-fbf5905-dirty-a8480a',
image='gcr.io/kubeflow-examples/code-search/ks:v20181210-d7487dd-dirty-eb371e',
command=['/usr/local/src/submit_code_embeddings_job.sh'],
arguments=[
"--workflowId=%s" % workflow_id,
"--modelDir=%s" % saved_model_dir,
"--dataDir=%s" % data_dir,
"--cluster=%s" % cluster_name,
"--dataDir=%s" % 'gs://code-search-demo/20181104/data',
"--functionEmbeddingsDir=%s" % function_embeddings_dir,
"--functionEmbeddingsBQTable=%s" % function_embeddings_bq_table,
"--modelDir=%s" % saved_model_dir,
"--namespace=%s" % namespace,
"--numWorkers=%s" % num_workers,
"--project=%s" % project,
"--tokenPairsBQTable=%s" % token_pairs_bq_table,
"--functionEmbeddingsBQTable=%s" % function_embeddings_bq_table,
"--workerMachineType=%s" % worker_machine_type,
"--workflowId=%s" % workflow_id,
"--workingDir=%s" % working_dir,
"--cluster=%s" % cluster_name,
"--namespace=%s" % namespace,
]
)
).apply(gcp.use_gcp_secret('user-gcp-sa'))
def search_index_creator_op(
index_file: str, lookup_file: str, function_embeddings_dir: str,
workflow_id: str, cluster_name: str, namespace: str):
cluster_name: str,
function_embeddings_dir: str,
index_file: str,
lookup_file: str,
namespace: str,
workflow_id: str):
return dsl.ContainerOp(
# use component name as step name
name='search_index_creator',
image='gcr.io/kubeflow-examples/code-search/ks:v20181202-fbf5905-dirty-a8480a',
image='gcr.io/kubeflow-examples/code-search/ks:v20181210-d7487dd-dirty-eb371e',
command=['/usr/local/src/launch_search_index_creator_job.sh'],
arguments=[
'--cluster=%s' % cluster_name,
'--functionEmbeddingsDir=%s' % function_embeddings_dir,
'--indexFile=%s' % index_file,
'--lookupFile=%s' % lookup_file,
'--functionEmbeddingsDir=%s' % function_embeddings_dir,
'--workflowId=%s' % workflow_id,
'--cluster=%s' % cluster_name,
'--namespace=%s' % namespace,
'--workflowId=%s' % workflow_id,
]
)
def update_index_op(
base_git_repo: str, base_branch: str, app_dir: str, fork_git_repo: str,
index_file: str, lookup_file: str, workflow_id: str, bot_email: str):
app_dir: str,
base_branch: str,
base_git_repo: str,
bot_email: str,
fork_git_repo: str,
index_file: str,
lookup_file: str,
workflow_id: str):
return (
dsl.ContainerOp(
name='update_index',
image='gcr.io/kubeflow-examples/code-search/ks:v20181202-fbf5905-dirty-a8480a',
image='gcr.io/kubeflow-examples/code-search/ks:v20181210-d7487dd-dirty-eb371e',
command=['/usr/local/src/update_index.sh'],
arguments=[
'--baseGitRepo=%s' % base_git_repo,
'--baseBranch=%s' % base_branch,
'--appDir=%s' % app_dir,
'--baseBranch=%s' % base_branch,
'--baseGitRepo=%s' % base_git_repo,
'--botEmail=%s' % bot_email,
'--forkGitRepo=%s' % fork_git_repo,
'--env=%s' % 'pipeline',
'--indexFile=%s' % index_file,
'--lookupFile=%s' % lookup_file,
'--workflowId=%s' % workflow_id,
'--botEmail=%s' % bot_email,
],
)
.add_volume(
@ -148,50 +114,69 @@ def update_index_op(
# The pipeline definition
@dsl.pipeline(
name='function_embedding',
description='Example function embedding pipeline'
name='github_code_index_update',
description='Example of pipeline to update github code index'
)
def function_embedding_update(
def github_code_index_update(
project='code-search-demo',
cluster_name='cs-demo-1103',
namespace='kubeflow',
working_dir='gs://code-search-demo/pipeline',
data_dir='gs://code-search-demo/20181104/data',
saved_model_dir='gs://code-search-demo/models/20181107-dist-sync-gpu/export/1541712907/',
target_dataset='code_search',
worker_machine_type='n1-highcpu-32',
function_embedding_num_workers=5,
num_workers=5,
base_git_repo='kubeflow/examples',
base_branch='master',
app_dir='code_search/ks-web-app',
fork_git_repo='IronPan/examples',
bot_email='kf.sample.bot@gmail.com'):
bot_email='kf.sample.bot@gmail.com',
# Can't use workflow name as bq_suffix since BQ table doesn't accept '-' and
# workflow name is assigned at runtime. Pipeline might need to support
# replacing characters in workflow name.
# For recurrent pipeline, pass in '[[Index]]' instead, for unique naming.
bq_suffix=uuid.uuid4().hex[:6].upper()):
workflow_name = '{{workflow.name}}'
# Can't use workflow name as bq_suffix since BQ table doesn't accept '-' and
# workflow name is assigned at runtime. Pipeline might need to support
# replacing characters in workflow name.
bq_suffix = uuid.uuid4().hex[:6].upper()
working_dir = '%s/%s' % (working_dir, workflow_name)
lookup_file = '%s/code-embeddings-index/embedding-to-info.csv' % working_dir
index_file = '%s/code-embeddings-index/embeddings.index'% working_dir
function_embeddings_dir = '%s/%s' % (working_dir, "/code_embeddings")
token_pairs_bq_table = '%s:%s.token_pairs' %(project, target_dataset)
function_embeddings_dir = '%s/%s' % (working_dir, "code_embeddings")
function_embeddings_bq_table = \
'%s:%s.function_embeddings_%s' % (project, target_dataset, bq_suffix)
function_embedding = dataflow_function_embedding_op(
project, cluster_name, token_pairs_bq_table, function_embeddings_bq_table,
data_dir, function_embeddings_dir, saved_model_dir, workflow_name,
worker_machine_type, function_embedding_num_workers, working_dir, namespace)
cluster_name,
function_embeddings_bq_table,
function_embeddings_dir,
namespace,
num_workers,
project,
saved_model_dir,
worker_machine_type,
workflow_name,
working_dir)
search_index_creator = search_index_creator_op(
index_file, lookup_file, function_embeddings_dir, workflow_name, cluster_name, namespace)
cluster_name,
function_embeddings_dir,
index_file,
lookup_file,
namespace,
workflow_name)
search_index_creator.after(function_embedding)
update_index_op(
base_git_repo, base_branch, app_dir, fork_git_repo,
index_file, lookup_file, workflow_name, bot_email).after(search_index_creator)
app_dir,
base_branch,
base_git_repo,
bot_email,
fork_git_repo,
index_file,
lookup_file,
workflow_name).after(search_index_creator)
if __name__ == '__main__':
import kfp.compiler as compiler
compiler.Compiler().compile(function_embedding_update, __file__ + '.tar.gz')
compiler.Compiler().compile(github_code_index_update, __file__ + '.tar.gz')

View File

@ -26,7 +26,7 @@ class PipelineCLIOptions(pipeline_options.StandardOptions,
def add_parser_arguments(parser):
additional_args_parser = parser.add_argument_group('Custom Arguments')
additional_args_parser.add_argument('--target_dataset', metavar='', type=str,
help='BigQuery dataset for output results')
help='BigQuery dataset for output results')
additional_args_parser.add_argument('--pre_transformed', action='store_true',
help='Use a pre-transformed BigQuery dataset')
additional_args_parser.add_argument('--wait_until_finished', action='store_true',
@ -42,6 +42,10 @@ def add_parser_arguments(parser):
help=('If specified read the entire GitHub dataset '
'specified as PROJECT:DATASET.TABLE. If not '
'specified we run a query to filter the data.'))
additional_args_parser.add_argument('--failed_tokenize_table', metavar='', type=str,
help='The BigQuery table containing the '
'failed tokenize entry. This should be '
'of the form PROJECT:DATASET.TABLE.')
predict_args_parser = parser.add_argument_group('Batch Prediction Arguments')
predict_args_parser.add_argument('--token_pairs_table', metavar='', type=str,

View File

@ -6,6 +6,7 @@ import apache_beam as beam
import code_search.dataflow.cli.arguments as arguments
from code_search.dataflow.transforms import bigquery
import code_search.dataflow.transforms.github_bigquery as gh_bq
import code_search.dataflow.transforms.github_dataset as github_dataset
import code_search.dataflow.transforms.function_embeddings as func_embed
import code_search.dataflow.do_fns.dict_to_csv as dict_to_csv
@ -30,12 +31,21 @@ def create_function_embeddings(argv=None):
pipeline = beam.Pipeline(options=pipeline_opts)
token_pairs_query = gh_bq.ReadTransformedGithubDatasetQuery(
args.token_pairs_table)
token_pairs_source = beam.io.BigQuerySource(
query=token_pairs_query.query_string, use_standard_sql=True)
embeddings = (pipeline
| "Read Transformed Github Dataset" >> beam.io.Read(token_pairs_source)
if args.token_pairs_table:
token_pairs_query = gh_bq.ReadTransformedGithubDatasetQuery(
args.token_pairs_table)
token_pairs_source = beam.io.BigQuerySource(
query=token_pairs_query.query_string, use_standard_sql=True)
token_pairs = (pipeline
| "Read Transformed Github Dataset" >> beam.io.Read(token_pairs_source)
)
else:
token_pairs = (pipeline
| "Read Github Dataset" >> gh_bq.ReadGithubDataset(args.project)
| "Transform Github Dataset" >> github_dataset.TransformGithubDataset(None, None)
)
embeddings = (token_pairs
| "Compute Function Embeddings" >> func_embed.FunctionEmbeddings(args.problem,
args.data_dir,
args.saved_model_dir)
@ -59,7 +69,7 @@ def create_function_embeddings(argv=None):
)
(embeddings # pylint: disable=expression-not-assigned
| "Format for CSV Write" >> beam.ParDo(dict_to_csv.DictToCSVString(
| "Format for Embeddings CSV Write" >> beam.ParDo(dict_to_csv.DictToCSVString(
['nwo', 'path', 'function_name', 'lineno', 'original_function', 'function_embedding']))
| "Write Embeddings to CSV" >> beam.io.WriteToText('{}/func-index'.format(args.output_dir),
file_name_suffix='.csv',

View File

@ -24,7 +24,6 @@ def preprocess_github_dataset(argv=None):
- If Github Python files have already been processed, use the
pre-processed table instead (using flag `--pre-transformed`)
- Tokenize files into pairs of function definitions and docstrings
- All results are stored in a BigQuery dataset (`args.target_dataset`)
- See `transforms.github_dataset.TransformGithubDataset` for details of tables created
- Additionally, store pairs of docstring and function tokens in a CSV file
for training
@ -59,8 +58,8 @@ def preprocess_github_dataset(argv=None):
input_records = (pipeline
| "Read Github Dataset" >> gh_bq.ReadGithubDataset(args.project))
token_pairs = (input_records
| "Transform Github Dataset" >> github_dataset.TransformGithubDataset(args.project,
args.target_dataset)
| "Transform Github Dataset" >> github_dataset.TransformGithubDataset(
args.token_pairs_table, args.failed_tokenize_table)
)
(token_pairs # pylint: disable=expression-not-assigned
@ -73,7 +72,7 @@ def preprocess_github_dataset(argv=None):
result = pipeline.run()
logging.info("Submitted Dataflow job: %s", result)
if args.wait_until_finish:
if args.wait_until_finished:
result.wait_until_finish()
return result

View File

@ -77,12 +77,10 @@ class BigQueryWrite(beam.PTransform):
]
"""
def __init__(self, project, dataset, table, batch_size=500,
def __init__(self, table, batch_size=500,
write_disposition=bigquery.BigQueryDisposition.WRITE_TRUNCATE):
super(BigQueryWrite, self).__init__()
self.project = project
self.dataset = dataset
self.table = table
self.write_disposition = write_disposition
self.batch_size = batch_size
@ -97,9 +95,7 @@ class BigQueryWrite(beam.PTransform):
def expand(self, input_or_inputs):
return (input_or_inputs
| beam.io.WriteToBigQuery(project=self.project,
dataset=self.dataset,
table=self.table,
| beam.io.WriteToBigQuery(table=self.table,
schema=self.output_schema,
batch_size=self.batch_size,
write_disposition=self.write_disposition)

View File

@ -74,30 +74,6 @@ class ReadGithubDataset(bq_transform.BigQueryRead):
return query
class WriteFailedTokenizedData(bq_transform.BigQueryWrite):
@property
def column_list(self):
return [
('nwo', 'STRING'),
('path', 'STRING'),
('content', 'STRING')
]
class WriteTokenizedData(bq_transform.BigQueryWrite):
@property
def column_list(self):
return [
('nwo', 'STRING'),
('path', 'STRING'),
('function_name', 'STRING'),
('lineno', 'STRING'),
('original_function', 'STRING'),
('function_tokens', 'STRING'),
('docstring_tokens', 'STRING'),
]
class ReadTransformedGithubDatasetQuery(object):
def __init__(self, table, limit=None):

View File

@ -2,8 +2,8 @@ import logging
import apache_beam as beam
from code_search.dataflow.transforms import bigquery
import code_search.dataflow.do_fns.github_dataset as gh_do_fns
import code_search.dataflow.transforms.github_bigquery as gh_bq
class TransformGithubDataset(beam.PTransform):
@ -15,19 +15,15 @@ class TransformGithubDataset(beam.PTransform):
All tiny docstrings (smaller than `self.min_docstring_tokens`)
are filtered out.
This transform creates following tables in the `target_dataset`
This transform creates following tables
which are defined as properties for easy modification.
- `self.failed_tokenize_table`
- `self.pairs_table`
"""
def __init__(self, project, target_dataset,
pairs_table=gh_bq.PAIRS_TABLE,
failed_tokenize_table=gh_bq.FAILED_TOKENIZE_TABLE):
def __init__(self, pairs_table, failed_tokenize_table):
super(TransformGithubDataset, self).__init__()
self.project = project
self.target_dataset = target_dataset
self.pairs_table = pairs_table
self.failed_tokenize_table = failed_tokenize_table
@ -44,10 +40,19 @@ class TransformGithubDataset(beam.PTransform):
pairs, tokenize_errors = tokenize_result.rows, tokenize_result.err
if self.target_dataset:
if self.failed_tokenize_table:
failed_tokenize_table_schema = bigquery.BigQuerySchema([
('nwo', 'STRING'),
('path', 'STRING'),
('content', 'STRING')
])
(tokenize_errors # pylint: disable=expression-not-assigned
| "Failed Tokenization" >> gh_bq.WriteFailedTokenizedData(self.project, self.target_dataset,
self.failed_tokenize_table)
| "Failed Tokenization" >> beam.io.WriteToBigQuery(table=self.failed_tokenize_table,
schema=failed_tokenize_table_schema,
create_disposition=beam.io.BigQueryDisposition.CREATE_IF_NEEDED,
write_disposition=beam.io.BigQueryDisposition.WRITE_EMPTY)
)
else:
logging.info("No bigquery dataset provided; tokenization errors will "
@ -59,13 +64,23 @@ class TransformGithubDataset(beam.PTransform):
lambda row: len(row['docstring_tokens'].split(' ')) > self.min_docstring_tokens)
)
if self.target_dataset:
logging.info("Writing results to BigQuery %s:%s.%s",
self.project, self.target_dataset, self.pairs_table)
if self.pairs_table:
logging.info("Writing results to BigQuery %s", self.pairs_table)
tokenize_table_schema = bigquery.BigQuerySchema([
('nwo', 'STRING'),
('path', 'STRING'),
('function_name', 'STRING'),
('lineno', 'STRING'),
('original_function', 'STRING'),
('function_tokens', 'STRING'),
('docstring_tokens', 'STRING'),
])
(flat_rows # pylint: disable=expression-not-assigned
| "Save Tokens" >> gh_bq.WriteTokenizedData(self.project, self.target_dataset,
self.pairs_table)
| "Save Tokens" >> beam.io.WriteToBigQuery(table=self.pairs_table,
schema=tokenize_table_schema,
create_disposition=beam.io.BigQueryDisposition.CREATE_IF_NEEDED,
write_disposition=beam.io.BigQueryDisposition.WRITE_EMPTY)
)
else:
logging.info("target_dataset not set will not write to BigQuery")
logging.info("pairs_table not set will not write to BigQuery")
return flat_rows

View File

@ -0,0 +1,20 @@
local env = std.extVar("__ksonnet/environments");
local params = std.extVar("__ksonnet/params").components.serving;
local k = import "k.libsonnet";
// ksonnet appears to require that name be a parameter of the prototype, which is why we handle it differently.
local name = params.name;
// updatedParams includes the namespace from env by default.
// We can override namespace in params if needed
local updatedParams = env + params;
local tfServingBase = import "kubeflow/tf-serving/tf-serving.libsonnet";
local tfServing = tfServingBase {
// Override parameters with user supplied parameters.
params+: updatedParams {
name: name,
},
};
std.prune(k.core.v1.list.new(tfServing.components))

View File

@ -0,0 +1,198 @@
local env = std.extVar("__ksonnet/environments");
local params = std.extVar("__ksonnet/params").components["t2tcpu"];
local k = import "k.libsonnet";
local name = params.name;
local namespace = env.namespace;
local updatedParams = {
cloud: "gke",
sync: "0",
dataDir: "gs://kubeflow-demo-base/featurization/yelp-data",
usrDir: "./yelp_sentiment",
problem: "yelp_sentiment",
model: "transformer_encoder",
hparams: "transformer_yelp_sentiment",
hparamsSet: "transformer_yelp_sentiment",
outputGCSPath: "gs://kubeflow-demo-base/kubeflow-demo-base-demo/CPU/training/yelp-model",
gpuImage: "gcr.io/kubeflow-demo-base/kubeflow-yelp-demo-gpu:latest",
cpuImage: "gcr.io/kubeflow-demo-base/kubeflow-yelp-demo-cpu:latest",
trainSteps: 1000,
evalSteps: 10,
psGpu: 0,
workerGpu: 0,
workers: 3,
masters: 1,
ps: 1,
jobName: "t2tcpu",
} + params;
local baseCommand = [
"bash",
"/home/jovyan/yelp_sentiment/worker_launcher.sh",
"--train_steps=" + updatedParams.trainSteps,
"--hparams_set=" + updatedParams.hparams,
"--model=" + updatedParams.model,
"--problem=" + updatedParams.problem,
"--t2t_usr_dir=" + updatedParams.usrDir,
"--data_dir=" + updatedParams.dataDir,
"--output_dir=" + updatedParams.outputGCSPath,
];
local psCommand = baseCommand + [
"--schedule=run_std_server",
];
local totalWorkerReplicas = updatedParams.workers + updatedParams.masters;
local workerBaseCommand = baseCommand + [
"--schedule=train",
"--sync=" + updatedParams.sync,
"--ps_gpu=" + updatedParams.psGpu,
"--worker_gpu=" + updatedParams.workerGpu,
"--worker_replicas=" + totalWorkerReplicas,
"--ps_replicas=" + updatedParams.ps,
"--eval_steps=" + updatedParams.evalSteps,
];
local workerCommand = workerBaseCommand + [
"--worker_job=/job:worker",
];
local masterCommand = workerBaseCommand + [
"--worker_job=/job:master",
];
local gpuResources = {
limits: {
"nvidia.com/gpu": updatedParams.workerGpu,
},
};
local cloud = std.toString(updatedParams.cloud);
local baseEnv = [
{
name: "PYTHONPATH",
value: "/home/jovyan",
},
];
local nonGkeEnv = baseEnv + [
{
name: "GOOGLE_APPLICATION_CREDENTIALS",
value: "/secret/gcp-credentials/key.json"
},
];
local nonGkeVolumes = [
{
name: "gcp-credentials",
secret: {
secretName: "gcp-credentials",
},
},
];
local nonGkeImagePullSecrets = [
{
name: "gcp-registry-credentials",
},
];
local nonGkeVolumeMounts = [
{
mountPath: "/secret/gcp-credentials",
name: "gcp-credentials",
},
];
local tfjob = {
apiVersion: "kubeflow.org/v1alpha2",
kind: "TFJob",
metadata: {
name: updatedParams.jobName,
namespace: namespace,
},
spec: {
tfReplicaSpecs: {
Master: {
replicas: 1,
template: {
spec: {
containers: [
{
command: masterCommand,
env: if cloud != "gke" then nonGkeEnv else baseEnv,
image: if updatedParams.workerGpu > 0 then updatedParams.gpuImage else updatedParams.cpuImage,
name: "tensorflow",
[if updatedParams.workerGpu > 0 then "resources"]: gpuResources,
[if cloud != "gke" then "volumeMounts"]: nonGkeVolumeMounts,
},
],
[if cloud != "gke" then "imagePullSecrets"]: nonGkeImagePullSecrets,
restartPolicy: "OnFailure",
[if cloud != "gke" then "volumes"]: nonGkeVolumes,
},
},
}, // Master
Worker: {
replicas: updatedParams.workers,
template: {
spec: {
containers: [
{
command: workerCommand,
env: if cloud != "gke" then nonGkeEnv else baseEnv,
image: if updatedParams.workerGpu > 0 then updatedParams.gpuImage else updatedParams.cpuImage,
name: "tensorflow",
[if updatedParams.workerGpu > 0 then "resources"]: gpuResources,
[if cloud != "gke" then "volumeMounts"]: nonGkeVolumeMounts,
},
],
[if cloud != "gke" then "imagePullSecrets"]: nonGkeImagePullSecrets,
restartPolicy: "OnFailure",
[if cloud != "gke" then "volumes"]: nonGkeVolumes,
},
},
}, // Worker
Ps: {
replicas: updatedParams.ps,
template: {
spec: {
containers: [
{
command: psCommand,
env: if cloud != "gke" then nonGkeEnv else baseEnv,
image: updatedParams.cpuImage,
name: "tensorflow",
[if cloud != "gke" then "volumeMounts"]: nonGkeVolumeMounts,
},
],
[if cloud != "gke" then "imagePullSecrets"]: nonGkeImagePullSecrets,
restartPolicy: "OnFailure",
[if cloud != "gke" then "volumes"]: nonGkeVolumes,
},
},
}, // Ps
}, // tfReplicaSpecs
}, // Spec
}; // tfJob
k.core.v1.list.new([
tfjob,
])

View File

@ -0,0 +1,197 @@
local env = std.extVar("__ksonnet/environments");
local params = std.extVar("__ksonnet/params").components["t2tgpu"];
local k = import "k.libsonnet";
local name = params.name;
local namespace = env.namespace;
local updatedParams = {
cloud: "gke",
sync: "0",
dataDir: "gs://kubeflow-demo-base/featurization/yelp-data",
usrDir: "./yelp_sentiment",
problem: "yelp_sentiment",
model: "transformer_encoder",
hparams: "transformer_yelp_sentiment",
hparamsSet: "transformer_yelp_sentiment",
outputGCSPath: "gs://kubeflow-demo-base/kubeflow-demo-base-demo/GPU/training/yelp-model",
gpuImage: "gcr.io/kubeflow-demo-base/kubeflow-yelp-demo-gpu:latest",
cpuImage: "gcr.io/kubeflow-demo-base/kubeflow-yelp-demo-cpu:latest",
trainSteps: 1000,
evalSteps: 10,
psGpu: 0,
workerGpu: 1,
workers: 3,
masters: 1,
ps: 1,
jobName: "t2tgpu",
} + params;
local baseCommand = [
"bash",
"/home/jovyan/yelp_sentiment/worker_launcher.sh",
"--train_steps=" + updatedParams.trainSteps,
"--hparams_set=" + updatedParams.hparams,
"--model=" + updatedParams.model,
"--problem=" + updatedParams.problem,
"--t2t_usr_dir=" + updatedParams.usrDir,
"--data_dir=" + updatedParams.dataDir,
"--output_dir=" + updatedParams.outputGCSPath,
];
local psCommand = baseCommand + [
"--schedule=run_std_server",
];
local totalWorkerReplicas = updatedParams.workers + updatedParams.masters;
local workerBaseCommand = baseCommand + [
"--schedule=train",
"--sync=" + updatedParams.sync,
"--ps_gpu=" + updatedParams.psGpu,
"--worker_gpu=" + updatedParams.workerGpu,
"--worker_replicas=" + totalWorkerReplicas,
"--ps_replicas=" + updatedParams.ps,
"--eval_steps=" + updatedParams.evalSteps,
];
local workerCommand = workerBaseCommand + [
"--worker_job=/job:worker",
];
local masterCommand = workerBaseCommand + [
"--worker_job=/job:master",
];
local gpuResources = {
limits: {
"nvidia.com/gpu": updatedParams.workerGpu,
},
};
local cloud = std.toString(updatedParams.cloud);
local baseEnv = [
{
name: "PYTHONPATH",
value: "/home/jovyan",
},
];
local nonGkeEnv = baseEnv + [
{
name: "GOOGLE_APPLICATION_CREDENTIALS",
value: "/secret/gcp-credentials/key.json"
},
];
local nonGkeVolumes = [
{
name: "gcp-credentials",
secret: {
secretName: "gcp-credentials",
},
},
];
local nonGkeImagePullSecrets = [
{
name: "gcp-registry-credentials",
},
];
local nonGkeVolumeMounts = [
{
mountPath: "/secret/gcp-credentials",
name: "gcp-credentials",
},
];
local tfjob = {
apiVersion: "kubeflow.org/v1alpha2",
kind: "TFJob",
metadata: {
name: updatedParams.jobName,
namespace: namespace,
},
spec: {
tfReplicaSpecs: {
Master: {
replicas: 1,
template: {
spec: {
containers: [
{
command: masterCommand,
env: if cloud != "gke" then nonGkeEnv else baseEnv,
image: if updatedParams.workerGpu > 0 then updatedParams.gpuImage else updatedParams.cpuImage,
name: "tensorflow",
[if updatedParams.workerGpu > 0 then "resources"]: gpuResources,
[if cloud != "gke" then "volumeMounts"]: nonGkeVolumeMounts,
},
],
[if cloud != "gke" then "imagePullSecrets"]: nonGkeImagePullSecrets,
restartPolicy: "OnFailure",
[if cloud != "gke" then "volumes"]: nonGkeVolumes,
},
},
}, // Master
Worker: {
replicas: updatedParams.workers,
template: {
spec: {
containers: [
{
command: workerCommand,
env: if cloud != "gke" then nonGkeEnv else baseEnv,
image: if updatedParams.workerGpu > 0 then updatedParams.gpuImage else updatedParams.cpuImage,
name: "tensorflow",
[if updatedParams.workerGpu > 0 then "resources"]: gpuResources,
[if cloud != "gke" then "volumeMounts"]: nonGkeVolumeMounts,
},
],
[if cloud != "gke" then "imagePullSecrets"]: nonGkeImagePullSecrets,
restartPolicy: "OnFailure",
[if cloud != "gke" then "volumes"]: nonGkeVolumes,
},
},
}, // Worker
Ps: {
replicas: updatedParams.ps,
template: {
spec: {
containers: [
{
command: psCommand,
env: if cloud != "gke" then nonGkeEnv else baseEnv,
image: updatedParams.cpuImage,
name: "tensorflow",
[if cloud != "gke" then "volumeMounts"]: nonGkeVolumeMounts,
},
],
[if cloud != "gke" then "imagePullSecrets"]: nonGkeImagePullSecrets,
restartPolicy: "OnFailure",
[if cloud != "gke" then "volumes"]: nonGkeVolumes,
},
},
}, // Ps
}, // tfReplicaSpecs
}, // Spec
}; // tfJob
k.core.v1.list.new([
tfjob,
])

View File

@ -0,0 +1,95 @@
local env = std.extVar("__ksonnet/environments");
local params = std.extVar("__ksonnet/params").components["t2ttpu"];
local k = import "k.libsonnet";
local name = params.name;
local namespace = env.namespace;
local updatedParams = {
cloud: "gke",
dataDir: "gs://kubeflow-demo-base/featurization/yelp-data",
usrDir: "./yelp_sentiment",
problem: "yelp_sentiment",
model: "transformer_encoder",
hparams: "transformer_yelp_sentiment",
hparamsSet: "transformer_yelp_sentiment",
outputGCSPath: "gs://kubeflow-demo-base/training/yelp-model-TPU",
cpuImage: "gcr.io/kubeflow-demo-base/kubeflow-yelp-demo-cpu:latest",
gpuImage: "gcr.io/kubeflow-demo-base/kubeflow-yelp-demo-gpu:latest",
trainSteps: 1000,
evalSteps: 10,
tpus: 8,
jobName: "t2ttpu",
tpuEndpoint: "$(KUBE_GOOGLE_CLOUD_TPU_ENDPOINTS)",
} + params;
local cloud = std.toString(updatedParams.cloud);
local tfjob = {
apiVersion: "kubeflow.org/v1alpha2",
kind: "TFJob",
metadata: {
name: updatedParams.jobName,
namespace: namespace,
},
spec: {
tfReplicaSpecs: {
Master: {
replicas: 1,
template: {
metadata: {
annotations: {
"tf-version.cloud-tpus.google.com": "1.9",
},
},
spec: {
containers: [
{
args: [
"--model=" + updatedParams.model,
"--hparams_set=" + updatedParams.hparamsSet,
"--problem=" + updatedParams.problem,
"--t2t_usr_dir=" + updatedParams.usrDir,
"--train_steps=" + updatedParams.trainSteps,
"--eval_steps=" + updatedParams.evalSteps,
"--data_dir=" + updatedParams.dataDir,
"--output_dir=" + updatedParams.outputGCSPath,
"--use_tpu",
"--master=" + updatedParams.tpuEndpoint,
],
command: [
"t2t-trainer",
],
image: updatedParams.cpuImage,
name: "tensorflow",
resources: {
"limits": {
"cloud-tpus.google.com/v2": updatedParams.tpus,
},
requests: {
memory: "1Gi",
},
},
},
],
restartPolicy: "OnFailure",
}, // spec
}, // template
}, // Master
}, // tfReplicaSpecs
}, // Spec
}; // tfJob
k.core.v1.list.new([
tfjob,
])

View File

@ -4,4 +4,4 @@ local k = import "k.libsonnet";
local ui = import "ui.libsonnet";
std.prune(k.core.v1.list.new(ui.all(params, env)))
std.prune(k.core.v1.list.new(ui.parts(params, env)))

View File

@ -0,0 +1,102 @@
{
parts(params, env):: [
{
apiVersion: "v1",
kind: "Service",
metadata: {
name: "kubeflow-demo-ui",
namespace: env.namespace,
annotations: {
"getambassador.io/config":
std.join("\n", [
"---",
"apiVersion: ambassador/v0",
"kind: Mapping",
"name: kubeflow_demo_ui",
"prefix: /kubeflow_demo/",
"rewrite: /",
"service: kubeflow-demo-ui:80",
]),
},
},
spec: {
ports: [
{
port: 80,
targetPort: 80,
},
],
selector: {
app: "kubeflow-demo-ui",
},
type: "ClusterIP",
},
},
{
apiVersion: "apps/v1beta1",
kind: "Deployment",
metadata: {
name: "kubeflow-demo-ui",
namespace: env.namespace,
},
spec: {
replicas: 1,
template: {
metadata: {
labels: {
app: "kubeflow-demo-ui",
},
},
spec: {
containers: [
{
args: [
"app.py",
"--model_url",
"http://serving:8000/model/serving:predict",
"--data_dir",
"gs://kubeflow-demo-base/featurization/yelp-data-1000000",
],
command: [
"python",
],
image: params.image,
name: "kubeflow-demo-ui",
ports: [
{
containerPort: 80,
},
],
"env": [
{
name: "GOOGLE_APPLICATION_CREDENTIALS",
value: "/secret/gcp-credentials/key.json"
},
],
"volumeMounts": [
{
mountPath: "/secret/gcp-credentials",
name: "gcp-credentials",
},
],
},
],
"imagePullSecrets": [
{
name: "gcp-registry-credentials",
},
],
"volumes": [
{
name: "gcp-credentials",
secret: {
secretName: "gcp-credentials",
},
},
],
},
},
},
},
],
}

View File

@ -115,7 +115,7 @@ ks generate tf-job-simple train
```
This ksonnet prototype needs to be modified slightly for our needs; you can simply copy an updated version of the prototype from the repository.
```
cp ../tensorflow-model/CPU/train.jsonnet ./components/train.jsonnet
cp ../tensorflow_model/CPU/train.jsonnet ./components/train.jsonnet
```
Now we need to define the parameters which are currently set as placeholders in the training job prototype.
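As a rough illustration (the actual parameter names are defined in the copied `train.jsonnet`, so check them with `ks param list train` first; the values below are placeholders and `${KF_ENV}` stands for your ksonnet environment), setting the parameters looks like this:
```bash
# Inspect which placeholder parameters the prototype exposes.
ks param list train

# Hypothetical parameter names and values; substitute the ones your
# train.jsonnet actually defines.
ks param set train name "my-training-job" --env=${KF_ENV}
ks param set train image "gcr.io/<your-project>/<your-training-image>:latest" --env=${KF_ENV}
```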
@ -252,7 +252,7 @@ We will create a separate pool and install the necessary NVIDIA GPU device drive
For more instruction on how to handle GPUs on Kubernetes, see https://cloud.google.com/kubernetes-engine/docs/how-to/gpus.
```
cloud container node-pools create gpu-pool --accelerator type=nvidia-tesla-k80,count=1 --zone europe-west1-b --cluster kubeflow --num-nodes 1 --min-nodes 1 --max-nodes 1 --enable-autoscaling --scopes=https://www.googleapis.com/auth/cloud-platform
gcloud container node-pools create gpu-pool --accelerator type=nvidia-tesla-k80,count=1 --zone europe-west1-b --cluster kubeflow --num-nodes 1 --min-nodes 1 --max-nodes 1 --enable-autoscaling --scopes=https://www.googleapis.com/auth/cloud-platform
kubectl apply -f https://raw.githubusercontent.com/GoogleCloudPlatform/container-engine-accelerators/stable/nvidia-driver-installer/cos/daemonset-preloaded.yaml
```

View File

@ -44,22 +44,6 @@ local tfjob = {
},
},
},
Ps: {
template: {
spec: {
containers: [
{
args: args,
image: params.cpuImage,
name: "tensorflow",
workingDir: "/opt/workdir",
},
],
restartPolicy: "OnFailure",
},
},
tfReplicaType: "PS",
},
},
},
};

1
github_issue_summarization/.gitignore vendored Normal file
View File

@ -0,0 +1 @@
build/**

View File

@ -1,74 +1,114 @@
# Setup Kubeflow
In this part, you will setup kubeflow on an existing kubernetes cluster.
In this section, you will setup Kubeflow on an existing Kubernetes cluster.
## Requirements
* A kubernetes cluster
* To create a managed cluster run
```commandline
gcloud container clusters create kubeflow-examples-cluster
```
or use kubeadm: [docs](https://kubernetes.io/docs/setup/independent/create-cluster-kubeadm/)
* `kubectl` CLI (command line interface) pointing to the kubernetes cluster
* A Kubernetes cluster
* To create a cluster, follow the instructions on the
[Set up Kubernetes](https://www.kubeflow.org/docs/started/getting-started/#set-up-kubernetes)
section of the Kubeflow Getting Started guide. We recommend using a
managed service such as Google Kubernetes Engine (GKE).
[This link](https://www.kubeflow.org/docs/started/getting-started-gke/)
guides you through the process of using either
[Click-to-Deploy](https://deploy.kubeflow.cloud/#/deploy) (a web-based UI) or
[`kfctl`](https://github.com/kubeflow/kubeflow/blob/master/scripts/kfctl.sh)
(a CLI tool) to generate a GKE cluster with all Kubeflow components
installed. Note that there is no need to complete the Deploy Kubeflow steps
below if you use either of these two tools.
* The Kubernetes CLI `kubectl` pointing to the kubernetes cluster
* Make sure that you can run `kubectl get nodes` from your terminal
successfully
* The ksonnet CLI, v0.9.2 or higher: [ks](https://ksonnet.io/#get-started)
* The ksonnet CLI [`ks`](https://ksonnet.io/#get-started), v0.9.2 or higher:
* In case you want to install a particular version of ksonnet, you can run
```commandline
export KS_VER=ks_0.11.0_linux_amd64
wget -O /tmp/$KS_VER.tar.gz https://github.com/ksonnet/ksonnet/releases/download/v0.11.0/$KS_VER.tar.gz
```bash
export KS_VER=0.13.1
export KS_BIN=ks_${KS_VER}_linux_amd64
wget -O /tmp/${KS_BIN}.tar.gz https://github.com/ksonnet/ksonnet/releases/download/v${KS_VER}/${KS_BIN}.tar.gz
mkdir -p ${HOME}/bin
tar -xvf /tmp/$KS_VER.tar.gz -C ${HOME}/bin
export PATH=$PATH:${HOME}/bin/$KS_VER
tar -xvf /tmp/${KS_BIN}.tar.gz -C ${HOME}/bin
export PATH=$PATH:${HOME}/bin/${KS_BIN}
```
## Kubeflow setup
Refer to the [user
guide](https://github.com/kubeflow/kubeflow/blob/master/user_guide.md) for
detailed instructions on how to setup kubeflow on your kubernetes cluster.
Refer to the [guide](https://www.kubeflow.org/docs/started/getting-started/) for
detailed instructions on how to setup Kubeflow on your Kubernetes cluster.
Specifically, complete the following sections:
* [Deploy
Kubeflow](https://github.com/kubeflow/kubeflow/blob/master/user_guide.md#deploy-kubeflow)
* The `ks-kubeflow` directory can be used instead of creating a ksonnet
app from scratch.
* If you run into
[API rate limiting errors](https://github.com/ksonnet/ksonnet/blob/master/docs/troubleshooting.md#github-rate-limiting-errors),
ensure you have a `${GITHUB_TOKEN}` environment variable set.
* If you run into
[RBAC permissions issues](https://github.com/kubeflow/kubeflow/blob/master/user_guide.md#rbac-clusters)
running `ks apply` commands, be sure you have created a `cluster-admin` ClusterRoleBinding for your username.
* [Setup a persistent disk](https://www.kubeflow.org/docs/other-guides/advanced/)
* We need a shared persistent disk to store our training data since
containers' filesystems are ephemeral and don't have a lot of storage space.
* For this example, provision a `10GB` cluster-wide shared NFS mount with the
name `github-issues-data`.
* After the NFS is ready, delete the `tf-hub-0` pod so that it gets recreated and
picks up the NFS mount. You can delete it by running `kubectl delete pod
tf-hub-0 -n=${NAMESPACE}`
* [Bringing up a
Notebook](https://github.com/kubeflow/kubeflow/blob/master/user_guide.md#bringing-up-a-jupyter-notebook)
* When choosing an image for your cluster in the JupyterHub UI, use the
image from this example:
[`gcr.io/kubeflow-dev/issue-summarization-notebook-cpu:latest`](https://github.com/kubeflow/examples/blob/master/github_issue_summarization/workflow/Dockerfile).
* [Deploy Kubeflow](https://www.kubeflow.org/docs/other-guides/advanced/)
* The latest version that was tested with this walkthrough was v0.4.0-rc.2.
* The [`kfctl`](https://github.com/kubeflow/kubeflow/blob/master/scripts/kfctl.sh)
CLI tool can be used to install Kubeflow on an existing cluster. Follow
[this guide](https://www.kubeflow.org/docs/started/getting-started/#kubeflow-quick-start)
to use `kfctl` to generate a ksonnet app, create Kubeflow manifests, and
install all default components onto an existing Kubernetes cluster. Note
that you can likely skip this step if you used
[Click-to-Deploy](https://deploy.kubeflow.cloud/#/deploy)
or `kfctl` to generate your cluster.
* [Setup a persistent disk](https://www.kubeflow.org/docs/guides/advanced/)
* We need a shared persistent disk to store our training data since
containers' filesystems are ephemeral and don't have a lot of storage space.
* For this example, provision a `10GB` cluster-wide shared NFS mount with the
name `github-issues-data`.
* After the NFS is ready, delete the `jupyter-0` pod so that it gets recreated and
picks up the NFS mount. You can delete it by running `kubectl delete pod
jupyter-0 -n=${NAMESPACE}`
* [Bringing up a
Notebook](https://www.kubeflow.org/docs/guides/components/jupyter/)
* When choosing an image for your cluster in the JupyterHub UI, use the
image from this example:
[`gcr.io/kubeflow-dev/issue-summarization-notebook-cpu:latest`](https://github.com/kubeflow/examples/blob/master/github_issue_summarization/workflow/Dockerfile).
After completing that, you should have the following ready:
* A ksonnet app in a directory named `ks-kubeflow`
* An output similar to this for `kubectl get pods` command
* A ksonnet app in a directory named `ks_app`
* An output similar to this for `kubectl -n kubeflow get pods` command
```commandline
NAME READY STATUS RESTARTS AGE
ambassador-75bb54594-dnxsd 2/2 Running 0 3m
ambassador-75bb54594-hjj6m 2/2 Running 0 3m
ambassador-75bb54594-z948h 2/2 Running 0 3m
jupyter-chasm 1/1 Running 0 49s
spartakus-volunteer-565b99cd69-knjf2 1/1 Running 0 3m
tf-hub-0 1/1 Running 0 3m
tf-job-dashboard-6c757d8684-d299l 1/1 Running 0 3m
tf-job-operator-77776c8446-lpprm 1/1 Running 0 3m
```bash
NAME READY STATUS RESTARTS AGE
ambassador-5cf8cd97d5-6qlpz 1/1 Running 0 3m
ambassador-5cf8cd97d5-rqzkx 1/1 Running 0 3m
ambassador-5cf8cd97d5-wz9hl 1/1 Running 0 3m
argo-ui-7c9c69d464-xpphz 1/1 Running 0 3m
centraldashboard-6f47d694bd-7jfmw 1/1 Running 0 3m
cert-manager-5cb7b9fb67-qjd9p 1/1 Running 0 3m
cm-acme-http-solver-2jr47 1/1 Running 0 3m
ingress-bootstrap-x6whr 1/1 Running 0 3m
jupyter-0 1/1 Running 0 3m
jupyter-chasm 1/1 Running 0 49s
katib-ui-54b4667bc6-cg4jk 1/1 Running 0 3m
metacontroller-0 1/1 Running 0 3m
minio-7bfcc6c7b9-qrshc 1/1 Running 0 3m
ml-pipeline-b59b58dd6-bwm8t 1/1 Running 0 3m
ml-pipeline-persistenceagent-9ff99498c-v4k8f 1/1 Running 0 3m
ml-pipeline-scheduledworkflow-78794fd86f-4tzxp 1/1 Running 0 3m
ml-pipeline-ui-9884fd997-7jkdk 1/1 Running 0 3m
ml-pipelines-load-samples-668gj 0/1 Completed 0 3m
mysql-6f6b5f7b64-qgbkz 1/1 Running 0 3m
pytorch-operator-6f87db67b7-nld5h 1/1 Running 0 3m
spartakus-volunteer-7c77dc796-7jgtd 1/1 Running 0 3m
studyjob-controller-68c6fc5bc8-jkc9q 1/1 Running 0 3m
tf-job-dashboard-5f986cf99d-kb6gp 1/1 Running 0 3m
tf-job-operator-v1beta1-5876c48976-q96nh 1/1 Running 0 3m
vizier-core-78f57695d6-5t8z7 1/1 Running 0 3m
vizier-core-rest-7d7dd7dbb8-dbr7n 1/1 Running 0 3m
vizier-db-777675b958-c46qh 1/1 Running 0 3m
vizier-suggestion-bayesianoptimization-7f46d8cb47-wlltt 1/1 Running 0 3m
vizier-suggestion-grid-64c5f8bdf-2bznv 1/1 Running 0 3m
vizier-suggestion-hyperband-8546bf5885-54hr6 1/1 Running 0 3m
vizier-suggestion-random-c4c8d8667-l96vs 1/1 Running 0 3m
whoami-app-7b575b555d-85nb8 1/1 Running 0 3m
workflow-controller-5c95f95f58-hprd5 1/1 Running 0 3m
```
* A Jupyter Notebook accessible at http://127.0.0.1:8000
@ -77,10 +117,14 @@ tf-job-operator-77776c8446-lpprm 1/1 Running 0
## Summary
* We created a ksonnet app for our kubeflow deployment
* We deployed the kubeflow-core component to our kubernetes cluster
* We created a disk for storing our training data
* We connected to JupyterHub and spawned a new Jupyter notebook
* For additional details and playground visit [katacoda](https://www.katacoda.com/kubeflow/scenarios/deploying-github-issue-summarization)
* We created a ksonnet app for our kubeflow deployment: `ks_app`.
* We deployed the default Kubeflow components to our Kubernetes cluster.
* We created a disk for storing our training data.
* We connected to JupyterHub and spawned a new Jupyter notebook.
* For additional details and self-paced learning scenarios related to this
example, see the
[Resources](https://www.kubeflow.org/docs/started/getting-started/#resources)
section of the
[Getting Started Guide](https://www.kubeflow.org/docs/started/getting-started/).
*Next*: [Training the model](02_training_the_model.md)
*Next*: [Training the model with a notebook](02_training_the_model.md)

View File

@ -1,23 +1,26 @@
# Distributed training using Estimator
Distributed training with keras currently doesn't work; see
Distributed training with Keras currently does not work. Do not follow this guide
until these issues have been resolved:
* kubeflow/examples#280
* kubeflow/examples#96
* [kubeflow/examples#280](https://github.com/kubeflow/examples/issues/280)
* [kubeflow/examples#196](https://github.com/kubeflow/examples/issues/196)
Requires Tensorflow 1.9 or later.
Requires TensorFlow 1.9 or later.
Requires [StorageClass](https://kubernetes.io/docs/concepts/storage/storage-classes/) capable of creating ReadWriteMany persistent volumes.
On GKE you can follow [GCFS documentation](https://master.kubeflow.org/docs/started/getting-started-gke/#using-gcfs-with-kubeflow) to enable it.
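A quick way to sanity-check this requirement is sketched below; the storage class name `nfs-client` is only an example, so use whatever ReadWriteMany-capable class your cluster provides:
```bash
# List the storage classes available in the cluster.
kubectl get storageclass

# Try provisioning a small ReadWriteMany volume; if the PVC stays Pending,
# the chosen storage class cannot create ReadWriteMany volumes.
kubectl apply -f - <<EOF
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: rwx-smoke-test
spec:
  accessModes: ["ReadWriteMany"]
  storageClassName: nfs-client  # example name; replace with your class
  resources:
    requests:
      storage: 1Gi
EOF
kubectl get pvc rwx-smoke-test
```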
Estimator and Keras are both part of Tensorflow. These high level APIs are designed
to make building models easier. In our distributed training example we will show how both
Estimator and Keras are both part of TensorFlow. These high-level APIs are designed
to make building models easier. In our distributed training example, we will show how both
APIs work together to help build models that will be trainable in both single node and
distributed manner.
## Keras and Estimators
Code required to run this example can be found in [distributed](https://github.com/kubeflow/examples/tree/master/github_issue_summarization/distributed) directory.
Code required to run this example can be found in the
[distributed](https://github.com/kubeflow/examples/tree/master/github_issue_summarization/distributed)
directory.
You can read more about Estimators [here](https://www.tensorflow.org/guide/estimators).
In our example we will leverage the `model_to_estimator` function that allows you to turn an existing tf.keras model into an Estimator, and therefore allow it to
@ -93,3 +96,7 @@ tool for us. Please refer to [documentation](https://www.tensorflow.org/guide/pr
## Model
After training is complete, our model can be found in "model" PVC.
*Next*: [Serving the model](03_serving_the_model.md)
*Back*: [Setup a kubeflow cluster](01_setup_a_kubeflow_cluster.md)

View File

@ -1,14 +1,14 @@
# Training the model
# Training the model with a notebook
By this point, you should have a Jupyter Notebook running at http://127.0.0.1:8000.
By this point, you should have a Jupyter notebook running at http://127.0.0.1:8000.
## Download training files
Open the Jupyter Notebook interface and create a new Terminal by clicking on
menu, *New -> Terminal*. In the Terminal, clone this git repo by executing: `
Open the Jupyter notebook interface and create a new Terminal by clicking on
menu, *New -> Terminal*. In the Terminal, clone this git repo by executing:
```commandline
git clone https://github.com/kubeflow/examples.git`
```bash
git clone https://github.com/kubeflow/examples.git
```
Now you should have all the code required to complete training in the `examples/github_issue_summarization/notebooks` folder. Navigate to this folder.
@ -19,7 +19,7 @@ Here you should see two files:
## Perform training
Open th `Training.ipynb` notebook. This contains a complete walk-through of
Open the `Training.ipynb` notebook. This contains a complete walk-through of
downloading the training data, preprocessing it, and training it.
Run the `Training.ipynb` notebook, viewing the output at each step to confirm
@ -44,9 +44,9 @@ kubectl --namespace=${NAMESPACE} cp ${PODNAME}:/home/jovyan/examples/github_issu
kubectl --namespace=${NAMESPACE} cp ${PODNAME}:/home/jovyan/examples/github_issue_summarization/notebooks/title_pp.dpkl .
```
For information on:
_(Optional)_ You can also perform training with two alternate methods:
- [Training the model using TFJob](02_training_the_model_tfjob.md)
- [Distributed training using tensor2tensor](02_tensor2tensor_training.md)
- [Distributed training using Estimator](02_distributed_training.md)
*Next*: [Serving the model](03_serving_the_model.md)

View File

@ -1,32 +1,35 @@
# Training the model using TFJob
Kubeflow offers a TensorFlow job controller for kubernetes. This allows you to run your distributed Tensorflow training
job on a kubernetes cluster. For this training job, we will read our training data from GCS and write our output model
Kubeflow offers a TensorFlow job controller for Kubernetes. This allows you to run your distributed TensorFlow training
job on a Kubernetes cluster. For this training job, we will read our training
data from Google Cloud Storage (GCS) and write our output model
back to GCS.
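Before submitting a job, you may want to confirm that the TFJob controller is actually available in your cluster. A minimal check, assuming the operator carries the `name=tf-job-operator` label used elsewhere in this guide:
```bash
# The TFJob CustomResourceDefinition should be registered...
kubectl get crd tfjobs.kubeflow.org

# ...and the tf-job operator pod should be running.
kubectl get pods -n=${NAMESPACE} -lname=tf-job-operator
```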
## Create the image for training
The [notebooks](notebooks) directory contains the necessary files to create a image for training. The [train.py](notebooks/train.py) file contains the training code. Here is how you can create an image and push it to gcr.
The [notebooks](notebooks) directory contains the necessary files to create an
image for training. The [train.py](notebooks/train.py) file contains the
training code. Here is how you can create an image and push it to Google
Container Registry (GCR):
```commandline
```bash
cd notebooks/
make PROJECT=${PROJECT} set-image
```
## Train Using PVC
If you don't have access to GCS or don't want to use GCS you
can use a persistent volume to store the data and model.
If you don't have access to GCS or do not wish to use GCS, you
can use a Persistent Volume Claim (PVC) to store the data and model.
Create a pvc
Note: your cluster must have a default storage class defined for this to work.
Create a PVC:
```
ks apply --env=${KF_ENV} -c data-pvc
```
* Your cluster must have a default storage class defined for
this to work.
Run the job to download the data to the PVC.
Run the job to download the data to the PVC:
```
ks apply --env=${KF_ENV} -c data-downloader
@ -38,24 +41,24 @@ Submit the training job
ks apply --env=${KF_ENV} -c tfjob-pvc
```
The resulting model will be stored on PVC so to access it you will
need to run a pod and attach the PVC. For serving you can just
attach it the pod serving the model.
The resulting model will be stored on the PVC, so to access it you will
need to run a pod and attach the PVC. For serving, you can just
attach it to the pod serving the model.
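A minimal sketch of such a pod is shown below; the claim name `data-pvc` is an assumption, so substitute the name actually created by the `data-pvc` component (see `kubectl get pvc -n ${NAMESPACE}`):
```bash
kubectl -n ${NAMESPACE} apply -f - <<EOF
apiVersion: v1
kind: Pod
metadata:
  name: model-inspector
spec:
  containers:
  - name: shell
    image: busybox
    command: ["sleep", "3600"]
    volumeMounts:
    - name: model
      mountPath: /model
  volumes:
  - name: model
    persistentVolumeClaim:
      claimName: data-pvc  # assumption; check the actual claim name
EOF

# Browse the trained model files on the PVC.
kubectl -n ${NAMESPACE} exec -it model-inspector -- ls /model
```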
## Training Using GCS
If you are running on GCS you can train using GCS to store the input
If you are using GCS, you can train using GCS to store the input
and the resulting model.
### GCS Service account
### GCS service account
* Create a service account which will be used to read and write data from the GCS Bucket.
* Create a service account that will be used to read and write data from the GCS bucket.
* Give the storage account `roles/storage.admin` role so that it can access GCS Buckets.
* Give the storage account `roles/storage.admin` role so that it can access GCS buckets.
* Download its key as a json file and create a secret named `user-gcp-sa` with the key `user-gcp-sa.json`
```commandline
```bash
SERVICE_ACCOUNT=github-issue-summarization
PROJECT=kubeflow-example-project # The GCP Project name
gcloud iam service-accounts --project=${PROJECT} create ${SERVICE_ACCOUNT} \
@ -74,12 +77,12 @@ kubectl --namespace=${NAMESPACE} create secret generic user-gcp-sa --from-file=u
### Run the TFJob using your image
[ks-kubeflow](ks-kubeflow) contains a ksonnet app to deploy the TFJob.
[ks_app](ks_app) contains a ksonnet app to deploy the TFJob.
Set the appropriate params for the tfjob component
Set the appropriate params for the tfjob component:
```commandline
cd ks-kubeflow
```bash
cd ks_app
ks param set tfjob namespace ${NAMESPACE} --env=${KF_ENV}
# The image pushed in the previous step
@ -97,30 +100,31 @@ ks param set tfjob output_model_gcs_path "github-issue-summarization-data/output
Deploy the app:
```commandline
```bash
ks apply ${KF_ENV} -c tfjob
```
In a while you should see a new pod with the label `tf_job_name=tf-job-issue-summarization`
```commandline
kubectl get pods -n=${NAMESPACE} -ltf_job_name=tf-job-issue-summarization
```bash
kubectl get pods -n=${NAMESPACE} tfjob-issue-summarization-master-0
```
You can view the training logs using
```bash
kubectl logs -f -n=${NAMESPACE} tfjob-issue-summarization-master-0
```
You can view the logs of the tf-job operator using
```commandline
kubectl logs -f $(kubectl get pods -n=${NAMESPACE} -lname=tf-job-operator -o=jsonpath='{.items[0].metadata.name}')
```bash
kubectl logs -f -n=${NAMESPACE} $(kubectl get pods -n=${NAMESPACE} -lname=tf-job-operator -o=jsonpath='{.items[0].metadata.name}')
```
You can view the actual training logs using
```commandline
kubectl logs -f $(kubectl get pods -n=${NAMESPACE} -ltf_job_name=tf-job-issue-summarization -o=jsonpath='{.items[0].metadata.name}')
```
For information on:
- [Training the model](02_training_the_model.md)
- [Distributed training using tensor2tensor](02_tensor2tensor_training.md)
_(Optional)_ You can also perform training with two alternate methods:
- [Training the model with a notebook](02_training_the_model.md)
- [Distributed training using Estimator](02_distributed_training.md)
*Next*: [Serving the model](03_serving_the_model.md)

View File

@ -1,8 +1,8 @@
# Serving the model
We are going to use [seldon-core](https://github.com/SeldonIO/seldon-core) to serve the model. [IssueSummarization.py](notebooks/IssueSummarization.py) contains the code for this model. We will wrap this class into a seldon-core microservice which we can then deploy as a REST or GRPC API server.
We are going to use [Seldon Core](https://github.com/SeldonIO/seldon-core) to serve the model. [IssueSummarization.py](notebooks/IssueSummarization.py) contains the code for this model. We will wrap this class into a seldon-core microservice which we can then deploy as a REST or GRPC API server.
> The model is written in Keras and when exported as a TensorFlow model seems to be incompatible with TensorFlow Serving. So we're using seldon-core to serve this model since seldon-core allows you to serve any arbitrary model. More details [here](https://github.com/kubeflow/examples/issues/11#issuecomment-371005885).
> The model is written in Keras and when exported as a TensorFlow model seems to be incompatible with TensorFlow Serving. So we're using Seldon Core to serve this model since seldon-core allows you to serve any arbitrary model. More details [here](https://github.com/kubeflow/examples/issues/11#issuecomment-371005885).
# Building a model server
@ -14,42 +14,60 @@ You have two options for getting a model server
* So you can just run this image to get a pre-trained model
* Serving your own model using this server is discussed below
1. You can build your own model server as discussed below
1. You can build your own model server as discussed below. For this you will need to install the [Source2Image executable s2i](https://github.com/openshift/source-to-image).
## Wrap the model into a seldon-core microservice
## Wrap the model into a Seldon Core microservice
cd into the notebooks directory and run the following docker command. This will create a build/ directory.
Set a couple of environment variables to specify the GCP Project and the TAG you want to build the image for:
```
PROJECT=my-gcp-project
TAG=0.1
```
cd into the notebooks directory and run the following command (you will need [s2i](https://github.com/openshift/source-to-image) installed):
```
cd notebooks/
docker run -v $(pwd):/my_model seldonio/core-python-wrapper:0.7 /my_model IssueSummarization 0.1 gcr.io --base-image=python:3.6 --image-name=gcr-repository-name/issue-summarization
make build-model-image PROJECT=${PROJECT} TAG=${TAG}
```
The build/ directory contains all the necessary files to build the seldon-core microservice image
This will use [S2I](https://github.com/openshift/source-to-image) to wrap the inference code in `IssueSummarization.py` so it can be run and managed by Seldon Core.
Now you should see an image named `gcr.io/<gcr-repository-name>/issue-summarization:0.1` in your docker images. To test the model, you can run it locally using:
```
cd build/
./build_image.sh
make start-docker-model-image PROJECT=${PROJECT} TAG=${TAG}
```
Now you should see an image named `gcr.io/gcr-repository-name/issue-summarization:0.1` in your docker images. To test the model, you can run it locally using
To send an example payload to the server run:
`docker run -p 5000:5000 gcr.io/gcr-repository-name/issue-summarization:0.1`
```
make test-model-image_local
```
You can push the image by running `gcloud docker -- push gcr.io/gcr-repository-name/issue-summarization:0.1`
or you can run a curl command explicitly such as:
```
curl -g http://localhost:5000/predict --data-urlencode 'json={"data":{"ndarray":[["try to stop flask from using multiple threads"]]}}'
```
To stop the running server run:
```
make stop-docker-model-image
```
You can push the image by running:
```
make push-model-image PROJECT=${PROJECT} TAG=${TAG}
```
> You can find more details about wrapping a model with seldon-core [here](https://github.com/SeldonIO/seldon-core/blob/master/docs/wrappers/python.md)
### Storing a model in the Docker image
If you want to store a copy of the model in the Docker image, make sure the following files are available in the directory in which you run
the commands in the previous steps (one way to fetch them is sketched after this list). These files are produced by the [training](training_the_model.md) step in your `notebooks` directory:
* `seq2seq_model_tutorial.h5` - the keras model
* `body_pp.dpkl` - the serialized body preprocessor
* `title_pp.dpkl` - the serialized title preprocessor
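If you trained inside the Jupyter notebook, one way to fetch these files (a sketch reusing the `kubectl cp` pattern from the training step; `${PODNAME}` is your notebook pod) is:
```bash
cd notebooks/
for f in seq2seq_model_tutorial.h5 body_pp.dpkl title_pp.dpkl; do
  kubectl --namespace=${NAMESPACE} cp \
    ${PODNAME}:/home/jovyan/examples/github_issue_summarization/notebooks/${f} ${f}
done
```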
# Deploying the model to your kubernetes cluster
@ -58,41 +76,57 @@ Now that we have an image with our model server, we can deploy it to our kuberne
## Deploy Seldon Core
Install the CRD and it's controller using the seldon prototype
Install the CRD and its controller using the seldon prototype. If you used
`kfctl` to install Kubeflow, seldon is already included and you can run
the following commands (if not, follow the
[quick start](https://www.kubeflow.org/docs/started/getting-started/#kubeflow-quick-start)
instructions to generate the k8s manifests first):
```bash
cd ks-kubeflow
# Gives cluster-admin role to the default service account in the ${NAMESPACE}
kubectl create clusterrolebinding seldon-admin --clusterrole=cluster-admin --serviceaccount=${NAMESPACE}:default
# Install the kubeflow/seldon package
ks pkg install kubeflow/seldon
cd ks_app
# Generate the seldon component and deploy it
ks generate seldon seldon --name=seldon --namespace=${NAMESPACE}
ks generate seldon seldon --namespace=${NAMESPACE}
ks apply ${KF_ENV} -c seldon
```
Seldon Core should now be running on your cluster. You can verify it by running `kubectl get pods -n${NAMESPACE}`. You should see a pod named `seldon-cluster-manager-*`
Seldon Core should now be running on your cluster. You can verify it by running
`kubectl get pods -n${NAMESPACE}`. You should see two pods named
`seldon-seldon-cluster-manager-*` and `seldon-redis-*`.
## Deploying the actual model
Now that you have seldon core deployed, you can deploy the model using the `kubeflow/seldon-serve-simple` prototype.
Now that you have seldon core deployed, you can deploy the model using the
`seldon-serve-simple-v1alpha2` prototype.
```bash
ks generate seldon-serve-simple issue-summarization-model-serving \
ks generate seldon-serve-simple-v1alpha2 issue-summarization-model \
--name=issue-summarization \
--image=gcr.io/gcr-repository-name/issue-summarization:0.1 \
--namespace=${NAMESPACE} \
--image=gcr.io/${PROJECT}/issue-summarization-model:${TAG} \
--replicas=2
ks apply ${KF_ENV} -c issue-summarization-model-serving
ks apply ${KF_ENV} -c issue-summarization-model
```
The model can take quite some time to become ready due to model loading times, and it may be restarted if it fails the default liveness probe. If this happens, you can add a custom livenessProbe to the issue-summarization.jsonnet file. Add the below to the container section:
```
"livenessProbe": {
"failureThreshold": 3,
"initialDelaySeconds": 30,
"periodSeconds": 5,
"successThreshold": 1,
"handler" : {
"tcpSocket": {
"port": "http"
}
},
```
# Sample request and response
Seldon Core uses ambassador to route it's requests. To send requests to the model, you can port-forward the ambassador container locally:
Seldon Core uses ambassador to route its requests. To send requests to the model, you can port-forward the ambassador container locally:
```
kubectl port-forward $(kubectl get pods -n ${NAMESPACE} -l service=ambassador -o jsonpath='{.items[0].metadata.name}') -n ${NAMESPACE} 8080:80
kubectl port-forward svc/ambassador -n ${NAMESPACE} 8080:80
```
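With the port-forward in place you can send a request to the model. The sketch below assumes the deployment is exposed through Ambassador under the `/seldon/issue-summarization/` prefix and reuses the payload format from the local docker test above:
```bash
curl -s http://localhost:8080/seldon/issue-summarization/api/v0.1/predictions \
  --data-urlencode 'json={"data":{"ndarray":[["try to stop flask from using multiple threads"]]}}'
```
The response should be a JSON document whose `data.ndarray` field contains the generated issue title.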

View File

@ -4,7 +4,7 @@ In this section, you will setup a barebones web server that displays the
prediction provided by the previously deployed model.
The following steps describe how to build a docker image and deploy it locally,
where it accepts as input any arbitrary text and displays amachine-generated
where it accepts as input any arbitrary text and displays a machine-generated
summary.
@ -18,7 +18,7 @@ Ensure that your model is live and listening for HTTP requests as described in
To build the front-end docker image, issue the following commands:
```commandline
```bash
cd docker
docker build -t gcr.io/gcr-repository-name/issue-summarization-ui:0.1 .
```
@ -29,24 +29,31 @@ To store the docker image in a location accessible to GKE, push it to the
container registry of your choice. Here, it is pushed to Google Container
Registry.
```commandline
```bash
gcloud docker -- push gcr.io/gcr-repository-name/issue-summarization-ui:0.1
```
## Deploy the front-end docker image to your kubernetes cluster
## Deploy the front-end docker image to your Kubernetes cluster
The folder [ks-kubeflow](ks-kubeflow) contains a ksonnet app. The ui component
in the `ks-kubeflow` app contains the frontend image deployment.
The folder [`ks_app`](ks_app) contains a ksonnet app. The
[ui component](ks_app/components/ui.jsonnet)
in `ks_app` contains the frontend deployment.
To avoid rate-limiting by the GitHub API, you will need an [authentication token](https://github.com/ksonnet/ksonnet/blob/master/docs/troubleshooting.md) stored in the form of an environment variable `${GITHUB_TOKEN}`. The token does not require any permissions and is only used to prevent anonymous API calls.
To use this token, set it as a parameter in the ui component:
```commandline
cd ks-kubeflow
```bash
cd ks_app
ks param set ui github_token ${GITHUB_TOKEN} --env ${KF_ENV}
```
To set the URL of your trained model, add it as a parameter:
```bash
ks param set ui modelUrl "http://issue-summarization.${NAMESPACE}.svc.cluster.local:8000/api/v0.1/predictions" --env ${KF_ENV}
```
To serve the frontend interface, apply the `ui` component of the ksonnet app:
```
@ -58,13 +65,14 @@ ks apply ${KF_ENV} -c ui
We use `ambassador` to route requests to the frontend. You can port-forward the
ambassador container locally:
```commandline
kubectl port-forward $(kubectl get pods -n ${NAMESPACE} -l service=ambassador -o jsonpath='{.items[0].metadata.name}') -n ${NAMESPACE} 8080:80
```bash
kubectl port-forward svc/ambassador -n ${NAMESPACE} 8080:80
```
In a browser, navigate to `http://localhost:8080/issue-summarization/`, where
you will be greeted by "Issuetext". Enter text into the input box and click
"Submit". You should see a summary that was provided by your trained model.
you will be greeted by a basic website. Press the *Populate Random Issue*
button, then click *Generate Title* to view
a summary that was provided by your trained model.
*Next*: [Teardown](05_teardown.md)

View File

@ -1,21 +1,13 @@
# Teardown
Delete the kubernetes `namespace`.
If you created a cluster with Click-to-Deploy or `kfctl`, delete the deployment
using the [GCP console](https://console.cloud.google.com/dm/deployments). The
default deployment name is `kubeflow`.
```commandline
kubectl delete namespace ${NAMESPACE}
```
Delete the PD (persistent disk) backing the NFS mount.
Delete the PD (persistent disk) backing the NFS mount.
```commandline
```bash
gcloud --project=${PROJECT} compute disks delete --zone=${ZONE} ${PD_DISK_NAME}
```
Delete the `kubeflow-app` directory.
```commandline
rm -rf my-kubeflow
```
*Back*: [Querying the model](04_querying_the_model.md)

View File

@ -15,12 +15,23 @@
# Requirements:
# https://github.com/mattrobenolt/jinja2-cli
# pip install jinja2-cli
IMG = gcr.io/kubeflow-examples/issue-summarization-ui
#
# To override variables do
# make ${TARGET} ${VAR}=${VALUE}
#
# IMG is the base path for images.
# Individual images will be
# $(IMG)/$(NAME):$(TAG)
IMG ?= gcr.io/kubeflow-examples/github-issue-summarization
# List any changed files. We only include files in the notebooks directory.
# because that is the code in the docker image.
# In particular we exclude changes to the ksonnet configs.
CHANGED_FILES := $(shell git diff-files --relative=github_issue_summarization/docker)
CHANGED_FILES := $(shell git diff-files --relative=github_issue_summarization/)
# Whether to use cached images with GCB
USE_IMAGE_CACHE ?= true
ifeq ($(strip $(CHANGED_FILES)),)
# Changed files is empty; not dirty
@ -37,11 +48,11 @@ all: build
# To build without the cache set the environment variable
# export DOCKER_BUILD_OPTS=--no-cache
build:
docker build ${DOCKER_BUILD_OPTS} -t $(IMG):$(TAG) . \
docker build ${DOCKER_BUILD_OPTS} -t $(IMG)/ui:$(TAG) . \
--label=git-versions=$(GIT_VERSION)
docker tag $(IMG):$(TAG) $(IMG):latest
@echo Built $(IMG):latest
@echo Built $(IMG):$(TAG)
docker tag $(IMG)/ui:$(TAG) $(IMG)/ui:latest
@echo Built $(IMG)/ui:latest
@echo Built $(IMG)/ui:$(TAG)
# Build but don't attach the latest tag. This allows manual testing/inspection of the image
@ -49,3 +60,22 @@ build:
push: build
gcloud docker -- push $(IMG):$(TAG)
@echo Pushed $(IMG):$(TAG)
# Build the GCB workflow
build-gcb-spec:
rm -rf ./build
mkdir -p build
jsonnet ./image_build.jsonnet --ext-str imageBase=$(IMG) \
--ext-str gitVersion=$(GIT_VERSION) --ext-str tag=$(TAG) \
--ext-str useImageCache=$(USE_IMAGE_CACHE) \
> ./build/image_build.json
# Build using GCB. This is useful if we are on a slow internet connection
# and don't want to pull images locally.
# It's also used to build from our CI system.
build-gcb: build-gcb-spec
cp -r ./docker ./build/
cp -r ./notebooks ./build/
gcloud builds submit --machine-type=n1-highcpu-32 --project=kubeflow-ci \
--config=./build/image_build.json \
--timeout=3600 ./build
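Following the override convention documented at the top of this Makefile, a typical invocation might look like the sketch below (`my-gcp-project` is a placeholder; note that the `gcloud builds submit` call above targets the `kubeflow-ci` project, so adjust it if you build elsewhere):
```bash
# Build with Google Cloud Build without reusing cached images, and push to
# a custom image base.
make build-gcb USE_IMAGE_CACHE=false IMG=gcr.io/my-gcp-project/github-issue-summarization
```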

View File

@ -16,8 +16,8 @@ There are two primary goals for this tutorial:
By the end of this tutorial, you should learn how to:
* Setup a Kubeflow cluster on an existing Kubernetes deployment
* Spawn up a Jupyter Notebook on the cluster
* Spawn up a shared-persistent storage across the cluster to store large
* Spawn a Jupyter Notebook on the cluster
* Spawn a shared-persistent storage across the cluster to store large
datasets
* Train a Sequence-to-Sequence model using TensorFlow and GPUs on the cluster
* Serve the model using [Seldon Core](https://github.com/SeldonIO/seldon-core/)

View File

@ -83,7 +83,8 @@ Here are the instructions for setting up the demo.
### Training and Deploying the model.
We use the ksonnet app in [github/kubeflow/examples/github_issue_summarization/ks-kubeflow](https://github.com/kubeflow/examples/tree/master/github_issue_summarization/ks-kubeflow)
We use the ksonnet app in
[github/kubeflow/examples/github_issue_summarization/ks_app](https://github.com/kubeflow/examples/tree/master/github_issue_summarization/ks_app)
The current environment is
@ -94,14 +95,14 @@ export ENV=gh-demo-1003
Set a bucket for the job output
```
DAY=$(date +%Y%m%d)
ks param set --env=${ENV} tfjob-v1alpha2 output_model_gcs_bucket kubecon-gh-demo
ks param set --env=${ENV} tfjob-v1alpha2 output_model_gcs_path gh-demo/${DAY}/output
ks param set --env=${ENV} tfjob output_model_gcs_bucket kubecon-gh-demo
ks param set --env=${ENV} tfjob output_model_gcs_path gh-demo/${DAY}/output
```
Run the job
```
ks apply ${ENV} -c tfjob-v1alpha2
ks apply ${ENV} -c tfjob
```
@ -128,4 +129,4 @@ ks apply ${ENV} -c tfjob-v1alpha2
```
ks apply ${ENV} -c tensorboard-pvc-tb
```
```

View File

@ -1,4 +0,0 @@
FROM golang:1.9
RUN mkdir -p /opt/kubeflow
COPY ./build/git-issue-summarize-demo /opt/kubeflow

View File

@ -1,100 +0,0 @@
# This file is autogenerated, do not edit; changes may be undone by the next 'dep ensure'.
[[projects]]
name = "github.com/golang/protobuf"
packages = [
"proto",
"ptypes",
"ptypes/any",
"ptypes/duration",
"ptypes/timestamp"
]
revision = "b4deda0973fb4c70b50d226b1af49f3da59f5265"
version = "v1.1.0"
[[projects]]
name = "github.com/kubeflow/katib"
packages = ["pkg/api"]
revision = "f24b520cc52920ae511aeea235636462ebc21d21"
version = "v0.1.2-alpha"
[[projects]]
branch = "master"
name = "golang.org/x/net"
packages = [
"context",
"http/httpguts",
"http2",
"http2/hpack",
"idna",
"internal/timeseries",
"trace"
]
revision = "4cb1c02c05b0e749b0365f61ae859a8e0cfceed9"
[[projects]]
name = "golang.org/x/text"
packages = [
"collate",
"collate/build",
"internal/colltab",
"internal/gen",
"internal/tag",
"internal/triegen",
"internal/ucd",
"language",
"secure/bidirule",
"transform",
"unicode/bidi",
"unicode/cldr",
"unicode/norm",
"unicode/rangetable"
]
revision = "f21a4dfb5e38f5895301dc265a8def02365cc3d0"
version = "v0.3.0"
[[projects]]
branch = "master"
name = "google.golang.org/genproto"
packages = ["googleapis/rpc/status"]
revision = "ff3583edef7de132f219f0efc00e097cabcc0ec0"
[[projects]]
name = "google.golang.org/grpc"
packages = [
".",
"balancer",
"balancer/base",
"balancer/roundrobin",
"codes",
"connectivity",
"credentials",
"encoding",
"encoding/proto",
"grpclog",
"internal",
"internal/backoff",
"internal/channelz",
"internal/grpcrand",
"keepalive",
"metadata",
"naming",
"peer",
"resolver",
"resolver/dns",
"resolver/passthrough",
"stats",
"status",
"tap",
"transport"
]
revision = "168a6198bcb0ef175f7dacec0b8691fc141dc9b8"
version = "v1.13.0"
[solve-meta]
analyzer-name = "dep"
analyzer-version = 1
inputs-digest = "3d9f4c7de4665d6a45accfb3d5a5a6a6ae9b98229cea14e0a8dfba942a4e49f8"
solver-name = "gps-cdcl"
solver-version = 1

View File

@ -1,38 +0,0 @@
# Gopkg.toml example
#
# Refer to https://github.com/golang/dep/blob/master/docs/Gopkg.toml.md
# for detailed Gopkg.toml documentation.
#
# required = ["github.com/user/thing/cmd/thing"]
# ignored = ["github.com/user/project/pkgX", "bitbucket.org/user/project/pkgA/pkgY"]
#
# [[constraint]]
# name = "github.com/user/project"
# version = "1.0.0"
#
# [[constraint]]
# name = "github.com/user/project2"
# branch = "dev"
# source = "github.com/myfork/project2"
#
# [[override]]
# name = "github.com/x/y"
# version = "2.4.0"
#
# [prune]
# non-go = false
# go-tests = true
# unused-packages = true
[[constraint]]
name = "github.com/kubeflow/katib"
version = "0.1.2-alpha"
[[constraint]]
name = "google.golang.org/grpc"
version = "1.13.0"
[prune]
go-tests = true
unused-packages = true

View File

@ -1,53 +0,0 @@
# Copyright 2017 The Kubernetes Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Requirements:
# Make sure ${GOPATH}/src/github.com/kubeflow/examples
# points at a checked out version of the examples repository.
IMG = gcr.io/kubeflow-examples/gh-issue-hp-tuner
DIR := ${CURDIR}
# List any changed files.
CHANGED_FILES := $(shell git diff-files --relative=examples/GKEDemo)
ifeq ($(strip $(CHANGED_FILES)),)
# Changed files is empty; not dirty
# Don't include --dirty because it could be dirty if files outside the ones we care
# about changed.
TAG := $(shell date +v%Y%m%d)-$(shell git describe --always)
else
TAG := $(shell date +v%Y%m%d)-$(shell git describe --always --dirty)-$(shell git diff | shasum -a256 | cut -c -6)
endif
all: build
# To build without the cache set the environment variable
# export DOCKER_BUILD_OPTS=--no-cache
build: Dockerfile git-issue-summarize-demo.go
mkdir -p build
dep ensure
go build -i -o ./build/git-issue-summarize-demo ${GOPATH}/src/github.com/kubeflow/examples/github_issue_summarization/hp-tune/git-issue-summarize-demo.go
docker build ${DOCKER_BUILD_OPTS} -t $(IMG):$(TAG) .
docker tag $(IMG):$(TAG) $(IMG):latest
@echo Built $(IMG):$(TAG)
# Build but don't attach the latest tag. This allows manual testing/inspection of the image
# first.
push: build
gcloud docker -- push $(IMG):$(TAG)
@echo Pushed $(IMG) with :$(TAG) tags
push-latest: push
gcloud container images add-tag --quiet $(IMG):$(TAG) $(IMG):latest --verbosity=info
echo created $(IMG):latest

View File

@ -1,134 +0,0 @@
# Experimental: HP Tuning for GitHub Issue Summarization
This directory contains experimental code for adding hyperparameter
tuning support to the GitHub issue summarization example using Katib.
## Instructions
1. Deploy Kubeflow
1. [Deploy Katib](https://github.com/kubeflow/kubeflow/blob/master/kubeflow/katib/README.md)
1. Create the katib namespace
```
kubectl create namespace katib
```
* This is a known issue [kubeflow/katib#134](https://github.com/kubeflow/katib/issues/134)
1. Deploy the hyperparameter tuning job
```
cd kubeflow/examples/github_issue_summarization/ks-kubeflow
ks apply ${ENVIRONMENT} -c hp-tune
```
## UI
You can check your Model with Web UI.
Access to `http://${ENDPOINT}/katib/projects`
* If you are using GKE and IAP then ENDPOINT is the endpoint you
are serving Kubeflow on
* Otherwise you can port-forward to one of the AMBASSADOR pods
and set ENDPOINT:
```
kubectl port-forward `kubectl get pods --selector=service=ambassador -o jsonpath='{.items[0].metadata.name}'` 8080:80
ENDPOINT=localhost:8080
```
The Results will be saved automatically.
## Description of git-issue-summarize-demo.go
You can generate hyperparameters and evaluate them through the Katib API.
The Katib APIs use gRPC, so you can call them from any language that gRPC supports (e.g. Go, Python, C++).
In a typical case, you will call the APIs in the order described below.
In git-issue-summarize-demo.go, the client waits until the status of all workers is Completed.
### CreateStudy
First, you should create Study.
The input is StudyConfig.
It has Study name, owner, optimization info, and Parameter config(parameter name, min, and max).
This function generates a unique ID for your study and stores the config to DB.
Input:
* StudyConfig:
* Name: string
* Owner: string
* OptimizationType: enum(OptimizationType_MAXIMIZE, OptimizationType_MINIMIZE)
* OptimizationGoal: float
* DefaultSuggestionAlgorithm: string
* DefaultEarlyStoppingAlgorithm: string
* ObjectiveValueName: string
* Metrics: List of Metrics name
* ParameterConfigs: List of parameter config.
Return:
* StudyID
### SetSuggestionParameters
Hyperparameters are generated by suggestion services with Parameter config of Study.
You can set the specific config for each suggestion.
Input:
* StudyID: ID of your study.
* SuggestionAlgorithm: name of suggestion service (e.g. random, grid)
* SuggestionParameters: key-value pairs parameter for suggestions. The wanted key is different for each suggestion.
Return:
* ParameterID
### GetSuggestions
This function will create Trials(set of Parameters).
Input:
* StudyID: ID of your study.
* SuggestionAlgorithm: name of suggestion service (e.g. random, grid)
* RequestNumber: the number you want to evaluate.
* ParamID: ParameterID you got from SetSuggestionParameters func.
Return
* List of Trials
* TrialID
* Parameter Sets
### RunTrial
Start to evaluate Trial.
When you use the kubernetes runtime, the pods are created with the specified config.
Input:
* StudyId: ID of your study.
* TrialId: ID of Trial.
* Runtime: worker type(e.g. kubernetes)
* WorkerConfig: runtime config
* Image: name of docker image
* Command: running commands
* GPU: number of GPU
* Scheduler: scheduler name
Return:
* List of WorkerID
### GetMetrics
Get metrics of running workers.
Input:
* StudyId: ID of your study.
* WorkerIDs: List of worker ID you want to get metrics from.
Return:
* List of Metrics
### SaveModel
Save the model data to KatibDB. After you call this function, you can view the model info in the Katib UI.
When you call this API multiple times, only the Metrics will be updated.
Input:
* ModelInfo
* StudyName
* WorkerId
* Parameters: List of Parameter
* Metrics: List of Metrics
* ModelPath: path to model saved. (PVCname:mountpath)
* DataSet: information of input data
* Name
* Path: path to input data.(PVCname:mountpath)
Return:
### GetWorkers
You can get worker list and status of workers.
Input:
Return:
* List of worker information

View File

@ -1,210 +0,0 @@
package main
import (
"context"
"flag"
"log"
"time"
"github.com/kubeflow/katib/pkg/api"
"google.golang.org/grpc"
)
var studyConfig = api.StudyConfig{
Name: "grid-demo",
Owner: "katib",
OptimizationType: api.OptimizationType_MAXIMIZE,
OptimizationGoal: 0.99,
ObjectiveValueName: "Validation-accuracy",
Metrics: []string{
"accuracy",
},
ParameterConfigs: &api.StudyConfig_ParameterConfigs{
Configs: []*api.ParameterConfig{
&api.ParameterConfig{
Name: "--learning_rate",
ParameterType: api.ParameterType_DOUBLE,
Feasible: &api.FeasibleSpace{
Min: "0.005",
Max: "0.5",
},
},
},
},
}
var gridConfig = []*api.SuggestionParameter{
&api.SuggestionParameter{
Name: "DefaultGrid",
Value: "4",
},
&api.SuggestionParameter{
Name: "--learning_rate",
Value: "2",
},
}
var managerAddr = flag.String("katib_endpoint", "127.0.0.1:6789", "Endpoint of manager default 127.0.0.1:6789")
var trainerImage = flag.String("trainer_image", "gcr.io/kubeflow-dev/tf-job-issue-summarization:v20180425-e79f888", "The docker image containing the training code")
func main() {
flag.Parse()
conn, err := grpc.Dial(*managerAddr, grpc.WithInsecure())
if err != nil {
log.Fatalf("could not connect: %v", err)
}
workerConfig := api.WorkerConfig{
Image: *trainerImage,
Command: []string{
"python",
"/workdir/train.py",
"--sample_size",
"20000",
// "--input_data_gcs_bucket",
// "katib-gi-example",
// "--input_data_gcs_path",
// "github-issue-summarization-data/github-issues.zip",
// "--output_model_gcs_bucket",
// "katib-gi-example",
},
Gpu: 0,
Scheduler: "default-scheduler",
}
defer conn.Close()
ctx := context.Background()
c := api.NewManagerClient(conn)
createStudyreq := &api.CreateStudyRequest{
StudyConfig: &studyConfig,
}
createStudyreply, err := c.CreateStudy(ctx, createStudyreq)
if err != nil {
log.Fatalf("StudyConfig Error %v", err)
}
studyId := createStudyreply.StudyId
log.Printf("Study ID %s", studyId)
getStudyreq := &api.GetStudyRequest{
StudyId: studyId,
}
getStudyReply, err := c.GetStudy(ctx, getStudyreq)
if err != nil {
log.Fatalf("GetConfig Error %v", err)
}
log.Printf("Study ID %s StudyConf%v", studyId, getStudyReply.StudyConfig)
setSuggesitonParameterRequest := &api.SetSuggestionParametersRequest{
StudyId: studyId,
SuggestionAlgorithm: "grid",
SuggestionParameters: gridConfig,
}
setSuggesitonParameterReply, err := c.SetSuggestionParameters(ctx, setSuggesitonParameterRequest)
if err != nil {
log.Fatalf("SetConfig Error %v", err)
}
log.Printf("Grid Prameter ID %s", setSuggesitonParameterReply.ParamId)
getGridSuggestRequest := &api.GetSuggestionsRequest{
StudyId: studyId,
SuggestionAlgorithm: "grid",
RequestNumber: 0,
//RequestNumber=0 means get all grids.
ParamId: setSuggesitonParameterReply.ParamId,
}
getGridSuggestReply, err := c.GetSuggestions(ctx, getGridSuggestRequest)
if err != nil {
log.Fatalf("GetSuggestion Error %v", err)
}
log.Println("Get Grid Suggestions:")
for _, t := range getGridSuggestReply.Trials {
log.Printf("%v", t)
}
workerIds := make([]string, len(getGridSuggestReply.Trials))
workerParameter := make(map[string][]*api.Parameter)
for i, t := range getGridSuggestReply.Trials {
ws := workerConfig
rtr := &api.RunTrialRequest{
StudyId: studyId,
TrialId: t.TrialId,
Runtime: "kubernetes",
WorkerConfig: &ws,
}
rtr.WorkerConfig.Command = append(rtr.WorkerConfig.Command, "--output_model_gcs_path")
rtr.WorkerConfig.Command = append(rtr.WorkerConfig.Command, "github-issue-summarization-data/"+t.TrialId+"output_model.h5")
for _, p := range t.ParameterSet {
rtr.WorkerConfig.Command = append(rtr.WorkerConfig.Command, p.Name)
rtr.WorkerConfig.Command = append(rtr.WorkerConfig.Command, p.Value)
}
workerReply, err := c.RunTrial(ctx, rtr)
if err != nil {
log.Fatalf("RunTrial Error %v", err)
}
workerIds[i] = workerReply.WorkerId
workerParameter[workerReply.WorkerId] = t.ParameterSet
saveModelRequest := &api.SaveModelRequest{
Model: &api.ModelInfo{
StudyName: studyConfig.Name,
WorkerId: workerReply.WorkerId,
Parameters: t.ParameterSet,
Metrics: []*api.Metrics{},
ModelPath: "pvc:/Path/to/Model",
},
DataSet: &api.DataSetInfo{
Name: "GitHub",
Path: "/path/to/data",
},
}
_, err = c.SaveModel(ctx, saveModelRequest)
if err != nil {
log.Fatalf("SaveModel Error %v", err)
}
log.Printf("WorkerID %s start\n", workerReply.WorkerId)
}
for true {
time.Sleep(10 * time.Second)
getMetricsRequest := &api.GetMetricsRequest{
StudyId: studyId,
WorkerIds: workerIds,
}
getMetricsReply, err := c.GetMetrics(ctx, getMetricsRequest)
if err != nil {
log.Printf("GetMetErr %v", err)
continue
}
for _, mls := range getMetricsReply.MetricsLogSets {
if len(mls.MetricsLogs) > 0 {
//Only Metrics can be updated.
saveModelRequest := &api.SaveModelRequest{
Model: &api.ModelInfo{
StudyName: studyConfig.Name,
WorkerId: mls.WorkerId,
Metrics: []*api.Metrics{},
},
}
for _, ml := range mls.MetricsLogs {
if len(ml.Values) > 0 {
log.Printf("WorkerID %s :\t Metrics Name %s Value %v", mls.WorkerId, ml.Name, ml.Values[len(ml.Values)-1])
saveModelRequest.Model.Metrics = append(saveModelRequest.Model.Metrics, &api.Metrics{Name: ml.Name, Value: ml.Values[len(ml.Values)-1]})
}
}
_, err = c.SaveModel(ctx, saveModelRequest)
if err != nil {
log.Fatalf("SaveModel Error %v", err)
}
}
}
getWorkerRequest := &api.GetWorkersRequest{StudyId: studyId}
getWorkerReply, err := c.GetWorkers(ctx, getWorkerRequest)
if err != nil {
log.Fatalf("GetWorker Error %v", err)
}
completeCount := 0
for _, w := range getWorkerReply.Workers {
if w.Status == api.State_COMPLETED {
completeCount++
}
}
if completeCount == len(getWorkerReply.Workers) {
log.Printf("All Worker Completed!")
break
}
}
}

View File

@ -0,0 +1,96 @@
// TODO(jlewi): We should tag the image latest and then
// use latest as a cache so that rebuilds are fast
// https://cloud.google.com/cloud-build/docs/speeding-up-builds#using_a_cached_docker_image
{
// Convert non-boolean types like string,number to a boolean.
// This is primarily intended for dealing with parameters that should be booleans.
local toBool = function(x) {
result::
if std.type(x) == "boolean" then
x
else if std.type(x) == "string" then
std.asciiUpper(x) == "TRUE"
else if std.type(x) == "number" then
x != 0
else
false,
}.result,
local useImageCache = toBool(std.extVar("useImageCache")),
// A template for defining the steps for building each image.
local subGraphTemplate = {
// following variables must be set
name: null,
dockerFile: null,
buildArg: null,
contextDir: ".",
local template = self,
local pullStep = if useImageCache then [
{
id: "pull-" + template.name,
name: "gcr.io/cloud-builders/docker",
args: ["pull", std.extVar("imageBase") + "/" + template.name + ":latest"],
waitFor: ["-"],
},
] else [],
local image = std.extVar("imageBase") + "/" + template.name + ":" + std.extVar("tag"),
local imageLatest = std.extVar("imageBase") + "/" + template.name + ":latest",
images: [image, imageLatest],
steps: pullStep +
[
{
local buildArgList = if template.buildArg != null then ["--build-arg", template.buildArg] else [],
local cacheList = if useImageCache then ["--cache-from=" + imageLatest] else [],
id: "build-" + template.name,
name: "gcr.io/cloud-builders/docker",
args: [
"build",
"-t",
image,
"--label=git-versions=" + std.extVar("gitVersion"),
]
+ buildArgList
+ [
"--file=" + template.dockerFile,
]
+ cacheList + [template.contextDir],
waitFor: if useImageCache then ["pull-" + template.name] else ["-"],
},
{
id: "tag-" + template.name,
name: "gcr.io/cloud-builders/docker",
args: ["tag", image, imageLatest],
waitFor: ["build-" + template.name],
},
],
},
local uiSteps = subGraphTemplate {
name: "ui",
dockerFile: "./docker/Dockerfile",
contextDir: "./docker"
},
local trainerSteps = subGraphTemplate {
name: "trainer",
dockerFile: "./notebooks/Dockerfile",
contextDir: "./notebooks"
},
local trainerEstimatorSteps = subGraphTemplate {
name: "trainer-estimator",
dockerFile: "./notebooks/Dockerfile.estimator",
contextDir: "./notebooks"
},
steps: uiSteps.steps + trainerSteps.steps + trainerEstimatorSteps.steps,
images: uiSteps.images + trainerSteps.images + trainerEstimatorSteps.images,
}

View File

@ -1,6 +0,0 @@
/lib
/.ksonnet/registries
/app.override.yaml
/.ks_environment
# Ignore all environments? Do we want to check in dev.kubeflow.org?
/environments

View File

@ -1,39 +0,0 @@
apiVersion: 0.1.0
gitVersion:
commitSha: 40285d8a14f1ac5787e405e1023cf0c07f6aa28c
refSpec: master
kind: ksonnet.io/registry
libraries:
apache:
path: apache
version: master
efk:
path: efk
version: master
mariadb:
path: mariadb
version: master
memcached:
path: memcached
version: master
mongodb:
path: mongodb
version: master
mysql:
path: mysql
version: master
nginx:
path: nginx
version: master
node:
path: node
version: master
postgres:
path: postgres
version: master
redis:
path: redis
version: master
tomcat:
path: tomcat
version: master

View File

@ -1,39 +0,0 @@
apiVersion: 0.1.0
gitVersion:
commitSha: 3be196cfa1d68d9a33e0674c133ffbbcc3e57d46
refSpec: v0.2.0-rc.1
kind: ksonnet.io/registry
libraries:
argo:
path: argo
version: master
automation:
path: automation
version: master
core:
path: core
version: master
katib:
path: katib
version: master
mpi-job:
path: mpi-job
version: master
new-package-stub:
path: new-package-stub
version: master
openmpi:
path: openmpi
version: master
pachyderm:
path: pachyderm
version: master
pytorch-job:
path: pytorch-job
version: master
tf-job:
path: tf-job
version: master
tf-serving:
path: tf-serving
version: master

View File

@ -1,61 +0,0 @@
apiVersion: 0.1.0
environments:
cloud:
destination:
namespace: namespace
server: https://35.188.73.10
k8sVersion: v1.7.0
path: cloud
default:
destination:
namespace: default
server: https://35.188.73.10
k8sVersion: v1.7.0
path: default
gh-demo-1003:
destination:
namespace: kubeflow
server: https://104.196.134.59
k8sVersion: v1.10.7
path: gh-demo-1003
jlewi:
destination:
namespace: kubeflow
server: https://35.196.4.129
k8sVersion: v1.9.6
path: jlewi
kubecon-gh-demo-1:
destination:
namespace: kubeflow
server: https://35.231.60.188
k8sVersion: v1.7.0
path: kubecon-gh-demo-1
kind: ksonnet.io/app
libraries:
core:
gitVersion:
commitSha: 3da7eb254cefd6a8a79ed7db57a30adfb91b734e
refSpec: master
name: core
registry: kubeflow
examples:
gitVersion:
commitSha: 3da7eb254cefd6a8a79ed7db57a30adfb91b734e
refSpec: master
name: examples
registry: kubeflow
seldon:
gitVersion:
commitSha: 3da7eb254cefd6a8a79ed7db57a30adfb91b734e
refSpec: master
name: seldon
registry: kubeflow
name: ks-kubeflow
registries:
kubeflow:
gitVersion:
commitSha: 3be196cfa1d68d9a33e0674c133ffbbcc3e57d46
refSpec: v0.2.0-rc.1
protocol: github
uri: github.com/kubeflow/kubeflow/tree/v0.2.0-rc.1/kubeflow
version: 0.0.1

View File

@ -1,13 +0,0 @@
local env = std.extVar("__ksonnet/environments");
local params = std.extVar("__ksonnet/params").components["kubeflow-core"];
local k = import "k.libsonnet";
local all = import "kubeflow/core/all.libsonnet";
// updatedParams uses the environment namespace if
// the namespace parameter is not explicitly set
local updatedParams = params {
namespace: if params.namespace == "null" then env.namespace else params.namespace,
};
std.prune(k.core.v1.list.new(all.parts(updatedParams).all))
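
The updatedParams override above is the usual ksonnet pattern for falling back to the environment's namespace whenever the component parameter is left at the literal string "null"; the same idiom appears in the seldon and jupyterhub components further down. A tiny Python sketch of the rule, with hypothetical values:

```
def resolve_namespace(params, env):
    # Fall back to the ksonnet environment namespace when the component
    # parameter is the literal string "null".
    ns = params.get("namespace", "null")
    return env["namespace"] if ns == "null" else ns


# Hypothetical values, for illustration only.
print(resolve_namespace({"namespace": "null"}, {"namespace": "kubeflow"}))  # kubeflow
print(resolve_namespace({"namespace": "demo"}, {"namespace": "kubeflow"}))  # demo
```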

View File

@ -1,82 +0,0 @@
{
global: {},
components: {
// Component-level parameters, defined initially from 'ks prototype use ...'
// Each object below should correspond to a component in the components/ directory
"data-pvc": {},
seldon: {
apifeImage: "seldonio/apife:0.1.5",
apifeServiceType: "NodePort",
engineImage: "seldonio/engine:0.1.5",
name: "seldon",
namespace: "null",
operatorImage: "seldonio/cluster-manager:0.1.5",
operatorJavaOpts: "null",
operatorSpringOpts: "null",
withApife: "false",
withRbac: "true",
},
"issue-summarization-model-serving": {
endpoint: "REST",
image: "gcr.io/kubeflow-examples/issue-summarization-model:v20180427-e2aa113",
name: "issue-summarization",
namespace: "null",
replicas: 2,
},
tensorboard: {
image: "tensorflow/tensorflow:1.7.0",
// logDir needs to be overwritten based on where the data is
// actually stored.
logDir: "",
name: "gh",
},
ui: {
namespace: "null",
githubToken: "",
image: "gcr.io/kubeflow-examples/issue-summarization-ui:v20180629-v0.1-2-g98ed4b4-dirty-182929",
},
"tfjob-v1alpha2": {
name: "tfjob-issue-summarization",
image: "gcr.io/kubeflow-examples/tf-job-issue-summarization:v20180629-v0.1-2-g98ed4b4-dirty-182929",
input_data_gcs_bucket: "kubeflow-examples",
input_data_gcs_path: "github-issue-summarization-data/github-issues.zip",
output_model_gcs_bucket: "kubeflow-examples",
output_model_gcs_path: "github-issue-summarization-data",
sample_size: "100000",
gcpSecretName: "user-gcp-sa",
gcpSecretFile: "user-gcp-sa.json",
},
"kubeflow-core": {
AmbassadorImage: "quay.io/datawire/ambassador:0.30.1",
AmbassadorServiceType: "ClusterIP",
StatsdImage: "quay.io/datawire/statsd:0.30.1",
centralUiImage: "gcr.io/kubeflow-images-public/centraldashboard:v20180618-v0.2.0-rc.0-5-g715aafc8-e3b0c4",
cloud: "null",
disks: "null",
jupyterHubAuthenticator: "null",
jupyterHubImage: "gcr.io/kubeflow/jupyterhub-k8s:v20180531-3bb991b1",
jupyterHubServiceType: "ClusterIP",
jupyterNotebookPVCMount: "/home/jovyan",
jupyterNotebookRegistry: "gcr.io",
jupyterNotebookRepoName: "kubeflow-images-public",
name: "kubeflow-core",
namespace: "null",
reportUsage: "false",
tfDefaultImage: "null",
tfJobImage: "gcr.io/kubeflow-images-public/tf_operator:v0.2.0",
tfJobUiServiceType: "ClusterIP",
tfJobVersion: "v1alpha2",
usageId: "unknown_cluster",
},
"tensor2tensor-v1alpha2": {
name: "tensor2tensor-v1alpha2",
},
"data-downloader": {},
"tfjob-pvc-v1alpha2": {
name: "tfjob-pvc-v1alpha2",
},
"hp-tune": {},
// Run tensorboard with pvc.
// This is intended for use with tfjob-estimator
},
}

View File

@ -1,57 +0,0 @@
local env = std.extVar("__ksonnet/environments");
local params = std.extVar("__ksonnet/params").components.seldon;
local k = import "k.libsonnet";
local core = import "kubeflow/seldon/core.libsonnet";
// updatedParams uses the environment namespace if
// the namespace parameter is not explicitly set
local updatedParams = params {
namespace: if params.namespace == "null" then env.namespace else params.namespace,
};
local name = params.name;
local namespace = updatedParams.namespace;
local withRbac = params.withRbac;
local withApife = params.withApife;
// APIFE
local apifeImage = params.apifeImage;
local apifeServiceType = params.apifeServiceType;
// Cluster Manager (The CRD Operator)
local operatorImage = params.operatorImage;
local operatorSpringOptsParam = params.operatorSpringOpts;
local operatorSpringOpts = if operatorSpringOptsParam != "null" then operatorSpringOptsParam else "";
local operatorJavaOptsParam = params.operatorJavaOpts;
local operatorJavaOpts = if operatorJavaOptsParam != "null" then operatorJavaOptsParam else "";
// Engine
local engineImage = params.engineImage;
// APIFE
local apife = [
core.parts(namespace).apife(apifeImage, withRbac),
core.parts(namespace).apifeService(apifeServiceType),
];
local rbac = [
core.parts(namespace).rbacServiceAccount(),
core.parts(namespace).rbacClusterRoleBinding(),
];
// Core
local coreComponents = [
core.parts(namespace).deploymentOperator(engineImage, operatorImage, operatorSpringOpts, operatorJavaOpts, withRbac),
core.parts(namespace).redisDeployment(),
core.parts(namespace).redisService(),
core.parts(namespace).crd(),
];
if withRbac == "true" && withApife == "true" then
k.core.v1.list.new(apife + rbac + coreComponents)
else if withRbac == "true" && withApife == "false" then
k.core.v1.list.new(rbac + coreComponents)
else if withRbac == "false" && withApife == "true" then
k.core.v1.list.new(apife + coreComponents)
else if withRbac == "false" && withApife == "false" then
k.core.v1.list.new(coreComponents)
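
The chain of if/else branches above only decides which optional groups (the API front end and the RBAC objects) get prepended to the core components. A compact Python sketch of the same selection follows; the flags are the strings "true"/"false" exactly as in the params, and, unlike the jsonnet (which yields nothing for other values), anything else is treated here as false.

```
def seldon_objects(with_rbac, with_apife, apife, rbac, core_components):
    # Flags arrive as strings ("true"/"false"), matching the ksonnet params.
    objects = []
    if with_apife == "true":
        objects += apife
    if with_rbac == "true":
        objects += rbac
    return objects + core_components
```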

View File

@ -1,85 +0,0 @@
{
all(params, env):: [
$.parts(params, env).service,
$.parts(params, env).deployment,
],
parts(params, env):: {
// Define some defaults.
local updatedParams = {
serviceType: "ClusterIP",
image: "gcr.io/kubeflow-images-public/issue-summarization-ui:latest",
modelUrl: "http://issue-summarization.kubeflow.svc.cluster.local:8000/api/v0.1/predictions",
} + params,
service:: {
apiVersion: "v1",
kind: "Service",
metadata: {
name: "issue-summarization-ui",
namespace: env.namespace,
annotations: {
"getambassador.io/config": "---\napiVersion: ambassador/v0\nkind: Mapping\nname: issue_summarization_ui\nprefix: /issue-summarization/\nrewrite: /\nservice: issue-summarization-ui:80\n",
},
},
spec: {
ports: [
{
port: 80,
targetPort: 80,
},
],
selector: {
app: "issue-summarization-ui",
},
type: updatedParams.serviceType,
},
},
deployment:: {
apiVersion: "apps/v1beta1",
kind: "Deployment",
metadata: {
name: "issue-summarization-ui",
namespace: env.namespace,
},
spec: {
replicas: 1,
template: {
metadata: {
labels: {
app: "issue-summarization-ui",
},
},
spec: {
containers: [
{
args: [
"app.py",
"--model_url",
updatedParams.modelUrl,
],
command: [
"python",
],
image: updatedParams.image,
env: [
{
name: "GITHUB_TOKEN",
value: updatedParams.githubToken,
}
],
name: "issue-summarization-ui",
ports: [
{
containerPort: 80,
},
],
},
],
},
},
},
}, // deployment
}, // parts
}

View File

@ -1,7 +0,0 @@
local base = import "base.libsonnet";
local k = import "k.libsonnet";
base + {
// Insert user-specified overrides here. For example if a component is named "nginx-deployment", you might have something like:
// "nginx-deployment"+: k.deployment.mixin.metadata.labels({foo: "bar"})
}

View File

@ -1,17 +0,0 @@
local params = import '../../components/params.libsonnet';
params + {
components+: {
// Insert component parameter overrides here. Ex:
// guestbook +: {
// name: "guestbook-dev",
// replicas: params.global.replicas,
// },
"kubeflow-core"+: {
cloud: 'gke',
},
ui+: {
github_token: 'null',
},
},
}

View File

@ -1,7 +0,0 @@
local base = import "base.libsonnet";
local k = import "k.libsonnet";
base + {
// Insert user-specified overrides here. For example if a component is named "nginx-deployment", you might have something like:
// "nginx-deployment"+: k.deployment.mixin.metadata.labels({foo: "bar"})
}

View File

@ -1,5 +0,0 @@
local params = import '../../components/params.libsonnet';
params + {
components+: {},
}

View File

@ -1,7 +0,0 @@
local base = import "base.libsonnet";
local k = import "k.libsonnet";
base + {
// Insert user-specified overrides here. For example if a component is named "nginx-deployment", you might have something like:
// "nginx-deployment"+: k.deployment.mixin.metadata.labels({foo: "bar"})
}

View File

@ -1,5 +0,0 @@
local params = import '../../components/params.libsonnet';
params + {
components+: {},
}

View File

@ -1,80 +0,0 @@
local k8s = import "k8s.libsonnet";
local apps = k8s.apps;
local core = k8s.core;
local extensions = k8s.extensions;
local hidden = {
mapContainers(f):: {
local podContainers = super.spec.template.spec.containers,
spec+: {
template+: {
spec+: {
// IMPORTANT: This overwrites the 'containers' field
// for this deployment.
containers: std.map(f, podContainers),
},
},
},
},
mapContainersWithName(names, f) ::
local nameSet =
if std.type(names) == "array"
then std.set(names)
else std.set([names]);
local inNameSet(name) = std.length(std.setInter(nameSet, std.set([name]))) > 0;
self.mapContainers(
function(c)
if std.objectHas(c, "name") && inNameSet(c.name)
then f(c)
else c
),
};
k8s + {
apps:: apps + {
v1beta1:: apps.v1beta1 + {
local v1beta1 = apps.v1beta1,
daemonSet:: v1beta1.daemonSet + {
mapContainers(f):: hidden.mapContainers(f),
mapContainersWithName(names, f):: hidden.mapContainersWithName(names, f),
},
deployment:: v1beta1.deployment + {
mapContainers(f):: hidden.mapContainers(f),
mapContainersWithName(names, f):: hidden.mapContainersWithName(names, f),
},
},
},
core:: core + {
v1:: core.v1 + {
list:: {
new(items)::
{apiVersion: "v1"} +
{kind: "List"} +
self.items(items),
items(items):: if std.type(items) == "array" then {items+: items} else {items+: [items]},
},
},
},
extensions:: extensions + {
v1beta1:: extensions.v1beta1 + {
local v1beta1 = extensions.v1beta1,
daemonSet:: v1beta1.daemonSet + {
mapContainers(f):: hidden.mapContainers(f),
mapContainersWithName(names, f):: hidden.mapContainersWithName(names, f),
},
deployment:: v1beta1.deployment + {
mapContainers(f):: hidden.mapContainers(f),
mapContainersWithName(names, f):: hidden.mapContainersWithName(names, f),
},
},
},
}
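
The mapContainers helpers above rewrite a workload's pod-template container list in place, optionally restricted to containers whose name is in a given set. A rough Python equivalent operating on a plain deployment dict (the structure is illustrative only):

```
def map_containers(deployment, fn):
    # Overwrite the containers list of the pod template, like the jsonnet mixin.
    pod_spec = deployment["spec"]["template"]["spec"]
    pod_spec["containers"] = [fn(c) for c in pod_spec["containers"]]
    return deployment


def map_containers_with_name(deployment, names, fn):
    # Accept a single name or a list of names, then only touch matching containers.
    name_set = set(names) if isinstance(names, (list, tuple, set)) else {names}
    return map_containers(
        deployment,
        lambda c: fn(c) if c.get("name") in name_set else c)
```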

File diff suppressed because it is too large

File diff suppressed because it is too large

View File

@ -1,83 +0,0 @@
local env = std.extVar("__ksonnet/environments");
local params = std.extVar("__ksonnet/params").components["tensor2tensor-v1alpha2"];
local k = import "k.libsonnet";
local name = params.name;
local namespace = env.namespace;
local tfjob = {
apiVersion: "kubeflow.org/v1alpha2",
kind: "TFJob",
metadata: {
name: name,
namespace: namespace,
},
spec: {
tfReplicaSpecs: {
Master: {
replicas: updatedParams.workers,
template: {
spec: {
containers: [
{
image: if updatedParams.workerGpu > 0 then updatedParams.gpuImage else updatedParams.cpuImage,
name: "tensorflow",
command: masterCommand,
env: containerEnv,
[if updatedParams.workerGpu > 0 then "resources"]: {
limits: {
"nvidia.com/gpu": updatedParams.workerGpu,
},
},
},
],
restartPolicy: "OnFailure",
},
},
}, // Master
Worker: {
replicas: updatedParams.workers,
template: {
spec: {
containers: [
{
image: if updatedParams.workerGpu > 0 then updatedParams.gpuImage else updatedParams.cpuImage,
name: "tensorflow",
command: workerCommand,
env: containerEnv,
[if updatedParams.workerGpu > 0 then "resources"]: {
limits: {
"nvidia.com/gpu": updatedParams.workerGpu,
},
},
},
],
restartPolicy: "OnFailure",
},
},
}, // Worker
Ps: {
replicas: updatedParams.ps,
template: {
spec: {
containers: [
{
image: updatedParams.cpuImage,
name: "tensorflow",
command: psCommand,
env: containerEnv,
},
],
restartPolicy: "OnFailure",
},
},
}, // Ps
}, // tfReplicaSpecs
}, // Spec
}; // tfJob
k.core.v1.list.new([
tfjob,
])

View File

@ -1,19 +0,0 @@
{
parts(params):: {
local ambassador = import "kubeflow/core/ambassador.libsonnet",
local jupyterhub = import "kubeflow/core/jupyterhub.libsonnet",
local nfs = import "kubeflow/core/nfs.libsonnet",
local tfjob = import "kubeflow/core/tf-job-operator.libsonnet",
local spartakus = import "kubeflow/core/spartakus.libsonnet",
local centraldashboard = import "kubeflow/core/centraldashboard.libsonnet",
local version = import "kubeflow/core/version.libsonnet",
all:: jupyterhub.all(params)
+ tfjob.all(params)
+ ambassador.all(params)
+ nfs.all(params)
+ spartakus.all(params)
+ centraldashboard.all(params)
+ version.all(params),
},
}

View File

@ -1,266 +0,0 @@
{
all(params):: [
$.parts(params.namespace, params.AmbassadorImage).service(params.AmbassadorServiceType),
$.parts(params.namespace, params.AmbassadorImage).adminService,
$.parts(params.namespace, params.AmbassadorImage).role,
$.parts(params.namespace, params.AmbassadorImage).serviceAccount,
$.parts(params.namespace, params.AmbassadorImage).roleBinding,
$.parts(params.namespace, params.AmbassadorImage).deploy(params.StatsdImage),
$.parts(params.namespace, params.AmbassadorImage).k8sDashboard(params.cloud),
],
parts(namespace, ambassadorImage):: {
service(serviceType):: {
apiVersion: "v1",
kind: "Service",
metadata: {
labels: {
service: "ambassador",
},
name: "ambassador",
namespace: namespace,
},
spec: {
ports: [
{
name: "ambassador",
port: 80,
targetPort: 80,
},
],
selector: {
service: "ambassador",
},
type: serviceType,
},
}, // service
adminService:: {
apiVersion: "v1",
kind: "Service",
metadata: {
labels: {
service: "ambassador-admin",
},
name: "ambassador-admin",
namespace: namespace,
},
spec: {
ports: [
{
name: "ambassador-admin",
port: 8877,
targetPort: 8877,
},
],
selector: {
service: "ambassador",
},
type: "ClusterIP",
},
}, // adminService
role:: {
apiVersion: "rbac.authorization.k8s.io/v1beta1",
kind: "Role",
metadata: {
name: "ambassador",
namespace: namespace,
},
rules: [
{
apiGroups: [
"",
],
resources: [
"services",
],
verbs: [
"get",
"list",
"watch",
],
},
{
apiGroups: [
"",
],
resources: [
"configmaps",
],
verbs: [
"create",
"update",
"patch",
"get",
"list",
"watch",
],
},
{
apiGroups: [
"",
],
resources: [
"secrets",
],
verbs: [
"get",
"list",
"watch",
],
},
],
}, // role
serviceAccount:: {
apiVersion: "v1",
kind: "ServiceAccount",
metadata: {
name: "ambassador",
namespace: namespace,
},
}, // serviceAccount
roleBinding:: {
apiVersion: "rbac.authorization.k8s.io/v1beta1",
kind: "RoleBinding",
metadata: {
name: "ambassador",
namespace: namespace,
},
roleRef: {
apiGroup: "rbac.authorization.k8s.io",
kind: "Role",
name: "ambassador",
},
subjects: [
{
kind: "ServiceAccount",
name: "ambassador",
namespace: namespace,
},
],
}, // roleBinding
deploy(statsdImage):: {
apiVersion: "extensions/v1beta1",
kind: "Deployment",
metadata: {
name: "ambassador",
namespace: namespace,
},
spec: {
replicas: 3,
template: {
metadata: {
labels: {
service: "ambassador",
},
namespace: namespace,
},
spec: {
containers: [
{
env: [
{
name: "AMBASSADOR_NAMESPACE",
valueFrom: {
fieldRef: {
fieldPath: "metadata.namespace",
},
},
},
{
name: "AMBASSADOR_SINGLE_NAMESPACE",
value: "true",
},
],
image: ambassadorImage,
livenessProbe: {
httpGet: {
path: "/ambassador/v0/check_alive",
port: 8877,
},
initialDelaySeconds: 30,
periodSeconds: 30,
},
name: "ambassador",
readinessProbe: {
httpGet: {
path: "/ambassador/v0/check_ready",
port: 8877,
},
initialDelaySeconds: 30,
periodSeconds: 30,
},
resources: {
limits: {
cpu: 1,
memory: "400Mi",
},
requests: {
cpu: "200m",
memory: "100Mi",
},
},
},
{
image: statsdImage,
name: "statsd",
},
],
restartPolicy: "Always",
serviceAccountName: "ambassador",
},
},
},
}, // deploy
isDashboardTls(cloud)::
if cloud == "acsengine" || cloud == "aks" then
"false"
else
"true",
// This service adds a rule to our reverse proxy for accessing the K8s dashboard.
k8sDashboard(cloud):: {
apiVersion: "v1",
kind: "Service",
metadata: {
name: "k8s-dashboard",
namespace: namespace,
annotations: {
"getambassador.io/config":
std.join("\n", [
"---",
"apiVersion: ambassador/v0",
"kind: Mapping",
"name: k8s-dashboard-ui-mapping",
"prefix: /k8s/ui/",
"rewrite: /",
"tls: " + $.parts(namespace, ambassadorImage).isDashboardTls(cloud),
// We redirect to the K8s service created for the dashboard
// in namespace kube-system. We don't use the k8s-dashboard service
// because that isn't in the kube-system namespace and I don't think
// it can select pods in a different namespace.
"service: kubernetes-dashboard.kube-system",
]),
}, //annotations
},
spec: {
ports: [
{
port: 443,
targetPort: 8443,
},
],
selector: {
"k8s-app": "kubernetes-dashboard",
},
type: "ClusterIP",
},
}, // k8sDashboard
}, // parts
}
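
The k8sDashboard service above exists only to carry a getambassador.io/config annotation; Ambassador picks that annotation up and adds a reverse-proxy route to the dashboard running in kube-system, with TLS disabled on acs-engine and AKS. A small Python sketch of how that annotation string is assembled (it just restates the jsonnet, no new behavior):

```
def dashboard_tls(cloud):
    # Mirror isDashboardTls: dashboard TLS is off for acs-engine and AKS.
    return "false" if cloud in ("acsengine", "aks") else "true"


def dashboard_mapping_annotation(cloud):
    # The multi-line YAML fragment Ambassador reads from the service annotation.
    return "\n".join([
        "---",
        "apiVersion: ambassador/v0",
        "kind: Mapping",
        "name: k8s-dashboard-ui-mapping",
        "prefix: /k8s/ui/",
        "rewrite: /",
        "tls: " + dashboard_tls(cloud),
        "service: kubernetes-dashboard.kube-system",
    ])
```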

View File

@ -1,159 +0,0 @@
{
// TODO(https://github.com/ksonnet/ksonnet/issues/222): Taking namespace as an argument is a workaround for the fact that ksonnet
// doesn't support automatically piping in the namespace from the environment to prototypes.
// TODO(https://github.com/kubeflow/kubeflow/issues/527):
// We need to build and publish central UI docker image as part of our release process.
all(params):: [
$.parts(params.namespace).deployUi(params.centralUiImage),
$.parts(params.namespace).uiService,
$.parts(params.namespace).uiServiceAccount,
$.parts(params.namespace).uiRole,
$.parts(params.namespace).uiRoleBinding,
],
parts(namespace):: {
deployUi(centralUiImage):: {
apiVersion: "extensions/v1beta1",
kind: "Deployment",
metadata: {
labels: {
app: "centraldashboard",
},
name: "centraldashboard",
namespace: namespace,
},
spec: {
template: {
metadata: {
labels: {
app: "centraldashboard",
},
},
spec: {
containers: [
{
image: centralUiImage,
name: "centraldashboard",
ports: [
{
containerPort: 8082,
},
],
},
],
serviceAccountName: "centraldashboard",
},
},
},
}, // deployUi
uiService:: {
apiVersion: "v1",
kind: "Service",
metadata: {
labels: {
app: "centraldashboard",
},
name: "centraldashboard",
namespace: namespace,
annotations: {
"getambassador.io/config":
std.join("\n", [
"---",
"apiVersion: ambassador/v0",
"kind: Mapping",
"name: centralui-mapping",
"prefix: /",
"rewrite: /",
"service: centraldashboard." + namespace,
]),
}, //annotations
},
spec: {
ports: [
{
port: 80,
targetPort: 8082,
},
],
selector: {
app: "centraldashboard",
},
sessionAffinity: "None",
type: "ClusterIP",
},
}, //service
uiServiceAccount:: {
apiVersion: "v1",
kind: "ServiceAccount",
metadata: {
name: "centraldashboard",
namespace: namespace,
},
}, // service account
uiRole:: {
apiVersion: "rbac.authorization.k8s.io/v1beta1",
kind: "ClusterRole",
metadata: {
labels: {
app: "centraldashboard",
},
name: "centraldashboard",
namespace: namespace,
},
rules: [
{
apiGroups: [""],
resources: [
"pods",
"pods/exec",
"pods/log",
],
verbs: [
"get",
"list",
"watch",
],
},
{
apiGroups: [""],
resources: [
"secrets",
],
verbs: [
"get",
],
},
],
}, // operator-role
uiRoleBinding:: {
apiVersion: "rbac.authorization.k8s.io/v1beta1",
kind: "ClusterRoleBinding",
metadata: {
labels: {
app: "centraldashboard",
},
name: "centraldashboard",
namespace: namespace,
},
roleRef: {
apiGroup: "rbac.authorization.k8s.io",
kind: "ClusterRole",
name: "centraldashboard",
},
subjects: [
{
kind: "ServiceAccount",
name: "centraldashboard",
namespace: namespace,
},
],
}, // role binding
}, // parts
}

View File

@ -1,182 +0,0 @@
{
parts(namespace):: {
local k = import "k.libsonnet",
local certManagerImage = "quay.io/jetstack/cert-manager-controller:v0.2.4",
local certManagerIngressShimImage = "quay.io/jetstack/cert-manager-ingress-shim:v0.2.4",
// Note: we are not using std.prune, in order to preserve the required empty http01 map in the Issuer spec.
certManagerParts(acmeEmail, acmeUrl):: k.core.v1.list.new([
$.parts(namespace).certificateCRD,
$.parts(namespace).clusterIssuerCRD,
$.parts(namespace).issuerCRD,
$.parts(namespace).serviceAccount,
$.parts(namespace).clusterRole,
$.parts(namespace).clusterRoleBinding,
$.parts(namespace).deploy,
$.parts(namespace).issuerLEProd(acmeEmail, acmeUrl),
]),
certificateCRD:: {
apiVersion: "apiextensions.k8s.io/v1beta1",
kind: "CustomResourceDefinition",
metadata: {
name: "certificates.certmanager.k8s.io",
},
spec: {
group: "certmanager.k8s.io",
version: "v1alpha1",
names: {
kind: "Certificate",
plural: "certificates",
},
scope: "Namespaced",
},
},
clusterIssuerCRD:: {
apiVersion: "apiextensions.k8s.io/v1beta1",
kind: "CustomResourceDefinition",
metadata: {
name: "clusterissuers.certmanager.k8s.io",
},
spec: {
group: "certmanager.k8s.io",
version: "v1alpha1",
names: {
kind: "ClusterIssuer",
plural: "clusterissuers",
},
scope: "Cluster",
},
},
issuerCRD:: {
apiVersion: "apiextensions.k8s.io/v1beta1",
kind: "CustomResourceDefinition",
metadata: {
name: "issuers.certmanager.k8s.io",
},
spec: {
group: "certmanager.k8s.io",
version: "v1alpha1",
names: {
kind: "Issuer",
plural: "issuers",
},
scope: "Namespaced",
},
},
serviceAccount:: {
apiVersion: "v1",
kind: "ServiceAccount",
metadata: {
name: "cert-manager",
namespace: namespace,
},
},
clusterRole:: {
apiVersion: "rbac.authorization.k8s.io/v1beta1",
kind: "ClusterRole",
metadata: {
name: "cert-manager",
},
rules: [
{
apiGroups: ["certmanager.k8s.io"],
resources: ["certificates", "issuers", "clusterissuers"],
verbs: ["*"],
},
{
apiGroups: [""],
resources: ["secrets", "events", "endpoints", "services", "pods"],
verbs: ["*"],
},
{
apiGroups: ["extensions"],
resources: ["ingresses"],
verbs: ["*"],
},
],
},
clusterRoleBinding:: {
apiVersion: "rbac.authorization.k8s.io/v1beta1",
kind: "ClusterRoleBinding",
metadata: {
name: "cert-manager",
},
roleRef: {
apiGroup: "rbac.authorization.k8s.io",
kind: "ClusterRole",
name: "cert-manager",
},
subjects: [
{
name: "cert-manager",
namespace: namespace,
kind: "ServiceAccount",
},
],
},
deploy:: {
apiVersion: "apps/v1beta1",
kind: "Deployment",
metadata: {
name: "cert-manager",
namespace: namespace,
labels: {
app: "cert-manager",
},
},
spec: {
replicas: 1,
template: {
metadata: {
labels: {
app: "cert-manager",
},
},
spec: {
serviceAccountName: "cert-manager",
containers: [
{
name: "cert-manager",
image: certManagerImage,
imagePullPolicy: "IfNotPresent",
},
{
name: "ingress-shim",
image: certManagerIngressShimImage,
imagePullPolicy: "IfNotPresent",
},
],
},
},
},
},
issuerLEProd(acmeEmail, acmeUrl):: {
apiVersion: "certmanager.k8s.io/v1alpha1",
kind: "Issuer",
metadata: {
name: "letsencrypt-prod",
namespace: namespace,
},
spec: {
acme: {
server: acmeUrl,
email: acmeEmail,
privateKeySecretRef: {
name: "letsencrypt-prod-secret",
},
http01: {
},
},
},
},
},
}

View File

@ -1,332 +0,0 @@
{
parts(namespace):: {
local k = import "k.libsonnet",
local cloudEndpointsImage = "gcr.io/cloud-solutions-group/cloud-endpoints-controller:0.1.1",
local metacontrollerImage = "gcr.io/enisoc-kubernetes/metacontroller@sha256:18561c63e1c5380ac5bbaabefa933e484bdb499f10b61071506f9a0070bc65f6",
cloudEndpointsParts(secretName, secretKey):: k.core.v1.list.new([
$.parts(namespace).metaServiceAccount,
$.parts(namespace).metaClusterRole,
$.parts(namespace).metaClusterRoleBinding,
$.parts(namespace).metaInitializerCRD,
$.parts(namespace).metaLambdaCRD,
$.parts(namespace).metaDeployment,
$.parts(namespace).endpointsCRD,
$.parts(namespace).endpointsService,
$.parts(namespace).endpointsServiceAccount,
$.parts(namespace).endpointsClusterRole,
$.parts(namespace).endpointsClusterRoleBinding,
$.parts(namespace).endpointsDeploy(secretName, secretKey),
$.parts(namespace).endpointsLambdaController,
]),
metaServiceAccount:: {
apiVersion: "v1",
kind: "ServiceAccount",
metadata: {
name: "kube-metacontroller",
namespace: namespace,
},
}, // metaServiceAccount
metaClusterRole:: {
apiVersion: "rbac.authorization.k8s.io/v1beta1",
kind: "ClusterRole",
metadata: {
name: "kube-metacontroller",
},
rules: [
{
apiGroups: ["*"],
resources: ["*"],
verbs: ["*"],
},
],
}, // metaClusterRole
metaClusterRoleBinding:: {
apiVersion: "rbac.authorization.k8s.io/v1beta1",
kind: "ClusterRoleBinding",
metadata: {
name: "kube-metacontroller",
},
subjects: [
{
kind: "ServiceAccount",
name: "kube-metacontroller",
namespace: namespace,
},
],
roleRef: {
kind: "ClusterRole",
name: "kube-metacontroller",
apiGroup: "rbac.authorization.k8s.io",
},
}, // metaClusterRoleBinding
metaInitializerCRD:: {
apiVersion: "apiextensions.k8s.io/v1beta1",
kind: "CustomResourceDefinition",
metadata: {
name: "initializercontrollers.metacontroller.k8s.io",
},
spec: {
group: "metacontroller.k8s.io",
version: "v1alpha1",
scope: "Cluster",
names: {
plural: "initializercontrollers",
singular: "initializercontroller",
kind: "InitializerController",
shortNames: [
"ic",
"ictl",
],
},
},
}, // metaInitializerCRD
metaLambdaCRD:: {
apiVersion: "apiextensions.k8s.io/v1beta1",
kind: "CustomResourceDefinition",
metadata: {
name: "lambdacontrollers.metacontroller.k8s.io",
},
spec: {
group: "metacontroller.k8s.io",
version: "v1alpha1",
scope: "Cluster",
names: {
plural: "lambdacontrollers",
singular: "lambdacontroller",
kind: "LambdaController",
shortNames: [
"lc",
"lctl",
],
},
},
}, // metaLambdaCRD
metaDeployment:: {
apiVersion: "apps/v1beta1",
kind: "Deployment",
metadata: {
name: "kube-metacontroller",
namespace: namespace,
labels: {
app: "kube-metacontroller",
},
},
spec: {
replicas: 1,
template: {
metadata: {
labels: {
app: "kube-metacontroller",
},
},
spec: {
serviceAccountName: "kube-metacontroller",
containers: [
{
name: "kube-metacontroller",
image: metacontrollerImage,
command: [
"/usr/bin/metacontroller",
],
args: [
"--logtostderr",
],
imagePullPolicy: "Always",
},
],
},
},
},
}, // metaDeployment
endpointsCRD:: {
apiVersion: "apiextensions.k8s.io/v1beta1",
kind: "CustomResourceDefinition",
metadata: {
name: "cloudendpoints.ctl.isla.solutions",
},
spec: {
group: "ctl.isla.solutions",
version: "v1",
scope: "Namespaced",
names: {
plural: "cloudendpoints",
singular: "cloudendpoint",
kind: "CloudEndpoint",
shortNames: [
"cloudep",
"ce",
],
},
},
}, // endpointsCRD
endpointsService:: {
apiVersion: "v1",
kind: "Service",
metadata: {
name: "cloud-endpoints-controller",
namespace: namespace,
},
spec: {
type: "ClusterIP",
ports: [
{
name: "http",
port: 80,
},
],
selector: {
app: "cloud-endpoints-controller",
},
},
}, // endpointsService
endpointsLambdaController:: {
apiVersion: "metacontroller.k8s.io/v1alpha1",
kind: "LambdaController",
metadata: {
name: "cloud-endpoints-controller",
},
spec: {
parentResource: {
apiVersion: "ctl.isla.solutions/v1",
resource: "cloudendpoints",
},
childResources: [],
clientConfig: {
service: {
name: "cloud-endpoints-controller",
namespace: namespace,
caBundle: "...",
},
},
hooks: {
sync: {
path: "/sync",
},
},
generateSelector: true,
},
}, // endpointsLambdaController
endpointsServiceAccount:: {
apiVersion: "v1",
kind: "ServiceAccount",
metadata: {
name: "cloud-endpoints-controller",
namespace: namespace,
},
}, // endpointsServiceAccount
endpointsClusterRole:: {
kind: "ClusterRole",
apiVersion: "rbac.authorization.k8s.io/v1beta1",
metadata: {
name: "cloud-endpoints-controller",
namespace: namespace,
},
rules: [
{
apiGroups: [""],
resources: ["services"],
verbs: ["get", "list"],
},
{
apiGroups: ["extensions"],
resources: ["ingresses"],
verbs: ["get", "list"],
},
],
}, // endpointsClusterRole
endpointsClusterRoleBinding:: {
kind: "ClusterRoleBinding",
apiVersion: "rbac.authorization.k8s.io/v1beta1",
metadata: {
name: "cloud-endpoints-controller",
},
subjects: [
{
kind: "ServiceAccount",
name: "cloud-endpoints-controller",
namespace: namespace,
},
],
roleRef: {
kind: "ClusterRole",
name: "cloud-endpoints-controller",
apiGroup: "rbac.authorization.k8s.io",
},
}, // endpointsClusterRoleBinding
endpointsDeploy(secretName, secretKey):: {
apiVersion: "apps/v1beta1",
kind: "Deployment",
metadata: {
name: "cloud-endpoints-controller",
namespace: namespace,
},
spec: {
replicas: 1,
template: {
metadata: {
labels: {
app: "cloud-endpoints-controller",
},
},
spec: {
serviceAccountName: "cloud-endpoints-controller",
terminationGracePeriodSeconds: 5,
containers: [
{
name: "cloud-endpoints-controller",
image: cloudEndpointsImage,
imagePullPolicy: "Always",
env: [
{
name: "GOOGLE_APPLICATION_CREDENTIALS",
value: "/var/run/secrets/sa/" + secretKey,
},
],
volumeMounts: [
{
name: "sa-key",
readOnly: true,
mountPath: "/var/run/secrets/sa",
},
],
readinessProbe: {
httpGet: {
path: "/healthz",
port: 80,
scheme: "HTTP",
},
periodSeconds: 5,
timeoutSeconds: 5,
successThreshold: 1,
failureThreshold: 2,
},
},
],
volumes: [
{
name: "sa-key",
secret: {
secretName: secretName,
},
},
],
},
},
},
}, // endpointsDeploy
}, // parts
}

View File

@ -1,74 +0,0 @@
#!/bin/bash
#
# A script to modify envoy config to perform JWT validation
# given the information for the service.
# Script executed by the iap container to configure IAP. When finished, the envoy config is created with the JWT audience.
[ -z ${CLIENT_ID} ] && echo Error CLIENT_ID must be set && exit 1
[ -z ${CLIENT_SECRET} ] && echo Error CLIENT_SECRET must be set && exit 1
[ -z ${NAMESPACE} ] && echo Error NAMESPACE must be set && exit 1
[ -z ${SERVICE} ] && echo Error SERVICE must be set && exit 1
apk add --update jq
curl https://storage.googleapis.com/kubernetes-release/release/v1.9.4/bin/linux/amd64/kubectl > /usr/local/bin/kubectl && chmod +x /usr/local/bin/kubectl
PROJECT=$(curl -s -H "Metadata-Flavor: Google" http://metadata.google.internal/computeMetadata/v1/project/project-id)
if [ -z ${PROJECT} ]; then
echo Error unable to fetch PROJECT from compute metadata
exit 1
fi
PROJECT_NUM=$(curl -s -H "Metadata-Flavor: Google" http://metadata.google.internal/computeMetadata/v1/project/numeric-project-id)
if [ -z ${PROJECT_NUM} ]; then
echo Error unable to fetch PROJECT_NUM from compute metadata
exit 1
fi
# Activate the service account
gcloud auth activate-service-account --key-file=${GOOGLE_APPLICATION_CREDENTIALS}
# Print out the config for debugging
gcloud config list
NODE_PORT=$(kubectl --namespace=${NAMESPACE} get svc ${SERVICE} -o jsonpath='{.spec.ports[0].nodePort}')
while [[ -z ${BACKEND_ID} ]];
do BACKEND_ID=$(gcloud compute --project=${PROJECT} backend-services list --filter=name~k8s-be-${NODE_PORT}- --format='value(id)');
echo "Waiting for backend id PROJECT=${PROJECT} NAMESPACE=${NAMESPACE} SERVICE=${SERVICE} filter=name~k8s-be-${NODE_PORT}-...";
sleep 2;
done
echo BACKEND_ID=${BACKEND_ID}
NODE_PORT=$(kubectl --namespace=${NAMESPACE} get svc ${SERVICE} -o jsonpath='{.spec.ports[0].nodePort}')
BACKEND_SERVICE=$(gcloud --project=${PROJECT} compute backend-services list --filter=name~k8s-be-${NODE_PORT}- --uri)
JWT_AUDIENCE="/projects/${PROJECT_NUM}/global/backendServices/${BACKEND_ID}"
# For healthcheck compare.
echo "JWT_AUDIENCE=${JWT_AUDIENCE}" > /var/shared/healthz.env
echo "NODE_PORT=${NODE_PORT}" >> /var/shared/healthz.env
echo "BACKEND_ID=${BACKEND_ID}" >> /var/shared/healthz.env
kubectl get configmap -n ${NAMESPACE} envoy-config -o jsonpath='{.data.envoy-config\.json}' | \
sed -e "s|{{JWT_AUDIENCE}}|${JWT_AUDIENCE}|g" > /var/shared/envoy-config.json
echo "Restarting envoy"
curl -s ${ENVOY_ADMIN}/quitquitquit
function checkIAP() {
# created by init container.
. /var/shared/healthz.env
# If node port or backend id change, so does the JWT audience.
CURR_NODE_PORT=$(kubectl --namespace=${NAMESPACE} get svc ${SERVICE} -o jsonpath='{.spec.ports[0].nodePort}')
CURR_BACKEND_ID=$(gcloud compute --project=${PROJECT} backend-services list --filter=name~k8s-be-${CURR_NODE_PORT}- --format='value(id)')
[ "$BACKEND_ID" == "$CURR_BACKEND_ID" ]
}
# Verify IAP every 10 seconds.
while true; do
if ! checkIAP; then
echo "$(date) WARN: IAP check failed, restarting container."
exit 1
fi
sleep 10
done
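
Everything the script above does boils down to deriving one value, the IAP JWT audience, from the numeric project id and the id of the GCE backend service created for the envoy NodePort, and then restarting whenever that backend id changes. A short Python sketch of the derivation and the check, with placeholder values:

```
def jwt_audience(project_num, backend_id):
    # The format written into healthz.env and substituted into the envoy config.
    return "/projects/%s/global/backendServices/%s" % (project_num, backend_id)


def iap_still_valid(saved_backend_id, current_backend_id):
    # checkIAP: if the backend id changed, the JWT audience changed too and the
    # container exits so that the init sequence regenerates the envoy config.
    return saved_backend_id == current_backend_id


# Placeholder values, for illustration only.
print(jwt_audience("123456789012", "9876543210987654"))
```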

View File

@ -1,817 +0,0 @@
{
parts(namespace):: {
local k = import "k.libsonnet",
// Test if the given hostname is in the form of: "NAME.endpoints.PROJECT.cloud.goog"
local isCloudEndpoint = function(str) {
local toks = std.split(str, "."),
result::
(std.length(toks) == 5 && toks[1] == "endpoints" && toks[3] == "cloud" && toks[4] == "goog"),
}.result,
// Creates map of parameters from a given hostname in the form of: "NAME.endpoints.PROJECT.cloud.goog"
local makeEndpointParams = function(str) {
local toks = std.split(str, "."),
result:: {
name: toks[0],
project: toks[2],
},
}.result,
ingressParts(secretName, ipName, hostname, issuer, envoyImage, disableJwt, oauthSecretName):: std.prune(k.core.v1.list.new([
$.parts(namespace).service,
$.parts(namespace).ingress(secretName, ipName, hostname),
$.parts(namespace).certificate(secretName, hostname, issuer),
$.parts(namespace).initServiceAccount,
$.parts(namespace).initClusterRoleBinding,
$.parts(namespace).initClusterRole,
$.parts(namespace).deploy(envoyImage, oauthSecretName),
$.parts(namespace).iapEnabler(oauthSecretName),
$.parts(namespace).configMap(disableJwt),
$.parts(namespace).whoamiService,
$.parts(namespace).whoamiApp,
(if isCloudEndpoint(hostname) then $.parts(namespace).cloudEndpoint(makeEndpointParams(hostname))),
])),
service:: {
apiVersion: "v1",
kind: "Service",
metadata: {
labels: {
service: "envoy",
},
name: "envoy",
namespace: namespace,
},
spec: {
ports: [
{
name: "envoy",
port: envoyPort,
targetPort: envoyPort,
},
],
selector: {
service: "envoy",
},
// NodePort because this will be the backend for our ingress.
type: "NodePort",
},
}, // service
initServiceAccount:: {
apiVersion: "v1",
kind: "ServiceAccount",
metadata: {
name: "envoy",
namespace: namespace,
},
}, // initServiceAccount
initClusterRoleBinding:: {
kind: "ClusterRoleBinding",
apiVersion: "rbac.authorization.k8s.io/v1beta1",
metadata: {
name: "envoy",
},
subjects: [
{
kind: "ServiceAccount",
name: "envoy",
namespace: namespace,
},
],
roleRef: {
kind: "ClusterRole",
name: "envoy",
apiGroup: "rbac.authorization.k8s.io",
},
}, // initClusterRoleBinding
initClusterRole:: {
kind: "ClusterRole",
apiVersion: "rbac.authorization.k8s.io/v1beta1",
metadata: {
name: "envoy",
namespace: namespace,
},
rules: [
{
apiGroups: [""],
resources: ["services", "configmaps"],
verbs: ["get", "list", "patch", "update"],
},
],
}, // initClusterRoleBinding
envoyContainer(params):: {
image: params.image,
command: [
"/usr/local/bin/envoy",
"-c",
params.configPath,
"--log-level",
"info",
// Since we are running multiple instances of envoy on the same host we need to set a unique baseId
"--base-id",
params.baseId,
],
imagePullPolicy: "Always",
name: params.name,
livenessProbe: {
httpGet: {
path: params.healthPath,
port: params.healthPort,
},
initialDelaySeconds: 30,
periodSeconds: 30,
},
readinessProbe: {
httpGet: {
path: params.healthPath,
port: params.healthPort,
},
initialDelaySeconds: 30,
periodSeconds: 30,
},
ports: std.map(function(p)
{
containerPort: p,
}
, params.ports),
resources: {
limits: {
cpu: 1,
memory: "400Mi",
},
requests: {
cpu: "200m",
memory: "100Mi",
},
},
volumeMounts: [
{
mountPath: "/etc/envoy",
name: "shared",
},
],
}, // envoyContainer
deploy(image, oauthSecretName):: {
apiVersion: "extensions/v1beta1",
kind: "Deployment",
metadata: {
name: "envoy",
namespace: namespace,
},
spec: {
replicas: 3,
template: {
metadata: {
labels: {
service: "envoy",
},
},
spec: {
serviceAccountName: "envoy",
containers: [
$.parts(namespace).envoyContainer({
image: image,
name: "envoy",
// We use the admin port for the health and readiness checks because the main port requires a valid JWT.
// healthPath: "/server_info",
healthPath: "/healthz",
healthPort: envoyPort,
configPath: "/etc/envoy/envoy-config.json",
baseId: "27000",
ports: [envoyPort, envoyAdminPort, envoyStatsPort],
}),
{
name: "iap",
image: "google/cloud-sdk:alpine",
command: [
"sh",
"/var/envoy-config/configure_envoy_for_iap.sh",
],
env: [
{
name: "NAMESPACE",
value: namespace,
},
{
name: "CLIENT_ID",
valueFrom: {
secretKeyRef: {
name: oauthSecretName,
key: "CLIENT_ID",
},
},
},
{
name: "CLIENT_SECRET",
valueFrom: {
secretKeyRef: {
name: oauthSecretName,
key: "CLIENT_SECRET",
},
},
},
{
name: "SERVICE",
value: "envoy",
},
{
name: "ENVOY_ADMIN",
value: "http://localhost:" + envoyAdminPort,
},
{
name: "GOOGLE_APPLICATION_CREDENTIALS",
value: "/var/run/secrets/sa/admin-gcp-sa.json",
},
],
volumeMounts: [
{
mountPath: "/var/envoy-config/",
name: "config-volume",
},
{
mountPath: "/var/shared/",
name: "shared",
},
{
name: "sa-key",
readOnly: true,
mountPath: "/var/run/secrets/sa",
},
],
},
],
restartPolicy: "Always",
volumes: [
{
configMap: {
name: "envoy-config",
},
name: "config-volume",
},
{
emptyDir: {
medium: "Memory",
},
name: "shared",
},
{
name: "sa-key",
secret: {
secretName: "admin-gcp-sa",
},
},
],
},
},
},
}, // deploy
// Run the process to enable iap
iapEnabler(oauthSecretName):: {
apiVersion: "extensions/v1beta1",
kind: "Deployment",
metadata: {
name: "iap-enabler",
namespace: namespace,
},
spec: {
replicas: 1,
template: {
metadata: {
labels: {
service: "iap-enabler",
},
},
spec: {
serviceAccountName: "envoy",
containers: [
{
name: "iap",
image: "google/cloud-sdk:alpine",
command: [
"sh",
"/var/envoy-config/setup_iap.sh",
],
env: [
{
name: "NAMESPACE",
value: namespace,
},
{
name: "CLIENT_ID",
valueFrom: {
secretKeyRef: {
name: oauthSecretName,
key: "CLIENT_ID",
},
},
},
{
name: "CLIENT_SECRET",
valueFrom: {
secretKeyRef: {
name: oauthSecretName,
key: "CLIENT_SECRET",
},
},
},
{
name: "SERVICE",
value: "envoy",
},
{
name: "ENVOY_ADMIN",
value: "http://localhost:" + envoyAdminPort,
},
{
name: "GOOGLE_APPLICATION_CREDENTIALS",
value: "/var/run/secrets/sa/admin-gcp-sa.json",
},
],
volumeMounts: [
{
mountPath: "/var/envoy-config/",
name: "config-volume",
},
{
name: "sa-key",
readOnly: true,
mountPath: "/var/run/secrets/sa",
},
],
},
],
restartPolicy: "Always",
volumes: [
{
configMap: {
name: "envoy-config",
},
name: "config-volume",
},
{
name: "sa-key",
secret: {
secretName: "admin-gcp-sa",
},
},
],
},
},
},
}, // iapEnabler
configMap(disableJwt):: {
apiVersion: "v1",
kind: "ConfigMap",
metadata: {
name: "envoy-config",
namespace: namespace,
},
data: {
"envoy-config.json": std.manifestJson($.parts(namespace).envoyConfig(disableJwt)),
"setup_iap.sh": importstr "setup_iap.sh",
"configure_envoy_for_iap.sh": importstr "configure_envoy_for_iap.sh",
},
},
local envoyPort = 8080,
local envoyAdminPort = 8001,
local envoyStatsPort = 8025,
// This is the config for the secondary envoy proxy which does JWT verification
// and actually routes requests to the appropriate backend.
envoyConfig(disableJwt):: {
listeners: [
{
address: "tcp://0.0.0.0:" + envoyPort,
filters: [
{
type: "read",
name: "http_connection_manager",
config: {
codec_type: "auto",
stat_prefix: "ingress_http",
access_log: [
{
format: 'ACCESS [%START_TIME%] "%REQ(:METHOD)% %REQ(X-ENVOY-ORIGINAL-PATH?:PATH)% %PROTOCOL%" %RESPONSE_CODE% %RESPONSE_FLAGS% %BYTES_RECEIVED% %BYTES_SENT% %DURATION% %RESP(X-ENVOY-UPSTREAM-SERVICE-TIME)% "%REQ(X-FORWARDED-FOR)%" "%REQ(USER-AGENT)%" "%REQ(X-REQUEST-ID)%" "%REQ(:AUTHORITY)%" "%UPSTREAM_HOST%"\n',
path: "/dev/fd/1",
},
],
route_config: {
virtual_hosts: [
{
name: "backend",
domains: ["*"],
routes: [
// First route that matches is picked.
{
timeout_ms: 10000,
path: "/healthz",
prefix_rewrite: "/server_info",
weighted_clusters: {
clusters: [
{ name: "cluster_healthz", weight: 100.0 },
],
},
},
// Provide access to the whoami app, skipping JWT verification.
// This is useful for debugging.
{
timeout_ms: 10000,
prefix: "/noiap/whoami",
prefix_rewrite: "/",
weighted_clusters: {
clusters: [
{
name: "cluster_iap_app",
weight: 100.0,
},
],
},
},
{
timeout_ms: 10000,
prefix: "/whoami",
prefix_rewrite: "/",
weighted_clusters: {
clusters: [
{
name: "cluster_iap_app",
weight: 100.0,
},
],
},
},
// Jupyter uses the prefixes /hub & /user
{
// JupyterHub requires the prefix /hub
// Use a 10 minute timeout because downloading
// images for jupyter notebook can take a while
timeout_ms: 600000,
prefix: "/hub",
prefix_rewrite: "/hub",
use_websocket: true,
weighted_clusters: {
clusters: [
{
name: "cluster_jupyterhub",
weight: 100.0,
},
],
},
},
{
// JupyterHub requires the prefix /user
// Use a 10 minute timeout because downloading
// images for jupyter notebook can take a while
timeout_ms: 600000,
prefix: "/user",
prefix_rewrite: "/user",
use_websocket: true,
weighted_clusters: {
clusters: [
{
name: "cluster_jupyterhub",
weight: 100.0,
},
],
},
},
// TFJob uses the prefix /tfjobs/
{
timeout_ms: 10000,
prefix: "/tfjobs",
prefix_rewrite: "/tfjobs",
weighted_clusters: {
clusters: [
{
name: "cluster_tfjobs",
weight: 100.0,
},
],
},
},
{
// Route remaining traffic to Ambassador which supports dynamically adding
// routes based on service annotations.
timeout_ms: 10000,
prefix: "/",
prefix_rewrite: "/",
use_websocket: true,
weighted_clusters: {
clusters: [
{
name: "cluster_ambassador",
weight: 100.0,
},
],
},
},
],
},
],
},
local authFilter = if disableJwt then
[]
else [{
type: "decoder",
name: "jwt-auth",
config: {
jwts: [
{
issuer: "https://cloud.google.com/iap",
audiences: "{{JWT_AUDIENCE}}",
jwks_uri: "https://www.gstatic.com/iap/verify/public_key-jwk",
jwks_uri_envoy_cluster: "iap_issuer",
jwt_headers: ["x-goog-iap-jwt-assertion"],
},
],
bypass_jwt: [
{
http_method: "GET",
path_exact: "/healthz",
},
{
http_method: "GET",
path_exact: "/noiap/whoami",
},
],
},
}],
filters:
authFilter +
[
{
type: "decoder",
name: "router",
config: {},
},
],
},
},
],
},
],
admin: {
// We use 0.0.0.0 and not 127.0.0.1 because we want the admin server to be available on all interfaces
// so that it can be used for health checking.
address: "tcp://0.0.0.0:" + envoyAdminPort,
access_log_path: "/tmp/admin_access_log",
},
cluster_manager: {
clusters: [
{
name: "cluster_healthz",
connect_timeout_ms: 3000,
type: "strict_dns",
lb_type: "round_robin",
hosts: [
{
// We just use the admin server for the health check
url: "tcp://127.0.0.1:" + envoyAdminPort,
},
],
},
{
name: "iap_issuer",
connect_timeout_ms: 5000,
type: "strict_dns",
circuit_breakers: {
default: {
max_pending_requests: 10000,
max_requests: 10000,
},
},
lb_type: "round_robin",
hosts: [
{
url: "tcp://www.gstatic.com:80",
},
],
},
{
name: "cluster_iap_app",
connect_timeout_ms: 3000,
type: "strict_dns",
lb_type: "round_robin",
hosts: [
{
url: "tcp://whoami-app." + namespace + ":80",
},
],
},
{
name: "cluster_jupyterhub",
connect_timeout_ms: 3000,
type: "strict_dns",
lb_type: "round_robin",
hosts: [
{
url: "tcp://tf-hub-lb." + namespace + ":80",
},
],
},
{
name: "cluster_tfjobs",
connect_timeout_ms: 3000,
type: "strict_dns",
lb_type: "round_robin",
hosts: [
{
url: "tcp://tf-job-dashboard." + namespace + ":80",
},
],
},
{
name: "cluster_ambassador",
connect_timeout_ms: 3000,
type: "strict_dns",
lb_type: "round_robin",
hosts: [
{
url: "tcp://ambassador." + namespace + ":80",
},
],
},
],
},
statsd_udp_ip_address: "127.0.0.1:" + envoyStatsPort,
stats_flush_interval_ms: 1000,
}, // envoyConfig
whoamiService:: {
apiVersion: "v1",
kind: "Service",
metadata: {
labels: {
app: "whoami",
},
name: "whoami-app",
namespace: namespace,
},
spec: {
ports: [
{
port: 80,
targetPort: 8081,
},
],
selector: {
app: "whoami",
},
type: "ClusterIP",
},
}, // whoamiService
whoamiApp:: {
apiVersion: "extensions/v1beta1",
kind: "Deployment",
metadata: {
name: "whoami-app",
namespace: namespace,
},
spec: {
replicas: 1,
template: {
metadata: {
labels: {
app: "whoami",
},
},
spec: {
containers: [
{
env: [
{
name: "PORT",
value: "8081",
},
],
image: "gcr.io/cloud-solutions-group/esp-sample-app:1.0.0",
name: "app",
ports: [
{
containerPort: 8081,
},
],
readinessProbe: {
failureThreshold: 2,
httpGet: {
path: "/healthz",
port: 8081,
scheme: "HTTP",
},
periodSeconds: 10,
successThreshold: 1,
timeoutSeconds: 5,
},
},
],
},
},
},
},
ingress(secretName, ipName, hostname):: {
apiVersion: "extensions/v1beta1",
kind: "Ingress",
metadata: {
name: "envoy-ingress",
namespace: namespace,
annotations: {
"kubernetes.io/tls-acme": "true",
"ingress.kubernetes.io/ssl-redirect": "true",
"kubernetes.io/ingress.global-static-ip-name": ipName,
},
},
spec: {
rules: [
{
[if hostname != "null" then "host"]: hostname,
http: {
paths: [
{
backend: {
// Due to https://github.com/kubernetes/contrib/blob/master/ingress/controllers/gce/examples/health_checks/README.md#limitations
// Keep the servicePort the same as the port we are targeting on the backend so that servicePort
// matches targetPort for the purpose of health checking.
serviceName: "envoy",
servicePort: envoyPort,
},
path: "/*",
},
],
},
},
],
tls: [
{
secretName: secretName,
},
],
},
}, // iapIngress
certificate(secretName, hostname, issuer):: {
apiVersion: "certmanager.k8s.io/v1alpha1",
kind: "Certificate",
metadata: {
name: secretName,
namespace: namespace,
},
spec: {
secretName: secretName,
issuerRef: {
name: issuer,
},
commonName: hostname,
dnsNames: [
hostname,
],
acme: {
config: [
{
http01: {
ingress: "envoy-ingress",
},
domains: [
hostname,
],
},
],
},
},
}, // certificate
cloudEndpoint(params):: {
apiVersion: "ctl.isla.solutions/v1",
kind: "CloudEndpoint",
metadata: {
name: params.name,
namespace: namespace,
},
spec: {
project: params.project,
targetIngress: {
name: "envoy-ingress",
namespace: namespace,
},
},
}, // cloudEndpoint
}, // parts
}
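
Routing in envoyConfig above is first-match-wins over the listed routes: /healthz is answered from the admin server, /noiap/whoami and /whoami go straight to the whoami app, /hub and /user go to JupyterHub with a 10 minute timeout, /tfjobs goes to the TFJob dashboard, and everything else falls through to Ambassador. A condensed Python sketch of that selection follows, plus the hostname test used to decide whether a CloudEndpoint object is emitted; it simplifies details (for example, /healthz is an exact-path match in the real config).

```
ROUTES = [
    # (prefix, cluster, timeout_ms) in priority order; the first match wins.
    ("/healthz", "cluster_healthz", 10000),
    ("/noiap/whoami", "cluster_iap_app", 10000),
    ("/whoami", "cluster_iap_app", 10000),
    ("/hub", "cluster_jupyterhub", 600000),
    ("/user", "cluster_jupyterhub", 600000),
    ("/tfjobs", "cluster_tfjobs", 10000),
    ("/", "cluster_ambassador", 10000),
]


def pick_route(path):
    for prefix, cluster, timeout_ms in ROUTES:
        if path.startswith(prefix):
            return cluster, timeout_ms


def is_cloud_endpoint(hostname):
    # Mirror isCloudEndpoint: "NAME.endpoints.PROJECT.cloud.goog".
    toks = hostname.split(".")
    return (len(toks) == 5 and toks[1] == "endpoints"
            and toks[3] == "cloud" and toks[4] == "goog")
```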

View File

@ -1,315 +0,0 @@
{
all(params):: [
$.parts(params.namespace).jupyterHubConfigMap(params.jupyterHubAuthenticator, params.disks),
$.parts(params.namespace).jupyterHubService,
$.parts(params.namespace).jupyterHubLoadBalancer(params.jupyterHubServiceType),
$.parts(params.namespace).jupyterHub(params.jupyterHubImage, params.jupyterNotebookPVCMount, params.cloud, params.jupyterNotebookRegistry, params.jupyterNotebookRepoName),
$.parts(params.namespace).jupyterHubRole,
$.parts(params.namespace).jupyterHubServiceAccount,
$.parts(params.namespace).jupyterHubRoleBinding,
],
parts(namespace):: {
jupyterHubConfigMap(jupyterHubAuthenticator, disks): {
local util = import "kubeflow/core/util.libsonnet",
local diskNames = util.toArray(disks),
local kubeSpawner = $.parts(namespace).kubeSpawner(jupyterHubAuthenticator, diskNames),
result:: $.parts(namespace).jupyterHubConfigMapWithSpawner(kubeSpawner),
}.result,
kubeSpawner(authenticator, volumeClaims=[]): {
// TODO(jlewi): We should make whether we use PVC configurable.
local baseKubeConfigSpawner = importstr "kubeform_spawner.py",
authenticatorOptions:: {
//## Authenticator Options
local kubeConfigDummyAuthenticator = "c.JupyterHub.authenticator_class = 'dummyauthenticator.DummyAuthenticator'",
// This configuration allows us to use the id provided by IAP.
local kubeConfigIAPAuthenticator = @"c.JupyterHub.authenticator_class ='jhub_remote_user_authenticator.remote_user_auth.RemoteUserAuthenticator'
c.RemoteUserAuthenticator.header_name = 'x-goog-authenticated-user-email'",
options:: std.join("\n", std.prune([
"######## Authenticator ######",
if authenticator == "iap" then
kubeConfigIAPAuthenticator else
kubeConfigDummyAuthenticator,
])),
}.options, // authenticatorOptions
volumeOptions:: {
local volumes = std.map(function(v)
{
name: v,
persistentVolumeClaim: {
claimName: v,
},
}, volumeClaims),
local volumeMounts = std.map(function(v)
{
mountPath: "/mnt/" + v,
name: v,
}, volumeClaims),
options::
if std.length(volumeClaims) > 0 then
// we need to merge the PVC from the spawner config
// with any added by a provisioner
std.join("\n",
[
"###### Volumes #######",
"c.KubeSpawner.volumes.extend(" + std.manifestPython(volumes) + ")",
"c.KubeSpawner.volume_mounts.extend(" + std.manifestPython(volumeMounts) + ")",
])
else "",
}.options, // volumeOptions
spawner:: std.join("\n", std.prune([baseKubeConfigSpawner, self.authenticatorOptions, self.volumeOptions])),
}.spawner, // kubeSpawner
local baseJupyterHubConfigMap = {
apiVersion: "v1",
kind: "ConfigMap",
metadata: {
name: "jupyterhub-config",
namespace: namespace,
},
},
jupyterHubConfigMapWithSpawner(spawner): baseJupyterHubConfigMap {
data: {
"jupyterhub_config.py": spawner,
},
},
jupyterHubService: {
apiVersion: "v1",
kind: "Service",
metadata: {
labels: {
app: "tf-hub",
},
name: "tf-hub-0",
namespace: namespace,
},
spec: {
// We want a headless service so we set the ClusterIP to be None.
// This headless service is used by individual Jupyter pods to connect back to the Hub.
clusterIP: "None",
ports: [
{
name: "hub",
port: 8000,
},
],
selector: {
app: "tf-hub",
},
},
},
jupyterHubLoadBalancer(serviceType): {
apiVersion: "v1",
kind: "Service",
metadata: {
labels: {
app: "tf-hub-lb",
},
name: "tf-hub-lb",
namespace: namespace,
annotations: {
"getambassador.io/config":
std.join("\n", [
"---",
"apiVersion: ambassador/v0",
"kind: Mapping",
"name: tf-hub-lb-hub-mapping",
"prefix: /hub/",
"rewrite: /hub/",
"timeout_ms: 300000",
"service: tf-hub-lb." + namespace,
"---",
"apiVersion: ambassador/v0",
"kind: Mapping",
"name: tf-hub-lb-user-mapping",
"prefix: /user/",
"rewrite: /user/",
"timeout_ms: 300000",
"service: tf-hub-lb." + namespace,
]),
}, //annotations
},
spec: {
ports: [
{
name: "hub",
port: 80,
targetPort: 8000,
},
],
selector: {
app: "tf-hub",
},
type: serviceType,
},
},
// image: Image for JupyterHub
jupyterHub(image, notebookPVCMount, cloud, registry, repoName): {
apiVersion: "apps/v1beta1",
kind: "StatefulSet",
metadata: {
name: "tf-hub",
namespace: namespace,
},
spec: {
replicas: 1,
serviceName: "",
template: {
metadata: {
labels: {
app: "tf-hub",
},
},
spec: {
containers: [
{
command: [
"jupyterhub",
"-f",
"/etc/config/jupyterhub_config.py",
],
image: image,
name: "tf-hub",
volumeMounts: [
{
mountPath: "/etc/config",
name: "config-volume",
},
],
ports: [
// Port 8000 is used by the hub to accept incoming requests.
{
containerPort: 8000,
},
// Port 8081 accepts callbacks from the individual Jupyter pods.
{
containerPort: 8081,
},
],
env: [
{
name: "NOTEBOOK_PVC_MOUNT",
value: notebookPVCMount,
},
{
name: "CLOUD_NAME",
value: cloud,
},
{
name: "REGISTRY",
value: registry,
},
{
name: "REPO_NAME",
value: repoName,
},
],
}, // jupyterHub container
],
serviceAccountName: "jupyter-hub",
volumes: [
{
configMap: {
name: "jupyterhub-config",
},
name: "config-volume",
},
],
},
},
updateStrategy: {
type: "RollingUpdate",
},
},
},
// contents based on https://github.com/jupyterhub/zero-to-jupyterhub-k8s/blob/master/jupyterhub/templates/hub/rbac.yaml
jupyterHubRole: {
apiVersion: "rbac.authorization.k8s.io/v1beta1",
kind: "Role",
metadata: {
name: "jupyter-role",
namespace: namespace,
},
rules: [
{
apiGroups: [
"",
],
resources: [
"pods",
"persistentvolumeclaims",
],
verbs: [
"get",
"watch",
"list",
"create",
"delete",
],
},
{
apiGroups: [
"",
],
resources: [
"events",
],
verbs: [
"get",
"watch",
"list",
],
},
],
},
jupyterHubServiceAccount: {
apiVersion: "v1",
kind: "ServiceAccount",
metadata: {
labels: {
app: "jupyter-hub",
},
name: "jupyter-hub",
namespace: namespace,
},
},
jupyterHubRoleBinding: {
apiVersion: "rbac.authorization.k8s.io/v1beta1",
kind: "RoleBinding",
metadata: {
name: "jupyter-role",
namespace: namespace,
},
roleRef: {
apiGroup: "rbac.authorization.k8s.io",
kind: "Role",
name: "jupyter-role",
},
subjects: [
{
kind: "ServiceAccount",
name: "jupyter-hub",
namespace: namespace,
},
],
},
}, // parts
}
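
kubeSpawner above stitches the jupyterhub_config.py that lands in the ConfigMap out of three text fragments: the base spawner file (kubeform_spawner.py, shown next), an authenticator snippet (the IAP header authenticator or the dummy authenticator), and an optional volumes snippet built from the attached PVC names. A rough Python sketch of that assembly, approximating std.manifestPython with repr; the claim names passed in would be placeholders.

```
def spawner_config(base_config, authenticator, volume_claims):
    if authenticator == "iap":
        auth = ("c.JupyterHub.authenticator_class = "
                "'jhub_remote_user_authenticator.remote_user_auth.RemoteUserAuthenticator'\n"
                "c.RemoteUserAuthenticator.header_name = 'x-goog-authenticated-user-email'")
    else:
        auth = "c.JupyterHub.authenticator_class = 'dummyauthenticator.DummyAuthenticator'"

    volumes = [{"name": v, "persistentVolumeClaim": {"claimName": v}} for v in volume_claims]
    mounts = [{"mountPath": "/mnt/" + v, "name": v} for v in volume_claims]
    vol = ""
    if volume_claims:
        vol = ("c.KubeSpawner.volumes.extend(%r)\n"
               "c.KubeSpawner.volume_mounts.extend(%r)" % (volumes, mounts))

    # Empty fragments are dropped, like std.prune in the jsonnet.
    return "\n".join(part for part in [base_config, auth, vol] if part)
```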

View File

@ -1,165 +0,0 @@
import json
import os
from kubespawner.spawner import KubeSpawner
from jhub_remote_user_authenticator.remote_user_auth import RemoteUserAuthenticator
from oauthenticator.github import GitHubOAuthenticator
class KubeFormSpawner(KubeSpawner):
# relies on HTML5 for image datalist
def _options_form_default(self):
global registry, repoName
return '''
<label for='image'>Image</label>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
<input list="image" name="image" placeholder='repo/image:tag'>
<datalist id="image">
<option value="{0}/{1}/tensorflow-1.4.1-notebook-cpu:v0.2.0">
<option value="{0}/{1}/tensorflow-1.4.1-notebook-gpu:v0.2.0">
<option value="{0}/{1}/tensorflow-1.5.1-notebook-cpu:v0.2.0">
<option value="{0}/{1}/tensorflow-1.5.1-notebook-gpu:v0.2.0">
<option value="{0}/{1}/tensorflow-1.6.0-notebook-cpu:v0.2.0">
<option value="{0}/{1}/tensorflow-1.6.0-notebook-gpu:v0.2.0">
<option value="{0}/{1}/tensorflow-1.7.0-notebook-cpu:v0.2.0">
<option value="{0}/{1}/tensorflow-1.7.0-notebook-gpu:v0.2.0">
<option value="{0}/{1}/tensorflow-1.8.0-notebook-cpu:v0.2.0">
<option value="{0}/{1}/tensorflow-1.8.0-notebook-gpu:v0.2.0">
</datalist>
<br/><br/>
<label for='cpu_guarantee'>CPU</label>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
<input name='cpu_guarantee' placeholder='200m, 1.0, 2.5, etc'></input>
<br/><br/>
<label for='mem_guarantee'>Memory</label>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
<input name='mem_guarantee' placeholder='100Mi, 1.5Gi'></input>
<br/><br/>
<label for='extra_resource_limits'>Extra Resource Limits</label>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
<input name='extra_resource_limits' placeholder='{{&quot;nvidia.com/gpu&quot;: 3}}'></input>
<br/><br/>
'''.format(registry, repoName)
def options_from_form(self, formdata):
options = {}
options['image'] = formdata.get('image', [''])[0].strip()
options['cpu_guarantee'] = formdata.get(
'cpu_guarantee', [''])[0].strip()
options['mem_guarantee'] = formdata.get(
'mem_guarantee', [''])[0].strip()
options['extra_resource_limits'] = formdata.get(
'extra_resource_limits', [''])[0].strip()
return options
@property
def singleuser_image_spec(self):
global cloud
if cloud == 'ack':
image = 'registry.aliyuncs.com/kubeflow-images-public/tensorflow-notebook-cpu'
else:
image = 'gcr.io/kubeflow-images-public/tensorflow-1.8.0-notebook-cpu:v0.2.0'
if self.user_options.get('image'):
image = self.user_options['image']
return image
@property
def cpu_guarantee(self):
cpu = '500m'
if self.user_options.get('cpu_guarantee'):
cpu = self.user_options['cpu_guarantee']
return cpu
@property
def mem_guarantee(self):
mem = '1Gi'
if self.user_options.get('mem_guarantee'):
mem = self.user_options['mem_guarantee']
return mem
@property
def extra_resource_limits(self):
extra = ''
if self.user_options.get('extra_resource_limits'):
extra = json.loads(self.user_options['extra_resource_limits'])
return extra
###################################################
# JupyterHub Options
###################################################
c.JupyterHub.ip = '0.0.0.0'
c.JupyterHub.hub_ip = '0.0.0.0'
# Don't try to clean up servers on exit, since for k8s we generally want
# the hub to be able to restart without losing user containers
c.JupyterHub.cleanup_servers = False
###################################################
###################################################
# Spawner Options
###################################################
cloud = os.environ.get('CLOUD_NAME')
registry = os.environ.get('REGISTRY')
repoName = os.environ.get('REPO_NAME')
c.JupyterHub.spawner_class = KubeFormSpawner
c.KubeSpawner.singleuser_image_spec = '{0}/{1}/tensorflow-notebook'.format(registry, repoName)
c.KubeSpawner.cmd = 'start-singleuser.sh'
c.KubeSpawner.args = ['--allow-root']
# GPU images are very large (~15GB), so we need a large timeout.
c.KubeSpawner.start_timeout = 60 * 30
# Increase timeout to 5 minutes to avoid HTTP 500 errors on JupyterHub
c.KubeSpawner.http_timeout = 60 * 5
# Volume setup
c.KubeSpawner.singleuser_uid = 1000
c.KubeSpawner.singleuser_fs_gid = 100
c.KubeSpawner.singleuser_working_dir = '/home/jovyan'
volumes = []
volume_mounts = []
###################################################
# Persistent volume options
###################################################
# Using persistent storage requires a default storage class.
# TODO(jlewi): Verify this works on minikube.
# see https://github.com/kubeflow/kubeflow/pull/22#issuecomment-350500944
pvc_mount = os.environ.get('NOTEBOOK_PVC_MOUNT')
if pvc_mount and pvc_mount != 'null':
c.KubeSpawner.user_storage_pvc_ensure = True
# How much disk space do we want?
c.KubeSpawner.user_storage_capacity = '10Gi'
c.KubeSpawner.pvc_name_template = 'claim-{username}{servername}'
volumes.append(
{
'name': 'volume-{username}{servername}',
'persistentVolumeClaim': {
'claimName': 'claim-{username}{servername}'
}
}
)
volume_mounts.append(
{
'mountPath': pvc_mount,
'name': 'volume-{username}{servername}'
}
)
# ###################################################
# ### Extra volumes for NVIDIA drivers (Azure)
# ###################################################
# # Temporary fix:
# # AKS / acs-engine doesn't yet use device plugin so we have to mount the drivers to use GPU
# # TODO(wbuchwalter): Remove once device plugin is merged
if cloud == 'aks' or cloud == 'acsengine':
volumes.append({
'name': 'nvidia',
'hostPath': {
'path': '/usr/local/nvidia'
}
})
volume_mounts.append({
'name': 'nvidia',
'mountPath': '/usr/local/nvidia'
})
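# With the host path mounted, the notebook container can find the node's NVIDIA driver
# libraries under /usr/local/nvidia (the temporary workaround described above until the
# device plugin handles this).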
c.KubeSpawner.volumes = volumes
c.KubeSpawner.volume_mounts = volume_mounts

View File

@ -1,302 +0,0 @@
// A ksonnet prototype/component for using NFS.
{
// TODO(https://github.com/ksonnet/ksonnet/issues/222): Taking namespace as an argument is a work around for the fact that ksonnet
// doesn't support automatically piping in the namespace from the environment to prototypes.
//
// Return a list of components needed if you want to mount some disks using NFS.
// diskNames should be a list of PDs.
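// Example (hypothetical values): with params { namespace: "kubeflow", name: "nfs", disks: "disk1,disk2" },
// all(params) returns the shared ServiceAccount/Role/RoleBinding/ClusterRoleBinding plus, for each disk,
// a StorageClass, PersistentVolumeClaim, Service and nfs-provisioner Deployment as defined below.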
all(params):: {
local namespace = params.namespace,
local name = params.name,
local disks = params.disks,
// Create a list of the resources needed for a particular disk
local diskToList = function(diskName) [
$.parts(namespace, name,).diskResources(diskName).storageClass,
$.parts(namespace, name,).diskResources(diskName).volumeClaim,
$.parts(namespace, name,).diskResources(diskName).service,
$.parts(namespace, name,).diskResources(diskName).provisioner,
],
local util = import "kubeflow/core/util.libsonnet",
local allDisks = std.flattenArrays(std.map(diskToList, util.toArray(disks))),
items::
if std.length(allDisks) > 0 then
[
$.parts(namespace, name).serviceAccount,
$.parts(namespace, name).role,
$.parts(namespace, name).roleBinding,
$.parts(namespace, name).clusterRoleBinding,
] + allDisks
else
[],
}.items,
// Create a provisioner with the specified name.
// disks should be a list of GCP persistent disk names; these disks should be in the
// same zone as your cluster.
// TODO(jlewi):
parts(namespace, name):: {
local serviceAccountName = name,
local serviceAccountRoleName = name,
// Create the resources for a specific disk.
// Each NFS Provisioner can only manage 1 PD so we need to create one for each disk.
diskResources(diskName): {
local storageClassName = diskName + "-nfs",
local provisionerName = diskName + "-provisioner",
local storageClassProvisioner = diskName + "/nfs",
local serviceName = diskName + "-service",
volumeClaim: {
apiVersion: "v1",
kind: "PersistentVolumeClaim",
metadata: {
annotations: {
"volume.beta.kubernetes.io/storage-class": storageClassName,
},
name: diskName,
namespace: namespace,
},
spec: {
accessModes: [
"ReadWriteMany",
],
resources: {
requests: {
storage: "1Mi",
},
},
},
},
// TODO(jlewi): Is storageClass actually namespace scoped? It seems to show up in the default namespace as well.
// TODO(jlewi): Could we just use the default cluster storage class?
storageClass: {
apiVersion: "storage.k8s.io/v1beta1",
kind: "StorageClass",
metadata: {
name: storageClassName,
namespace: namespace,
},
// This value must be the same as passed as argument --provisioner to the provisioner
provisioner: storageClassProvisioner,
},
service: {
apiVersion: "v1",
kind: "Service",
metadata: {
labels: {
app: provisionerName,
},
name: serviceName,
namespace: namespace,
},
spec: {
ports: [
{
name: "nfs",
port: 2049,
},
{
name: "mountd",
port: 20048,
},
{
name: "rpcbind",
port: 111,
},
{
name: "rpcbind-udp",
port: 111,
protocol: "UDP",
},
],
selector: {
app: provisionerName,
},
},
},
provisioner: {
apiVersion: "extensions/v1beta1",
kind: "Deployment",
metadata: {
name: provisionerName,
namespace: namespace,
},
spec: {
replicas: 1,
strategy: {
type: "Recreate",
},
template: {
metadata: {
labels: {
app: provisionerName,
},
},
spec: {
containers: [
{
args: [
"-provisioner=" + storageClassProvisioner,
],
env: [
{
name: "POD_IP",
valueFrom: {
fieldRef: {
fieldPath: "status.podIP",
},
},
},
{
name: "SERVICE_NAME",
value: serviceName,
},
{
name: "POD_NAMESPACE",
valueFrom: {
fieldRef: {
fieldPath: "metadata.namespace",
},
},
},
],
image: "quay.io/kubernetes_incubator/nfs-provisioner:v1.0.8",
imagePullPolicy: "IfNotPresent",
name: "nfs-provisioner",
ports: [
{
containerPort: 2049,
name: "nfs",
},
{
containerPort: 20048,
name: "mountd",
},
{
containerPort: 111,
name: "rpcbind",
},
{
containerPort: 111,
name: "rpcbind-udp",
protocol: "UDP",
},
],
securityContext: {
capabilities: {
add: [
"DAC_READ_SEARCH",
],
},
},
volumeMounts: [{
// Needs to be mounted under /export because /export is what is exported for NFS.
// https://github.com/kubernetes-incubator/external-storage/tree/master/nfs#quickstart
mountPath: "/export",
name: diskName,
}],
},
],
volumes: [{
name: diskName,
gcePersistentDisk: {
pdName: diskName,
},
}],
serviceAccountName: serviceAccountName,
},
},
},
}, // provisioner
},
serviceAccount: {
apiVersion: "v1",
kind: "ServiceAccount",
metadata: {
labels: {
app: name + "nfs-provisioner",
},
name: serviceAccountName,
namespace: namespace,
},
},
role: {
apiVersion: "rbac.authorization.k8s.io/v1beta1",
kind: "Role",
metadata: {
name: serviceAccountRoleName,
namespace: namespace,
},
rules: [
{
apiGroups: [
"*",
],
// TODO(jlewi): This is very permissive so we may want to lock this down.
resources: [
"*",
],
verbs: [
"*",
],
},
],
},
roleBinding: {
apiVersion: "rbac.authorization.k8s.io/v1beta1",
kind: "RoleBinding",
metadata: {
name: name + "-nfs-role",
namespace: namespace,
},
roleRef: {
apiGroup: "rbac.authorization.k8s.io",
kind: "Role",
name: serviceAccountName,
},
subjects: [
{
kind: "ServiceAccount",
name: serviceAccountRoleName,
namespace: namespace,
},
],
},
// see https://github.com/kubernetes-incubator/external-storage/tree/master/docs#authorizing-provisioners-for-rbac-or-openshift
clusterRoleBinding: {
apiVersion: "rbac.authorization.k8s.io/v1beta1",
kind: "ClusterRoleBinding",
metadata: {
name: name + "-nfs-role",
namespace: namespace,
},
roleRef: {
apiGroup: "rbac.authorization.k8s.io",
kind: "ClusterRole",
name: "system:persistent-volume-provisioner",
},
subjects: [
{
kind: "ServiceAccount",
name: serviceAccountRoleName,
namespace: namespace,
},
],
},
}, // parts
}

View File

@ -1,35 +0,0 @@
// @apiVersion 0.1
// @name io.ksonnet.pkg.kubeflow-core
// @description Kubeflow core components
// @shortDescription Kubeflow core components. This currently includes JupyterHub and the TfJob controller.
// @param name string Name to give to each of the components
// @optionalParam namespace string null Namespace to use for the components. It is automatically inherited from the environment if not set.
// @optionalParam disks string null Comma separated list of Google persistent disks to attach to jupyter environments.
// @optionalParam cloud string null String identifying the cloud to customize the deployment for.
// @optionalParam AmbassadorServiceType string ClusterIP The service type for the API Gateway.
// @optionalParam AmbassadorImage string quay.io/datawire/ambassador:0.30.1 The image for the API Gateway.
// @optionalParam StatsdImage string quay.io/datawire/statsd:0.30.1 The image for the Stats and Monitoring.
// @optionalParam tfJobImage string gcr.io/kubeflow-images-public/tf_operator:v0.2.0 The image for the TfJob controller.
// @optionalParam tfDefaultImage string null The default image to use for TensorFlow.
// @optionalParam tfJobUiServiceType string ClusterIP The service type for the UI.
// @optionalParam jupyterHubServiceType string ClusterIP The service type for Jupyterhub.
// @optionalParam jupyterHubImage string gcr.io/kubeflow/jupyterhub-k8s:v20180531-3bb991b1 The image to use for JupyterHub.
// @optionalParam jupyterHubAuthenticator string null The authenticator to use
// @optionalParam jupyterNotebookPVCMount string null Mount path for PVC. Set empty to disable PVC
// @optionalParam jupyterNotebookRegistry string gcr.io The docker image registry for JupyterNotebook.
// @optionalParam jupyterNotebookRepoName string kubeflow-images-public The repository name for JupyterNotebook.
// @optionalParam reportUsage string false Whether or not to report Kubeflow usage to kubeflow.org.
// @optionalParam usageId string unknown_cluster Optional id to use when reporting usage to kubeflow.org
// @optionalParam tfJobVersion string v1alpha2 Which version of the TFJob operator to use
// @optionalParam centralUiImage string gcr.io/kubeflow-images-public/centraldashboard:v20180618-v0.2.0-rc.0-5-g715aafc8-e3b0c4 Image to use for Central UI.
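// Example usage (assuming a ksonnet app with the kubeflow package installed):
//   ks generate kubeflow-core kubeflow-core --namespace=kubeflow
//   ks apply default -c kubeflow-core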
local k = import "k.libsonnet";
local all = import "kubeflow/core/all.libsonnet";
// updatedParams uses the environment namespace if
// the namespace parameter is not explicitly set
local updatedParams = params {
namespace: if params.namespace == "null" then env.namespace else params.namespace,
};
std.prune(k.core.v1.list.new(all.parts(updatedParams).all))

View File

@ -1,19 +0,0 @@
// @apiVersion 0.1
// @name io.ksonnet.pkg.cert-manager
// @description Provides cert-manager prototypes for generating SSL certificates.
// @shortDescription Certificate generation on GKE.
// @param name string Name for the component
// @optionalParam namespace string null Namespace to use for the components. It is automatically inherited from the environment if not set.
// @param acmeEmail string The Lets Encrypt account email address
// @optionalParam acmeUrl string https://acme-v01.api.letsencrypt.org/directory The ACME server URL, set to https://acme-staging.api.letsencrypt.org/directory for staging API.
local k = import "k.libsonnet";
local certManager = import "kubeflow/core/cert-manager.libsonnet";
// updatedParams uses the environment namespace if
// the namespace parameter is not explicitly set
local updatedParams = params {
namespace: if params.namespace == "null" then env.namespace else params.namespace,
};
certManager.parts(updatedParams.namespace).certManagerParts(params.acmeEmail, params.acmeUrl)

View File

@ -1,19 +0,0 @@
// @apiVersion 0.1
// @name io.ksonnet.pkg.cloud-endpoints
// @description Provides cloud-endpoints prototypes for creating Cloud Endpoints services and DNS records.
// @shortDescription Cloud Endpoint domain creation.
// @param name string Name for the component
// @optionalParam secretName string admin-gcp-sa Name of secret containing the json service account key.
// @optionalParam secretKey string admin-gcp-sa.json Name of the key in the secret containing the JSON service account key.
// @optionalParam namespace string null Namespace to use for the components. It is automatically inherited from the environment if not set.
local k = import "k.libsonnet";
local cloudEndpoints = import "kubeflow/core/cloud-endpoints.libsonnet";
// updatedParams uses the environment namespace if
// the namespace parameter is not explicitly set
local updatedParams = params {
namespace: if params.namespace == "null" then env.namespace else params.namespace,
};
cloudEndpoints.parts(updatedParams.namespace).cloudEndpointsParts(params.secretName, params.secretKey)

View File

@ -1,28 +0,0 @@
// @apiVersion 0.1
// @name io.ksonnet.pkg.iap-ingress
// @description Provides ingress prototypes for setting up IAP on GKE.
// @shortDescription Ingress for IAP on GKE.
// @param name string Name for the component
// @param ipName string The name of the global ip address to use.
// @optionalParam namespace string null Namespace to use for the components. It is automatically inherited from the environment if not set.
// @optionalParam secretName string envoy-ingress-tls The name of the secret containing the SSL certificates.
// @optionalParam hostname string null The hostname associated with this ingress. Eg: mykubeflow.example.com
// @optionalParam issuer string letsencrypt-prod The cert-manager issuer name.
// @optionalParam envoyImage string gcr.io/kubeflow-images-public/envoy:v20180309-0fb4886b463698702b6a08955045731903a18738 The image for envoy.
// @optionalParam disableJwtChecking string false Disable JWT checking.
// @optionalParam oauthSecretName string kubeflow-oauth The name of the secret containing the OAuth CLIENT_ID and CLIENT_SECRET.
local k = import "k.libsonnet";
local iap = import "kubeflow/core/iap.libsonnet";
local util = import "kubeflow/core/util.libsonnet";
// updatedParams uses the environment namespace if
// the namespace parameter is not explicitly set
local updatedParams = params {
namespace: if params.namespace == "null" then env.namespace else params.namespace,
};
local namespace = updatedParams.namespace;
local disableJwtChecking = util.toBool(params.disableJwtChecking);
iap.parts(namespace).ingressParts(params.secretName, params.ipName, params.hostname, params.issuer, params.envoyImage, disableJwtChecking, params.oauthSecretName)

View File

@ -1,26 +0,0 @@
// @apiVersion 1
// @name io.ksonnet.pkg.tensorboard
// @description Tensorboard components
// @shortDescription ksonnet components for Tensorboard
// @param name string Name to give to each of the components
local k = import "k.libsonnet";
local tensorboard = import "kubeflow/core/tensorboard.libsonnet";
local name = import "param://name";
// updatedParams includes the namespace from env by default.
// We can override namespace in params if needed
local updatedParams = env + params;
local logDir = updatedParams.logDir;
local tb = tensorboard {
params+: updatedParams {
name: name,
},
};
//std.assertEqual(true, std.length(logDir) > 0)
std.prune(k.core.v1.list.new(tb.components))

View File

@ -1,27 +0,0 @@
// @apiVersion 0.1
// @name io.ksonnet.pkg.tf-job-operator
// @description A TensorFlow job operator CRD
// @shortDescription A TensorFlow job operator.
// @param name string Name to give to each of the components
// @optionalParam namespace string null Namespace to use for the components. It is automatically inherited from the environment if not set.
// @optionalParam cloud string null String identifying the cloud to customize the deployment for.
// @optionalParam tfAmbassadorServiceType string ClusterIP The service type for the API Gateway.
// @optionalParam tfJobImage string gcr.io/kubeflow-images-public/tf_operator:v0.2.0 The image for the TfJob controller.
// @optionalParam tfDefaultImage string null The default image to use for TensorFlow.
// @optionalParam tfJobUiServiceType string ClusterIP The service type for the UI.
// @optionalParam tfJobVersion string v1alpha2 Which version of the TFJob operator to use
// TODO(https://github.com/ksonnet/ksonnet/issues/235): ks param set args won't work if the arg starts with "--".
local env = std.extVar("__ksonnet/environments");
local params = std.extVar("__ksonnet/params").components["tf-job-operator"];
local k = import "k.libsonnet";
local tfjob = import "kubeflow/core/tf-job-operator.libsonnet";
// updatedParams uses the environment namespace if
// the namespace parameter is not explicitly set
local updatedParams = params {
namespace: if params.namespace == "null" then env.namespace else params.namespace,
};
std.prune(k.core.v1.list.new(tfjob.all(updatedParams)))

View File

@ -1,125 +0,0 @@
#!/bin/bash
#
# A simple shell script to enable IAP and configure timeouts by using gcloud.
[ -z "${CLIENT_ID}" ] && echo Error CLIENT_ID must be set && exit 1
[ -z "${CLIENT_SECRET}" ] && echo Error CLIENT_SECRET must be set && exit 1
[ -z "${NAMESPACE}" ] && echo Error NAMESPACE must be set && exit 1
[ -z "${SERVICE}" ] && echo Error SERVICE must be set && exit 1
apk add --update jq
curl https://storage.googleapis.com/kubernetes-release/release/v1.9.4/bin/linux/amd64/kubectl > /usr/local/bin/kubectl && chmod +x /usr/local/bin/kubectl
# Stagger init of replicas when acquiring lock
sleep $(( $RANDOM % 5 + 1 ))
# We acquire a lock because we want to ensure there is a single process
# trying to modify the backend at a time.
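# The lock is stored in the service's "iaplock" annotation as "<hostname> <timestamp>";
# a lock older than LOCK_TTL (set below) is treated as stale and can be taken over.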
kubectl get svc ${SERVICE} -o json > service.json
LOCK=$(jq -r ".metadata.annotations.iaplock" service.json)
NOW=$(date -u +'%s')
if [[ -z "${LOCK}" || "${LOCK}" == "null" ]]; then
LOCK_T=$NOW
else
LOCK_T=$(echo "${LOCK}" | cut -d' ' -f2)
fi
LOCK_AGE=$(( $NOW - $LOCK_T ))
LOCK_TTL=120
if [[ -z "${LOCK}" || "${LOCK}" == "null" || "${LOCK_AGE}" -gt "${LOCK_TTL}" ]]; then
jq -r ".metadata.annotations.iaplock=\"$(hostname -s) ${NOW}\"" service.json > service_lock.json
kubectl apply -f service_lock.json 2>/dev/null
if [[ $? -eq 0 ]]; then
echo "Acquired lock on service annotation to update IAP."
else
echo "WARN: Failed to acquire lock on service annotation."
exit 1
fi
else
echo "WARN: Lock on service annotation already acquired by: $LOCK, age: $LOCK_AGE, TTL: $LOCK_TTL"
sleep 20
exit 1
fi
PROJECT=$(curl -s -H "Metadata-Flavor: Google" http://metadata.google.internal/computeMetadata/v1/project/project-id)
if [ -z "${PROJECT}" ]; then
echo Error unable to fetch PROJECT from compute metadata
exit 1
fi
PROJECT_NUM=$(curl -s -H "Metadata-Flavor: Google" http://metadata.google.internal/computeMetadata/v1/project/numeric-project-id)
if [ -z "${PROJECT_NUM}" ]; then
echo Error unable to fetch PROJECT_NUM from compute metadata
exit 1
fi
# Activate the service account
gcloud auth activate-service-account --key-file=${GOOGLE_APPLICATION_CREDENTIALS}
# Print out the config for debugging
gcloud config list
NODE_PORT=$(kubectl --namespace=${NAMESPACE} get svc ${SERVICE} -o jsonpath='{.spec.ports[0].nodePort}')
while [[ -z ${BACKEND_ID} ]];
do BACKEND_ID=$(gcloud compute --project=${PROJECT} backend-services list --filter=name~k8s-be-${NODE_PORT}- --format='value(id)');
echo "Waiting for backend id PROJECT=${PROJECT} NAMESPACE=${NAMESPACE} SERVICE=${SERVICE} filter=name~k8s-be-${NODE_PORT}- ...";
sleep 2;
done
echo BACKEND_ID=${BACKEND_ID}
NODE_PORT=$(kubectl --namespace=${NAMESPACE} get svc ${SERVICE} -o jsonpath='{.spec.ports[0].nodePort}')
BACKEND_SERVICE=$(gcloud --project=${PROJECT} compute backend-services list --filter=name~k8s-be-${NODE_PORT}- --uri)
# Enable IAP on the backend service:
gcloud --project=${PROJECT} compute backend-services update ${BACKEND_SERVICE} \
--global \
--iap=enabled,oauth2-client-id=${CLIENT_ID},oauth2-client-secret=${CLIENT_SECRET}
while [[ -z ${HEALTH_CHECK_URI} ]];
do HEALTH_CHECK_URI=$(gcloud compute --project=${PROJECT} health-checks list --filter=name~k8s-be-${NODE_PORT}- --uri);
echo "Waiting for the healthcheck resource PROJECT=${PROJECT} NODEPORT=${NODE_PORT} SERVICE=${SERVICE}...";
sleep 2;
done
# Since we create the envoy-ingress ingress object before creating the envoy
# deployment object, healthcheck will not be configured correctly in the GCP
# load balancer. It will default the healthcheck request path to a value of
# / instead of the intended /healthz.
# Manually update the healthcheck request path to /healthz
gcloud --project=${PROJECT} compute health-checks update http ${HEALTH_CHECK_URI} --request-path=/healthz
# Since JupyterHub uses websockets we want to increase the backend timeout
echo Increasing backend timeout for JupyterHub
gcloud --project=${PROJECT} compute backend-services update --global ${BACKEND_SERVICE} --timeout=3600
JWT_AUDIENCE="/projects/${PROJECT_NUM}/global/backendServices/${BACKEND_ID}"
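# The audience string identifies this backend service; it is substituted into
# envoy-config.json below so envoy can validate the IAP-signed JWTs it receives.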
# For healthcheck compare.
mkdir -p /var/shared
echo "JWT_AUDIENCE=${JWT_AUDIENCE}" > /var/shared/healthz.env
echo "NODE_PORT=${NODE_PORT}" >> /var/shared/healthz.env
echo "BACKEND_ID=${BACKEND_ID}" >> /var/shared/healthz.env
# TODO(https://github.com/kubeflow/kubeflow/issues/942): We should publish the modified envoy
# config as a config map and use that in the envoy sidecars.
kubectl get configmap -n ${NAMESPACE} envoy-config -o jsonpath='{.data.envoy-config\.json}' | \
sed -e "s|{{JWT_AUDIENCE}}|${JWT_AUDIENCE}|g" > /var/shared/envoy-config.json
echo "Clearing lock on service annotation"
kubectl patch svc "${SERVICE}" -p "{\"metadata\": { \"annotations\": {\"iaplock\": \"\" }}}"
function checkIAP() {
# Source healthz.env, which was created by the init container.
. /var/shared/healthz.env
# If node port or backend id change, so does the JWT audience.
CURR_NODE_PORT=$(kubectl --namespace=${NAMESPACE} get svc ${SERVICE} -o jsonpath='{.spec.ports[0].nodePort}')
CURR_BACKEND_ID=$(gcloud compute --project=${PROJECT} backend-services list --filter=name~k8s-be-${CURR_NODE_PORT}- --format='value(id)')
[ "$BACKEND_ID" == "$CURR_BACKEND_ID" ]
}
# Verify IAP every 10 seconds.
while true; do
if ! checkIAP; then
echo "$(date) WARN: IAP check failed, restarting container."
exit 1
fi
sleep 10
done

View File

@ -1,113 +0,0 @@
{
local util = import "kubeflow/core/util.libsonnet",
all(params):: {
local reportUsageBool = util.toBool(params.reportUsage),
result:: if reportUsageBool then
[
$.parts(params.namespace).role,
$.parts(params.namespace).roleBinding,
$.parts(params.namespace).serviceAccount,
$.parts(params.namespace).deployment(params.usageId),
]
else [],
}.result,
parts(namespace):: {
// Spartakus needs to be able to get information about the cluster in order to create a report.
role: {
apiVersion: "rbac.authorization.k8s.io/v1beta1",
kind: "ClusterRole",
metadata: {
labels: {
app: "spartakus",
},
name: "spartakus",
},
rules: [
{
apiGroups: [
"",
],
resources: [
"nodes",
],
verbs: [
"get",
"list",
],
},
],
}, // role
roleBinding:: {
apiVersion: "rbac.authorization.k8s.io/v1beta1",
kind: "ClusterRoleBinding",
metadata: {
labels: {
app: "spartakus",
},
name: "spartakus",
},
roleRef: {
apiGroup: "rbac.authorization.k8s.io",
kind: "ClusterRole",
name: "spartakus",
},
subjects: [
{
kind: "ServiceAccount",
name: "spartakus",
namespace: namespace,
},
],
}, // roleBinding
serviceAccount: {
apiVersion: "v1",
kind: "ServiceAccount",
metadata: {
labels: {
app: "spartakus",
},
name: "spartakus",
namespace: namespace,
},
},
deployment(usageId):: {
apiVersion: "extensions/v1beta1",
kind: "Deployment",
metadata: {
name: "spartakus-volunteer",
namespace: namespace,
},
spec: {
replicas: 1,
template: {
metadata: {
labels: {
app: "spartakus-volunteer",
},
},
spec: {
containers: [
{
image: "gcr.io/google_containers/spartakus-amd64:v1.0.0",
name: "volunteer",
args: [
"volunteer",
"--cluster-id=" + usageId,
"--database=https://stats-collector.kubeflow.org",
],
},
],
serviceAccountName: "spartakus",
}, // spec
},
},
}, // deployment
},
}

View File

@ -1,247 +0,0 @@
{
// Parameters are intended to be late bound.
params:: {
name: null,
labels: {
app: $.params.name,
},
serviceType: "ClusterIP",
logDir: "",
defaultTbImage: "gcr.io/tensorflow/tensorflow:latest",
// Whether or not to enable s3 parameters
s3Enable:: false,
// Which cloud to use
cloud:: null,
},
// Parameters specific to GCP.
gcpParams:: {
gcpCredentialSecretName: "",
} + $.params,
// Parameters that control S3 access
// params is added last so that user-supplied values in params override these defaults.
s3params:: {
// Name of the k8s secrets containing S3 credentials
s3SecretName: "",
// Name of the key in the k8s secret containing AWS_ACCESS_KEY_ID.
s3SecretAccesskeyidKeyName: "",
// Name of the key in the k8s secret containing AWS_SECRET_ACCESS_KEY.
s3SecretSecretaccesskeyKeyName: "",
// S3 region
s3AwsRegion: "us-west-1",
// TODO(jlewi): We should use util.toBool to automatically convert to actual boolean values.
// The use of strings is left over from when these were prototype parameters, which only supported string types.
// Whether or not to use https for S3 connections
s3UseHttps: "true",
// Whether or not to verify https certificates for S3 connections
s3VerifySsl: "true",
// URL for your s3-compatible endpoint.
s3Endpoint: "http://s3.us-west-1.amazonaws.com,",
} + $.params,
components:: {
all::
// TODO(jlewi): It would be better to structure s3 as a mixin.
// As an example it would be great to allow S3 and GCS parameters
// to be enabled simultaneously. This should be doable because
// each entails adding a set of environment variables and volumes
// to the containers. These volumes/environment variables shouldn't
// overlap so there's no reason we shouldn't be able to just add
// both modifications to the base container.
// I think we want to restructure things as mixins so they can just
// be added.
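// A rough sketch of that mixin idea (not implemented here): each flavour would only
// contribute environment variables and volumes, e.g.
//   local s3Mixin = { tbContainer+: { env+: $.s3parts.s3Env } };
//   local gcpMixin = { tbContainer+: { env+: $.gcpParts.gcpEnv } };
// and a deployment could then be assembled from $.parts + s3Mixin + gcpMixin as needed.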
if $.params.s3Enable then
[
$.s3parts.tb,
$.s3parts.tfDeployment,
]
else if $.params.cloud == "gcp" then
[
$.gcpParts.tb,
$.gcpParts.tfDeployment,
]
else
[
$.parts.tb,
$.parts.tfDeployment,
],
}.all,
parts:: {
// We define the containers one level beneath parts because combined with jsonnet late binding
// this makes it easy for users to override specific bits of the container.
tbContainer:: {
name: $.params.name,
image: $.params.defaultTbImage,
imagePullPolicy: "IfNotPresent",
args: [
$.params.logDir,
"--port=9000",
],
command: [
"/usr/local/bin/tensorboard",
],
ports: [
{
containerPort: 9000,
},
],
resources: {
requests: {
memory: "1Gi",
cpu: "1",
},
limits: {
memory: "4Gi",
cpu: "4",
},
},
}, // tbContainer
tfDeployment: {
apiVersion: "extensions/v1beta1",
kind: "Deployment",
metadata: {
name: $.params.name,
namespace: $.params.namespace,
labels: $.params.labels,
},
spec: {
template: {
metadata: {
labels: $.params.labels,
},
spec: {
containers: [
$.parts.tbContainer,
],
},
},
},
}, // tfDeployment
tb: {
apiVersion: "v1",
kind: "Service",
metadata: {
labels: $.params.labels,
name: $.params.name,
namespace: $.params.namespace,
annotations: {
"getambassador.io/config":
std.join("\n", [
"---",
"apiVersion: ambassador/v0",
"kind: Mapping",
"name: tb-mapping-" + $.params.name + "-get",
"prefix: /tensorboard/ " + $.params.name + "/",
"rewrite: /",
"method: GET",
"service: " + $.params.name + "." + $.params.namespace + ":9000",
]),
}, //annotations
},
spec: {
ports: [
{
name: "tb",
port: 9000,
targetPort: 9000,
},
],
selector: $.params.labels,
type: $.params.serviceType,
},
}, // tb
}, // parts
// Parts specific to S3
s3parts:: $.parts {
s3Env:: [
{ name: "AWS_ACCESS_KEY_ID", valueFrom: { secretKeyRef: { name: $.s3params.s3SecretName, key: $.s3params.s3SecretAccesskeyidKeyName } } },
{ name: "AWS_SECRET_ACCESS_KEY", valueFrom: { secretKeyRef: { name: $.s3params.s3SecretName, key: $.s3params.s3SecretSecretaccesskeyKeyName } } },
{ name: "AWS_REGION", value: $.s3params.s3AwsRegion },
{ name: "S3_REGION", value: $.s3params.s3AwsRegion },
{ name: "S3_USE_HTTPS", value: $.s3params.s3UseHttps },
{ name: "S3_VERIFY_SSL", value: $.s3params.s3VerifySsl },
{ name: "S3_ENDPOINT", value: $.s3params.s3Endpoint },
],
tbContainer: $.parts.tbContainer {
env+: $.s3parts.s3Env,
},
tfDeployment: $.parts.tfDeployment {
spec+: {
template+: {
spec+: {
containers: [
$.s3parts.tbContainer,
],
},
},
},
}, // tfDeployment
}, // s3parts
// Parts specific to GCP
gcpParts:: $.parts {
gcpEnv:: [
if $.gcpParams.gcpCredentialSecretName != "" then
{ name: "GOOGLE_APPLICATION_CREDENTIALS", value: "/secret/gcp-credentials/key.json" },
],
tbContainer: $.parts.tbContainer {
env+: $.gcpParts.gcpEnv,
volumeMounts+: [
if $.gcpParams.gcpCredentialSecretName != "" then
{
name: "gcp-credentials",
mountPath: "/secret/gcp-credentials",
},
],
},
tfDeployment: $.parts.tfDeployment {
spec+: {
template+: {
spec+: {
containers: [
$.gcpParts.tbContainer,
],
volumes: [
if $.gcpParams.gcpCredentialSecretName != "" then
{
name: "gcp-credentials",
secret: {
secretName: $.gcpParams.gcpCredentialSecretName,
},
},
],
},
},
},
}, // tfDeployment
}, // gcpParts
}

View File

@ -1,259 +0,0 @@
local ambassador = import "../ambassador.libsonnet";
local params = {
namespace:: "test-kf-001",
tfAmbassadorServiceType:: "ClusterIP",
tfAmbassadorImage:: "quay.io/datawire/ambassador:0.34.0",
tfStatsdImage:: "quay.io/datawire/statsd:0.34.0",
};
std.assertEqual(
ambassador.parts(params.namespace, params.tfAmbassadorImage).service(params.tfAmbassadorServiceType),
{
apiVersion: "v1",
kind: "Service",
metadata: {
labels: {
service: "ambassador",
},
name: "ambassador",
namespace: "test-kf-001",
},
spec: {
ports: [
{
name: "ambassador",
port: 80,
targetPort: 80,
},
],
selector: {
service: "ambassador",
},
type: "ClusterIP",
},
}
) &&
std.assertEqual(
ambassador.parts(params.namespace, params.tfAmbassadorImage).adminService,
{
apiVersion: "v1",
kind: "Service",
metadata: {
labels: {
service: "ambassador-admin",
},
name: "ambassador-admin",
namespace: "test-kf-001",
},
spec: {
ports: [
{
name: "ambassador-admin",
port: 8877,
targetPort: 8877,
},
],
selector: {
service: "ambassador",
},
type: "ClusterIP",
},
}
) &&
std.assertEqual(
ambassador.parts(params.namespace, params.tfAmbassadorImage).role,
{
apiVersion: "rbac.authorization.k8s.io/v1beta1",
kind: "Role",
metadata: {
name: "ambassador",
namespace: "test-kf-001",
},
rules: [
{
apiGroups: [
"",
],
resources: [
"services",
],
verbs: [
"get",
"list",
"watch",
],
},
{
apiGroups: [
"",
],
resources: [
"configmaps",
],
verbs: [
"create",
"update",
"patch",
"get",
"list",
"watch",
],
},
{
apiGroups: [
"",
],
resources: [
"secrets",
],
verbs: [
"get",
"list",
"watch",
],
},
],
}
) &&
std.assertEqual(
ambassador.parts(params.namespace, params.tfAmbassadorImage).serviceAccount,
{
apiVersion: "v1",
kind: "ServiceAccount",
metadata: {
name: "ambassador",
namespace: "test-kf-001",
},
}
) &&
std.assertEqual(
ambassador.parts(params.namespace, params.tfAmbassadorImage).roleBinding,
{
apiVersion: "rbac.authorization.k8s.io/v1beta1",
kind: "RoleBinding",
metadata: {
name: "ambassador",
namespace: "test-kf-001",
},
roleRef: {
apiGroup: "rbac.authorization.k8s.io",
kind: "Role",
name: "ambassador",
},
subjects: [
{
kind: "ServiceAccount",
name: "ambassador",
namespace: "test-kf-001",
},
],
}
) &&
std.assertEqual(
ambassador.parts(params.namespace, params.tfAmbassadorImage).deploy(params.tfStatsdImage),
{
apiVersion: "extensions/v1beta1",
kind: "Deployment",
metadata: {
name: "ambassador",
namespace: "test-kf-001",
},
spec: {
replicas: 3,
template: {
metadata: {
labels: {
service: "ambassador",
},
namespace: "test-kf-001",
},
spec: {
containers: [
{
env: [
{
name: "AMBASSADOR_NAMESPACE",
valueFrom: {
fieldRef: {
fieldPath: "metadata.namespace",
},
},
},
{
name: "AMBASSADOR_SINGLE_NAMESPACE",
value: "true",
},
],
image: "quay.io/datawire/ambassador:0.34.0",
livenessProbe: {
httpGet: {
path: "/ambassador/v0/check_alive",
port: 8877,
},
initialDelaySeconds: 30,
periodSeconds: 30,
},
name: "ambassador",
readinessProbe: {
httpGet: {
path: "/ambassador/v0/check_ready",
port: 8877,
},
initialDelaySeconds: 30,
periodSeconds: 30,
},
resources: {
limits: {
cpu: 1,
memory: "400Mi",
},
requests: {
cpu: "200m",
memory: "100Mi",
},
},
},
{
image: "quay.io/datawire/statsd:0.34.0",
name: "statsd",
},
],
restartPolicy: "Always",
serviceAccountName: "ambassador",
},
},
},
}
) &&
std.assertEqual(
ambassador.parts(params.namespace, params.tfAmbassadorImage).k8sDashboard("cloud"),
{
apiVersion: "v1",
kind: "Service",
metadata: {
annotations: {
"getambassador.io/config": "---\napiVersion: ambassador/v0\nkind: Mapping\nname: k8s-dashboard-ui-mapping\nprefix: /k8s/ui/\nrewrite: /\ntls: true\nservice: kubernetes-dashboard.kube-system",
},
name: "k8s-dashboard",
namespace: "test-kf-001",
},
spec: {
ports: [
{
port: 443,
targetPort: 8443,
},
],
selector: {
"k8s-app": "kubernetes-dashboard",
},
type: "ClusterIP",
},
}
)

View File

@ -1,161 +0,0 @@
local centraldashboard = import "../centraldashboard.libsonnet";
local params = {
namespace:: "kubeflow",
cloud:: "gke",
};
std.assertEqual(
centraldashboard.parts(params.namespace).deployUi("dashboard/image:latest"),
{
apiVersion: "extensions/v1beta1",
kind: "Deployment",
metadata: {
labels: {
app: "centraldashboard",
},
name: "centraldashboard",
namespace: "kubeflow",
},
spec: {
template: {
metadata: {
labels: {
app: "centraldashboard",
},
},
spec: {
containers: [
{
image: "dashboard/image:latest",
name: "centraldashboard",
ports: [
{
containerPort: 8082,
},
],
},
],
serviceAccountName: "centraldashboard",
},
},
},
}
) &&
std.assertEqual(
centraldashboard.parts(params.namespace).uiService,
{
apiVersion: "v1",
kind: "Service",
metadata: {
labels: {
app: "centraldashboard",
},
name: "centraldashboard",
namespace: "kubeflow",
annotations: {
"getambassador.io/config":
std.join("\n", [
"---",
"apiVersion: ambassador/v0",
"kind: Mapping",
"name: centralui-mapping",
"prefix: /",
"rewrite: /",
"service: centraldashboard." + "kubeflow",
]),
},
},
spec: {
ports: [
{
port: 80,
targetPort: 8082,
},
],
selector: {
app: "centraldashboard",
},
sessionAffinity: "None",
type: "ClusterIP",
},
},
) &&
std.assertEqual(
centraldashboard.parts(params.namespace).uiServiceAccount,
{
apiVersion: "v1",
kind: "ServiceAccount",
metadata: {
name: "centraldashboard",
namespace: "kubeflow",
},
},
) &&
std.assertEqual(
centraldashboard.parts(params.namespace).uiRole,
{
apiVersion: "rbac.authorization.k8s.io/v1beta1",
kind: "ClusterRole",
metadata: {
labels: {
app: "centraldashboard",
},
name: "centraldashboard",
namespace: "kubeflow",
},
rules: [
{
apiGroups: [""],
resources: [
"pods",
"pods/exec",
"pods/log",
],
verbs: [
"get",
"list",
"watch",
],
},
{
apiGroups: [""],
resources: [
"secrets",
],
verbs: [
"get",
],
},
],
},
) &&
std.assertEqual(
centraldashboard.parts(params.namespace).uiRoleBinding,
{
apiVersion: "rbac.authorization.k8s.io/v1beta1",
kind: "ClusterRoleBinding",
metadata: {
labels: {
app: "centraldashboard",
},
name: "centraldashboard",
namespace: "kubeflow",
},
roleRef: {
apiGroup: "rbac.authorization.k8s.io",
kind: "ClusterRole",
name: "centraldashboard",
},
subjects: [
{
kind: "ServiceAccount",
name: "centraldashboard",
namespace: "kubeflow",
},
],
}
)

View File

@ -1,203 +0,0 @@
local iap = import "../iap.libsonnet";
std.assertEqual(iap.parts("namespace").service, {
apiVersion: "v1",
kind: "Service",
metadata: {
labels: {
service: "envoy",
},
name: "envoy",
namespace: "namespace",
},
spec: {
ports: [
{
name: "envoy",
port: 8080,
targetPort: 8080,
},
],
selector: {
service: "envoy",
},
type: "NodePort",
},
}) &&
std.assertEqual(iap.parts("namespace").ingress("secretName", "ipName", "hostname"), {
apiVersion: "extensions/v1beta1",
kind: "Ingress",
metadata: {
name: "envoy-ingress",
namespace: "namespace",
annotations: {
"kubernetes.io/tls-acme": "true",
"ingress.kubernetes.io/ssl-redirect": "true",
"kubernetes.io/ingress.global-static-ip-name": "ipName",
},
},
spec: {
rules: [
{
host: "hostname",
http: {
paths: [
{
backend: {
serviceName: "envoy",
servicePort: 8080,
},
path: "/*",
},
],
},
},
],
tls: [
{
secretName: "secretName",
},
],
},
}) &&
std.assertEqual(iap.parts("namespace").ingress("secretName", "ipName", "null"), {
apiVersion: "extensions/v1beta1",
kind: "Ingress",
metadata: {
name: "envoy-ingress",
namespace: "namespace",
annotations: {
"kubernetes.io/tls-acme": "true",
"ingress.kubernetes.io/ssl-redirect": "true",
"kubernetes.io/ingress.global-static-ip-name": "ipName",
},
},
spec: {
rules: [
{
http: {
paths: [
{
backend: {
serviceName: "envoy",
servicePort: 8080,
},
path: "/*",
},
],
},
},
],
tls: [
{
secretName: "secretName",
},
],
},
}) &&
std.assertEqual(iap.parts("namespace").certificate("secretName", "hostname", "issuer"), {
apiVersion: "certmanager.k8s.io/v1alpha1",
kind: "Certificate",
metadata: {
name: "secretName",
namespace: "namespace",
},
spec: {
secretName: "secretName",
issuerRef: {
name: "issuer",
},
commonName: "hostname",
dnsNames: [
"hostname",
],
acme: {
config: [
{
http01: {
ingress: "envoy-ingress",
},
domains: [
"hostname",
],
},
],
},
},
}) &&
std.assertEqual(iap.parts("namespace").whoamiApp, {
apiVersion: "extensions/v1beta1",
kind: "Deployment",
metadata: {
name: "whoami-app",
namespace: "namespace",
},
spec: {
replicas: 1,
template: {
metadata: {
labels: {
app: "whoami",
},
},
spec: {
containers: [
{
env: [
{
name: "PORT",
value: "8081",
},
],
image: "gcr.io/cloud-solutions-group/esp-sample-app:1.0.0",
name: "app",
ports: [
{
containerPort: 8081,
},
],
readinessProbe: {
failureThreshold: 2,
httpGet: {
path: "/healthz",
port: 8081,
scheme: "HTTP",
},
periodSeconds: 10,
successThreshold: 1,
timeoutSeconds: 5,
},
},
],
},
},
},
}) &&
std.assertEqual(iap.parts("namespace").whoamiService, {
apiVersion: "v1",
kind: "Service",
metadata: {
labels: {
app: "whoami",
},
name: "whoami-app",
namespace: "namespace",
},
spec: {
ports: [
{
port: 80,
targetPort: 8081,
},
],
selector: {
app: "whoami",
},
type: "ClusterIP",
},
})

View File

@ -1,285 +0,0 @@
local jupyterhub = import "../jupyterhub.libsonnet";
local params = {
namespace:: "test-kf-001",
disks:: "disk01,disk02",
jupyterHubAuthenticator:: null,
jupyterHubServiceType:: "ClusterIP",
jupyterHubImage: "gcr.io/kubeflow/jupyterhub-k8s:1.0.1",
jupyterNotebookPVCMount: "/home/jovyan",
jupyterNotebookRegistry: "gcr.io",
jupyterNotebookRepoName: "kubeflow-images-public",
cloud: null,
};
local baseSpawner = importstr "../kubeform_spawner.py";
// TODO(jlewi): We should be able to use std.startsWith in later versions of jsonnet.
//
local config = jupyterhub.parts(params.namespace).jupyterHubConfigMap(params.jupyterHubAuthenticator, params.disks).data["jupyterhub_config.py"];
local configPrefix = std.substr(config, 0, std.length(baseSpawner));
local configSuffix = std.substr(config, std.length(baseSpawner), std.length(config) - std.length(baseSpawner));
local configSuffixLines = std.split(configSuffix, "\n");
// This assertion verifies the config map is the same after zeroing the actual data.
// The data will be compared in subsequent steps.
std.assertEqual(jupyterhub.parts(params.namespace).jupyterHubConfigMap(params.jupyterHubAuthenticator, params.disks) + {
data: {
"jupyterhub_config.py": "",
},
}
, {
apiVersion: "v1",
data: {
"jupyterhub_config.py": "",
},
kind: "ConfigMap",
metadata: {
name: "jupyterhub-config",
namespace: "test-kf-001",
},
}) &&
// This step verifies that the start of the spawner config is the raw file.
std.assertEqual(configPrefix, baseSpawner)
&&
// These steps verify the suffix.
// Verifying each line makes it much easier to debug test failures because if you just compare to a big blob
// of text it's much harder to know where they differ.
std.assertEqual(configSuffixLines[1], "######## Authenticator ######")
&&
std.assertEqual(configSuffixLines[2], "c.JupyterHub.authenticator_class = 'dummyauthenticator.DummyAuthenticator'")
&&
std.assertEqual(configSuffixLines[3], "###### Volumes #######")
&&
std.assertEqual(configSuffixLines[4], 'c.KubeSpawner.volumes.extend([{"name": "disk01", "persistentVolumeClaim": {"claimName": "disk01"}}, {"name": "disk02", "persistentVolumeClaim": {"claimName": "disk02"}}])')
&&
std.assertEqual(configSuffixLines[5], 'c.KubeSpawner.volume_mounts.extend([{"mountPath": "/mnt/disk01", "name": "disk01"}, {"mountPath": "/mnt/disk02", "name": "disk02"}])')
&&
std.assertEqual(jupyterhub.parts(params.namespace).jupyterHubService,
{
apiVersion: "v1",
kind: "Service",
metadata: {
labels: {
app: "tf-hub",
},
name: "tf-hub-0",
namespace: "test-kf-001",
},
spec: {
clusterIP: "None",
ports: [
{
name: "hub",
port: 8000,
},
],
selector: {
app: "tf-hub",
},
},
}) &&
std.assertEqual(jupyterhub.parts(params.namespace).jupyterHubLoadBalancer(params.jupyterHubServiceType),
{
apiVersion: "v1",
kind: "Service",
metadata: {
labels: {
app: "tf-hub-lb",
},
name: "tf-hub-lb",
namespace: "test-kf-001",
annotations: {
"getambassador.io/config":
std.join("\n", [
"---",
"apiVersion: ambassador/v0",
"kind: Mapping",
"name: tf-hub-lb-hub-mapping",
"prefix: /hub/",
"rewrite: /hub/",
"timeout_ms: 300000",
"service: tf-hub-lb." + params.namespace,
"---",
"apiVersion: ambassador/v0",
"kind: Mapping",
"name: tf-hub-lb-user-mapping",
"prefix: /user/",
"rewrite: /user/",
"timeout_ms: 300000",
"service: tf-hub-lb." + params.namespace,
]),
}, //annotations
},
spec: {
ports: [
{
name: "hub",
port: 80,
targetPort: 8000,
},
],
selector: {
app: "tf-hub",
},
type: "ClusterIP",
},
}) &&
std.assertEqual(jupyterhub.parts(params.namespace).jupyterHub(params.jupyterHubImage, params.jupyterNotebookPVCMount, params.cloud, params.jupyterNotebookRegistry, params.jupyterNotebookRepoName),
{
apiVersion: "apps/v1beta1",
kind: "StatefulSet",
metadata: {
name: "tf-hub",
namespace: "test-kf-001",
},
spec: {
replicas: 1,
serviceName: "",
template: {
metadata: {
labels: {
app: "tf-hub",
},
},
spec: {
containers: [
{
command: [
"jupyterhub",
"-f",
"/etc/config/jupyterhub_config.py",
],
env: [
{
name: "NOTEBOOK_PVC_MOUNT",
value: params.jupyterNotebookPVCMount,
},
{
name: "CLOUD_NAME",
value: null,
},
{
name: "REGISTRY",
value: params.jupyterNotebookRegistry,
},
{
name: "REPO_NAME",
value: params.jupyterNotebookRepoName,
},
],
image: "gcr.io/kubeflow/jupyterhub-k8s:1.0.1",
name: "tf-hub",
ports: [
{
containerPort: 8000,
},
{
containerPort: 8081,
},
],
volumeMounts: [
{
mountPath: "/etc/config",
name: "config-volume",
},
],
},
],
serviceAccountName: "jupyter-hub",
volumes: [
{
configMap: {
name: "jupyterhub-config",
},
name: "config-volume",
},
],
},
},
updateStrategy: {
type: "RollingUpdate",
},
},
}) &&
std.assertEqual(jupyterhub.parts(params.namespace).jupyterHubRole,
{
apiVersion: "rbac.authorization.k8s.io/v1beta1",
kind: "Role",
metadata: {
name: "jupyter-role",
namespace: "test-kf-001",
},
rules: [
{
apiGroups: [
"",
],
resources: [
"pods",
"persistentvolumeclaims",
],
verbs: [
"get",
"watch",
"list",
"create",
"delete",
],
},
{
apiGroups: [
"",
],
resources: [
"events",
],
verbs: [
"get",
"watch",
"list",
],
},
],
}) &&
std.assertEqual(jupyterhub.parts(params.namespace).jupyterHubServiceAccount,
{
apiVersion: "v1",
kind: "ServiceAccount",
metadata: {
labels: {
app: "jupyter-hub",
},
name: "jupyter-hub",
namespace: "test-kf-001",
},
}) &&
std.assertEqual(jupyterhub.parts(params.namespace).jupyterHubRoleBinding,
{
apiVersion: "rbac.authorization.k8s.io/v1beta1",
kind: "RoleBinding",
metadata: {
name: "jupyter-role",
namespace: "test-kf-001",
},
roleRef: {
apiGroup: "rbac.authorization.k8s.io",
kind: "Role",
name: "jupyter-role",
},
subjects: [
{
kind: "ServiceAccount",
name: "jupyter-hub",
namespace: "test-kf-001",
},
],
})

View File

@ -1,93 +0,0 @@
local nfs = import "../nfs.libsonnet";
local params = {
namespace:: "test-kf-001",
name:: "nfs",
};
std.assertEqual(
nfs.parts(params.namespace, params.name).serviceAccount,
{
apiVersion: "v1",
kind: "ServiceAccount",
metadata: {
labels: {
app: "nfsnfs-provisioner",
},
name: "nfs",
namespace: "test-kf-001",
},
}
) &&
std.assertEqual(
nfs.parts(params.namespace, params.name).role,
{
apiVersion: "rbac.authorization.k8s.io/v1beta1",
kind: "Role",
metadata: {
name: "nfs",
namespace: "test-kf-001",
},
rules: [
{
apiGroups: [
"*",
],
resources: [
"*",
],
verbs: [
"*",
],
},
],
}
) &&
std.assertEqual(
nfs.parts(params.namespace, params.name).roleBinding,
{
apiVersion: "rbac.authorization.k8s.io/v1beta1",
kind: "RoleBinding",
metadata: {
name: "nfs-nfs-role",
namespace: "test-kf-001",
},
roleRef: {
apiGroup: "rbac.authorization.k8s.io",
kind: "Role",
name: "nfs",
},
subjects: [
{
kind: "ServiceAccount",
name: "nfs",
namespace: "test-kf-001",
},
],
}
) &&
std.assertEqual(
nfs.parts(params.namespace, params.name).clusterRoleBinding,
{
apiVersion: "rbac.authorization.k8s.io/v1beta1",
kind: "ClusterRoleBinding",
metadata: {
name: "nfs-nfs-role",
namespace: "test-kf-001",
},
roleRef: {
apiGroup: "rbac.authorization.k8s.io",
kind: "ClusterRole",
name: "system:persistent-volume-provisioner",
},
subjects: [
{
kind: "ServiceAccount",
name: "nfs",
namespace: "test-kf-001",
},
],
}
)

View File

@ -1,110 +0,0 @@
local spartakus = import "../spartakus.libsonnet";
local params = {
namespace:: "test-kf-001",
usageId:: "unknown_cluster",
};
std.assertEqual(
spartakus.parts(params.namespace).role,
{
apiVersion: "rbac.authorization.k8s.io/v1beta1",
kind: "ClusterRole",
metadata: {
labels: {
app: "spartakus",
},
name: "spartakus",
},
rules: [
{
apiGroups: [
"",
],
resources: [
"nodes",
],
verbs: [
"get",
"list",
],
},
],
}
) &&
std.assertEqual(
spartakus.parts(params.namespace).roleBinding,
{
apiVersion: "rbac.authorization.k8s.io/v1beta1",
kind: "ClusterRoleBinding",
metadata: {
labels: {
app: "spartakus",
},
name: "spartakus",
},
roleRef: {
apiGroup: "rbac.authorization.k8s.io",
kind: "ClusterRole",
name: "spartakus",
},
subjects: [
{
kind: "ServiceAccount",
name: "spartakus",
namespace: "test-kf-001",
},
],
}
) &&
std.assertEqual(
spartakus.parts(params.namespace).serviceAccount,
{
apiVersion: "v1",
kind: "ServiceAccount",
metadata: {
labels: {
app: "spartakus",
},
name: "spartakus",
namespace: "test-kf-001",
},
}
) &&
std.assertEqual(
spartakus.parts(params.namespace).deployment(params.usageId),
{
apiVersion: "extensions/v1beta1",
kind: "Deployment",
metadata: {
name: "spartakus-volunteer",
namespace: "test-kf-001",
},
spec: {
replicas: 1,
template: {
metadata: {
labels: {
app: "spartakus-volunteer",
},
},
spec: {
containers: [
{
args: [
"volunteer",
"--cluster-id=unknown_cluster",
"--database=https://stats-collector.kubeflow.org",
],
image: "gcr.io/google_containers/spartakus-amd64:v1.0.0",
name: "volunteer",
},
],
serviceAccountName: "spartakus",
},
},
},
}
)

View File

@ -1,241 +0,0 @@
local tfjob = import "../tf-job-operator.libsonnet";
local params = {
namespace:: "test-kf-001",
cloud:: "azure",
tfJobImage:: "gcr.io/kubeflow-images-public/tf_operator:v20180226-403",
tfDefaultImage:: "null",
};
std.assertEqual(
tfjob.parts(params.namespace).tfJobDeploy(params.tfJobImage),
{
apiVersion: "extensions/v1beta1",
kind: "Deployment",
metadata: {
name: "tf-job-operator",
namespace: "test-kf-001",
},
spec: {
replicas: 1,
template: {
metadata: {
labels: {
name: "tf-job-operator",
},
},
spec: {
containers: [
{
command: [
"/opt/mlkube/tf-operator",
"--controller-config-file=/etc/config/controller_config_file.yaml",
"--alsologtostderr",
"-v=1",
],
env: [
{
name: "MY_POD_NAMESPACE",
valueFrom: {
fieldRef: {
fieldPath: "metadata.namespace",
},
},
},
{
name: "MY_POD_NAME",
valueFrom: {
fieldRef: {
fieldPath: "metadata.name",
},
},
},
],
image: "gcr.io/kubeflow-images-public/tf_operator:v20180226-403",
name: "tf-job-operator",
volumeMounts: [
{
mountPath: "/etc/config",
name: "config-volume",
},
],
},
],
serviceAccountName: "tf-job-operator",
volumes: [
{
configMap: {
name: "tf-job-operator-config",
},
name: "config-volume",
},
],
},
},
},
}
) &&
std.assertEqual(
tfjob.parts(params.namespace).configMap(params.cloud, params.tfDefaultImage),
{
apiVersion: "v1",
data: {
"controller_config_file.yaml": '{\n "grpcServerFilePath": "/opt/mlkube/grpc_tensorflow_server/grpc_tensorflow_server.py"\n}',
},
kind: "ConfigMap",
metadata: {
name: "tf-job-operator-config",
namespace: "test-kf-001",
},
}
) &&
std.assertEqual(
tfjob.parts(params.namespace).serviceAccount,
{
apiVersion: "v1",
kind: "ServiceAccount",
metadata: {
labels: {
app: "tf-job-operator",
},
name: "tf-job-operator",
namespace: "test-kf-001",
},
}
) &&
std.assertEqual(
tfjob.parts(params.namespace).operatorRole,
{
apiVersion: "rbac.authorization.k8s.io/v1beta1",
kind: "ClusterRole",
metadata: {
labels: {
app: "tf-job-operator",
},
name: "tf-job-operator",
},
rules: [
{
apiGroups: [
"tensorflow.org",
"kubeflow.org",
],
resources: [
"tfjobs",
],
verbs: [
"*",
],
},
{
apiGroups: [
"apiextensions.k8s.io",
],
resources: [
"customresourcedefinitions",
],
verbs: [
"*",
],
},
{
apiGroups: [
"storage.k8s.io",
],
resources: [
"storageclasses",
],
verbs: [
"*",
],
},
{
apiGroups: [
"batch",
],
resources: [
"jobs",
],
verbs: [
"*",
],
},
{
apiGroups: [
"",
],
resources: [
"configmaps",
"pods",
"services",
"endpoints",
"persistentvolumeclaims",
"events",
],
verbs: [
"*",
],
},
{
apiGroups: [
"apps",
"extensions",
],
resources: [
"deployments",
],
verbs: [
"*",
],
},
],
}
) &&
std.assertEqual(
tfjob.parts(params.namespace).operatorRoleBinding,
{
apiVersion: "rbac.authorization.k8s.io/v1beta1",
kind: "ClusterRoleBinding",
metadata: {
labels: {
app: "tf-job-operator",
},
name: "tf-job-operator",
},
roleRef: {
apiGroup: "rbac.authorization.k8s.io",
kind: "ClusterRole",
name: "tf-job-operator",
},
subjects: [
{
kind: "ServiceAccount",
name: "tf-job-operator",
namespace: "test-kf-001",
},
],
}
) &&
std.assertEqual(
tfjob.parts(params.namespace).crd,
{
apiVersion: "apiextensions.k8s.io/v1beta1",
kind: "CustomResourceDefinition",
metadata: {
name: "tfjobs.kubeflow.org",
},
spec: {
group: "kubeflow.org",
names: {
kind: "TFJob",
plural: "tfjobs",
singular: "tfjob",
},
version: "v1alpha1",
},
}
)

Some files were not shown because too many files have changed in this diff.