mirror of https://github.com/kubeflow/examples.git
Merge branch 'master' into patch-1
commit 225a7e9f90
@@ -13,7 +13,7 @@ ignore=third_party

# Add files or directories matching the regex patterns to the blacklist. The
# regex matches against base names, not paths.
ignore-patterns=prediction_pb2.py,prediction_pb2_grpc.py,mnist_DDP.py,mnistddpserving.py
ignore-patterns=object_detection_grpc_client.py,prediction_pb2.py,prediction_pb2_grpc.py,mnist_DDP.py,mnistddpserving.py

# Pickle collected data for later comparisons.
persistent=no
@@ -7,7 +7,6 @@ acting as a starting point for new users and a reference guide for experienced u
This repository is home to the following types of examples and demos:
* [End-to-end](#end-to-end)
* [Component-focused](#component-focused)
* [Application-specific](#application-specific)
* [Demos](#demos)

## End-to-end
@@ -81,11 +80,6 @@ This example covers the following concepts:
1. GCS and GKE
1. Serving with Seldon Core


## Application-specific

1.

## Demos

Demos are for showing Kubeflow or one of its components publicly, with the
@@ -1,4 +1,8 @@
IMG = gcr.io/kubeflow-examples/code-search
# Makefile for building images
#
# To override variables do
# make ${TARGET} ${VAR}=${VALUE}
IMG ?= gcr.io/kubeflow-examples/code-search

# List any changed files. We only include files in the notebooks directory,
# because that is the code in the docker image.
@@ -19,6 +23,15 @@ all: build

TF_VERSION=1.11.0


# Whether to use cached images with GCB
USE_IMAGE_CACHE ?= true

echo:
	@echo IMG=$(IMG)
	@echo GIT_VERSION=$(GIT_VERSION)
	@echo TAG=$(TAG)

# To build without the cache set the environment variable
# export DOCKER_BUILD_OPTS=--no-cache
build-cpu:
@@ -48,13 +61,17 @@ build-dataflow:

build: build-cpu build-gpu build-dataflow

# Build using GCB. This is useful if we are on a slow internet connection
# and don't want to pull
build-gcb:
# Build the GCB workflow
build-gcb-spec:
	rm -rf ./build
	mkdir -p build
	jsonnet ./docker/t2t/build.jsonnet --ext-str gitVersion=$(GIT_VERSION) --ext-str tag=$(TAG) \
	jsonnet ./docker/t2t/build.jsonnet --ext-str imageBase=$(IMG) \
		--ext-str gitVersion=$(GIT_VERSION) --ext-str tag=$(TAG) \
		--ext-str useImageCache=$(USE_IMAGE_CACHE) \
		> ./build/build.json
# Build using GCB. This is useful if we are on a slow internet connection
# and don't want to pull
build-gcb: build-gcb-spec
	cp -r ./docker ./build/
	cp -r ./src ./build/
	rm -rf ./build/src/code_search/dataflow/cli/test_data
@@ -26,6 +26,16 @@ We are using the following project
```
ks12 apply cs_demo -c search-index-server
```

1. Copy the GCP service account to the namespace where the servers run

   * The serving piece runs in a different namespace from Kubeflow
   * We need to copy the GCP service account to that namespace because the pod will try to mount it.

   ```
   kubectl -n kubeflow get secret user-gcp-sa -o json | jq -r '.data["user-gcp-sa.json"]' | base64 -d > ${SECRET_FILE}
   kubectl -n cs-web-app create secret generic user-gcp-sa --from-file=user-gcp-sa.json=${SECRET_FILE}
   ```
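   For illustration only, the same copy can be scripted with the Kubernetes Python client instead of kubectl and jq. This is a minimal sketch, assuming a working kubeconfig and the same secret and namespace names as above; it is not part of the repository:

   ```
   # Hypothetical helper: copy the user-gcp-sa secret between namespaces.
   # Mirrors the kubectl commands above; assumes a working kubeconfig.
   from kubernetes import client, config

   def copy_secret(name="user-gcp-sa", src="kubeflow", dst="cs-web-app"):
       config.load_kube_config()
       v1 = client.CoreV1Api()
       secret = v1.read_namespaced_secret(name, src)
       # Strip server-assigned metadata before re-creating in the target namespace.
       new_secret = client.V1Secret(
           metadata=client.V1ObjectMeta(name=name, namespace=dst),
           data=secret.data,
           type=secret.type,
       )
       v1.create_namespaced_secret(dst, new_secret)

   if __name__ == "__main__":
       copy_secret()
   ```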
# Install Argo CD

```
@@ -0,0 +1,145 @@
"""Count lines of code in different types of file.

This has nothing to do with actually running code search.

The sole purpose of this script is to collect data for the presentation to
illustrate the point that most effort isn't spent on ML.
"""

import argparse
import csv
import logging
import os
import re
import sys
import tempfile

# Mapping from categories to regexes to include
# These are applied to the full path.
MATCH_RES = {
  "dataflow": [re.compile(r".*dataflow.*\.py")],
  "packaging (e.g dockerfile)": [
    re.compile(".*Dockerfile.*"),
    re.compile(r"code_search/src/.*requirements.*\.txt")],
  "cloud config": [re.compile(".*gcp_config.*")],
  "k8s & kubeflow config": [
    re.compile(r".*/cs-demo-1103/ks_app/components/.*"),
    re.compile(r".*/cs-demo-1103/k8s_specs/.*")],
  "model": [
    re.compile(r".*t2t/.*\.py")
  ],
  "serving k8s config": [
    re.compile(r".*/ks-web-app/components/.*"),
  ],
  "batch k8s config": [
    re.compile(r".*/kubeflow/components/.*"),
  ],
  "serving code": [
    re.compile(r".*/code_search/nmslib/.*\.py"),
    re.compile(r".*/ui.*\.js$"),
  ],
}

# Regexes matching files to exclude
NAME_EXCLUDES = [
  re.compile(r".*\.pyc"),
  re.compile(r"__init__\.py"),
]

class Results(object):
  def __init__(self):
    self.files = []
    self.loc = 0

  def add_file(self, full_path):
    self.files.append(full_path)
    with open(full_path) as hf:
      lines = hf.readlines()
      self.loc += len(lines)

  @property
  def num_files(self):
    return len(self.files)

def classify_files(root_dir):
  """Return lists of files in different categories

  Args:
    root_dir: Root directory to begin searching in

  Returns:
    categories: Dictionary mapping a category to list of files.
  """
  categories = {}
  for k in MATCH_RES.keys():
    categories[k] = Results()

  for root, _, files in os.walk(root_dir):
    for name in files:
      full_path = os.path.join(root, name)
      exclude = False
      for m in NAME_EXCLUDES:
        if m.match(name):
          exclude = True
          break
      if exclude:
        continue
      for k, patterns in MATCH_RES.items():
        for p in patterns:
          if p.match(full_path):
            categories[k].add_file(full_path)
            break

  return categories

def main():
  logging.basicConfig(level=logging.INFO,
                      format=('%(levelname)s|%(asctime)s'
                              '|%(pathname)s|%(lineno)d| %(message)s'),
                      datefmt='%Y-%m-%dT%H:%M:%S',
                      )
  logging.getLogger().setLevel(logging.INFO)

  parser = argparse.ArgumentParser(
      description="Create a CSV file counting lines of code by category.")

  parser.add_argument(
    "--output",
    default="",
    type=str,
    help="The file to write.")

  args = parser.parse_args()

  if not args.output:
    with tempfile.NamedTemporaryFile(prefix="tmpCS_demo_code_stats", dir=None,
                                     suffix=".csv",
                                     delete=True) as hf:
      args.output = hf.name
    logging.info("--output not specified; defaulting to %s", args.output)

  root_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
  logging.info("root_dir=%s", root_dir)

  categories = classify_files(root_dir)

  for k, v in categories.items():
    for path in v.files:
      print(k, path)

  logging.info("Writing output to %s", args.output)
  with open(args.output, "w") as hf:
    writer = csv.writer(hf)
    std_writer = csv.writer(sys.stdout)

    row = ["category", "number of files", "lines of code"]
    writer.writerow(row)
    std_writer.writerow(row)

    for k, v in categories.items():
      row = [k, v.num_files, v.loc]
      writer.writerow(row)
      std_writer.writerow(row)

if __name__ == "__main__":
  main()
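A note on the matching in the script above: `re.Pattern.match` anchors at the start of the string, which is why the include patterns begin with `.*` when applied to full paths. A small illustration (paths hypothetical, not from the repository):

```
import re

# match() anchors at position 0, so a leading ".*" is needed
# for the pattern to hit anywhere inside the path.
p = re.compile(r".*dataflow.*\.py")
print(bool(p.match("src/code_search/dataflow/cli/foo.py")))  # True
print(bool(re.match(r"dataflow.*\.py", "src/dataflow/foo.py")))  # False: no leading ".*"
```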
@@ -348,7 +348,7 @@ spec:
        - --status-processors
        - "20"
        - --operation-processors
        - "10"
        - "10"
        image: argoproj/argocd-application-controller:v0.10.6
        name: application-controller
      serviceAccountName: application-controller
@@ -402,6 +402,7 @@ spec:
        - /shared/app
        - --repo-server
        - argocd-repo-server:8081
        - --insecure
        image: argoproj/argocd-server:v0.10.6
        name: argocd-server
        readinessProbe:
@@ -0,0 +1,29 @@
# Ambassador is only monitoring namespace kubeflow
# so we define another service inside the kubeflow namespace
# to do the mapping.
apiVersion: v1
kind: Service
metadata:
  annotations:
    getambassador.io/config: |-
      ---
      apiVersion: ambassador/v0
      kind: Mapping
      name: argo-cd-mapping
      prefix: /argocd/
      rewrite: /argocd/
      service: argocd-server.argocd
  name: argocd-mapping
  namespace: kubeflow
spec:
  type: ExternalName
  externalName: argocd-server.argocd.svc.cluster.local
  ports:
  - name: http
    port: 80
    protocol: TCP
    targetPort: 8080
  - name: https
    port: 443
    protocol: TCP
    targetPort: 8080
@@ -40,7 +40,7 @@
      useJupyterLabAsDefault: "false",
    },
    centraldashboard: {
      image: "gcr.io/kubeflow-images-public/centraldashboard:v0.3.0",
      image: "gcr.io/kubeflow-images-public/centraldashboard:v0.3.4",
      name: "centraldashboard",
    },
    "tf-job-operator": {
@@ -267,6 +267,7 @@
      ],
      resources: [
        "pods",
        "pods/log",
        "services",
      ],
      verbs: [
@@ -17,8 +17,8 @@ spec:
    # with the web app components is checked in.
    # repoURL: https://github.com/kubeflow/examples.git
    # targetRevision: HEAD
    repoURL: https://github.com/jlewi/examples.git
    targetRevision: cs_demo_argo_cd
    repoURL: https://github.com/kubeflow/examples.git
    targetRevision: master
  syncPolicy:
    automated:
      prune: True
@@ -20,7 +20,7 @@ The test can be run as follows

```
cd code_search/src
python3 -m code_searcch.t2t.similarity_transformer_export_test
python3 -m code_search.t2t.similarity_transformer_test
```
The test just runs the relevant T2T steps and verifies they succeed. No additional
checks are executed.
@@ -17,26 +17,26 @@ component="search-index-creator"

usage() {
  echo "Usage: launch_search_index_creator_job.sh
    --workflowId=<workflow id invoking the container>
    --cluster=<cluster to deploy job to>
    --functionEmbeddingsDir=<input function embedding dir>
    --indexFile=<index file>
    --lookupFile=<lookup file>
    --functionEmbeddingsDir=<input function embedding dir>
    --timeout=<timeout>
    --namespace=<kubernetes namespace>
    --cluster=<cluster to deploy job to>"
    --timeout=<timeout>
    --workflowId=<workflow id invoking the container>"
}

# List of required parameters
names=(workflowId indexFile lookupFile dataDir namespace cluster)
names=(cluster functionEmbeddingsDir indexFile lookupFile namespace workflowId)

source "${DIR}/parse_arguments.sh"
source "${DIR}/initialize_kubectl.sh"

# Apply parameters
ks param set ${component} functionEmbeddingsDir ${functionEmbeddingsDir} --env ${ksEnvName}
ks param set ${component} indexFile ${indexFile} --env ${ksEnvName}
ks param set ${component} jobNameSuffix ${workflowId} --env ${ksEnvName}
ks param set ${component} lookupFile ${lookupFile} --env ${ksEnvName}
ks param set ${component} indexFile ${indexFile} --env ${ksEnvName}

ks show ${ksEnvName} -c "${component}"
ks apply ${ksEnvName} -c "${component}"
@@ -21,37 +21,36 @@ workerMachineType=n1-highcpu-32

usage() {
  echo "Usage: submit_code_embeddings_job.sh
    --workflowId=<workflow id invoking the container>
    --modelDir=<directory contains the model>
    --dataDir=<data dir>
    --functionEmbeddingsDir=<output function embedding dir>
    --tokenPairsBQTable=<input token pairs BQ table>
    --cluster=<cluster to deploy job to>
    --dataDir=<data dir containing the pre generated vocabulary file>
    --functionEmbeddingsBQTable=<output function embedding BQ table>
    --functionEmbeddingsDir=<output function embedding dir>
    --modelDir=<directory contains the model>
    --namespace=<kubernetes namespace>
    --numWorkers=<num of workers>
    --project=<project>
    --timeout=<timeout>
    --workerMachineType=<worker machine type>
    --workingDir=<working dir>
    --cluster=<cluster to deploy job to>
    --namespace=<kubernetes namespace>"
    --workflowId=<workflow id invoking the container>
    --workingDir=<working dir>"
}

# List of required parameters
names=(dataDir modelDir functionEmbeddingsDir tokenPairsBQTable functionEmbeddingsBQTable workingDir workflowId cluster namespace)
names=(cluster dataDir functionEmbeddingsBQTable functionEmbeddingsDir modelDir namespace project workflowId workingDir)

source "${DIR}/parse_arguments.sh"
source "${DIR}/initialize_kubectl.sh"

# Apply parameters
ks param set ${component} jobNameSuffix ${workflowId} --env ${ksEnvName}
ks param set ${component} dataDir ${dataDir} --env ${ksEnvName}
ks param set ${component} functionEmbeddingsDir ${functionEmbeddingsDir} --env ${ksEnvName}
ks param set ${component} tokenPairsBQTable ${tokenPairsBQTable} --env ${ksEnvName}
ks param set ${component} functionEmbeddingsBQTable ${functionEmbeddingsBQTable} --env ${ksEnvName}
ks param set ${component} functionEmbeddingsDir ${functionEmbeddingsDir} --env ${ksEnvName}
ks param set ${component} jobNameSuffix ${workflowId} --env ${ksEnvName}
ks param set ${component} modelDir ${modelDir} --env ${ksEnvName}
ks param set ${component} project ${project} --env ${ksEnvName}
ks param set ${component} workingDir ${workingDir} --env ${ksEnvName}
ks param set ${component} numWorkers ${numWorkers} --env ${ksEnvName}
ks param set ${component} project ${project} --env ${ksEnvName}
ks param set ${component} workerMachineType ${workerMachineType} --env ${ksEnvName}
ks param set ${component} workingDir ${workingDir} --env ${ksEnvName}

ks show ${ksEnvName} -c "${component}"
ks apply ${ksEnvName} -c "${component}"
@@ -8,22 +8,23 @@ set -ex
DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" > /dev/null && pwd)"

branch=master
# Ksonnet Environment name. Always use pipeline
ksEnvName="pipeline"

usage() {
  echo "Usage: update_index.sh
    --baseGitRepo=<base git repo name>
    --baseBranch=<base branch>
    --appDir=<ksonnet app dir>
    --baseBranch=<base branch>
    --baseGitRepo=<base git repo name>
    --botEmail=<email account of the bot that send the PR>
    --forkGitRepo=<github repo with Argo CD hooked up>
    --env=<ksonnet environment>
    --indexFile=<index file>
    --lookupFile=<lookup file>
    --workflowId=<workflow id invoking the container>
    --botEmail=<email account of the bot that send the PR>"
    --workflowId=<workflow id invoking the container>"
}

# List of required parameters
names=(baseGitRepo baseBranch appDir forkGitRepo env indexFile lookupFile workflowId botEmail)
names=(appDir baseBranch baseGitRepo botEmail forkGitRepo indexFile lookupFile workflowId)

source "${DIR}/parse_arguments.sh"

@@ -44,8 +45,8 @@ git fetch upstream
git merge upstream/${baseBranch} master

git checkout -b ${workflowId}
ks param set --env=${env} search-index-server indexFile ${indexFile}
ks param set --env=${env} search-index-server lookupFile ${lookupFile}
ks param set --env=${ksEnvName} search-index-server indexFile ${indexFile}
ks param set --env=${ksEnvName} search-index-server lookupFile ${lookupFile}
git add . && git commit -m "Update the lookup and index file."

FILE=$(mktemp tmp.create_pull_request.XXXX)
@@ -3,10 +3,18 @@ ARG BASE_IMAGE_TAG=1.8.0
FROM tensorflow/tensorflow:$BASE_IMAGE_TAG

RUN pip --no-cache-dir install oauth2client~=4.1.0 &&\
    apt-get update && apt-get install -y jq git &&\
    apt-get update && apt-get install -y jq git python3-pip &&\
    rm -rf /var/lib/apt/lists/*

RUN pip --no-cache-dir install tensor2tensor~=1.10.0 tensorflow-hub~=0.1.1
RUN pip --no-cache-dir install \
    tensor2tensor~=1.10.0 \
    tensorflow-hub~=0.1.1 \
    six

RUN pip3 --no-cache-dir install \
    tensor2tensor~=1.10.0 \
    tensorflow-hub~=0.1.1 \
    six

ADD src/code_search /app/code_search
ADD src /src
@@ -2,84 +2,94 @@
// use latest as a cache so that rebuilds are fast
// https://cloud.google.com/cloud-build/docs/speeding-up-builds#using_a_cached_docker_image
{

  "steps": [
    {
      "id": "pull-cpu",
      "name": "gcr.io/cloud-builders/docker",
      "args": ["pull", "gcr.io/kubeflow-examples/code-search:latest"],
      "waitFor": ["-"],
    },
    {
      "id": "build-cpu",
      "name": "gcr.io/cloud-builders/docker",
      "args": ["build", "-t", "gcr.io/kubeflow-examples/code-search:" + std.extVar("tag"),
               "--label=git-versions=" + std.extVar("gitVersion"),
               "--build-arg", "BASE_IMAGE_TAG=1.11.0",
               "--file=docker/t2t/Dockerfile",
               "--cache-from=gcr.io/kubeflow-examples/code-search:latest",
               "."],
      "waitFor": ["pull-cpu"],
    },
    {
      "id": "tag-cpu",
      "name": "gcr.io/cloud-builders/docker",
      "args": ["tag", "gcr.io/kubeflow-examples/code-search:" + std.extVar("tag"),
               "gcr.io/kubeflow-examples/code-search:latest",],
      "waitFor": ["build-cpu"],
    },
    {
      "id": "pull-gpu",
      "name": "gcr.io/cloud-builders/docker",
      "args": ["pull", "gcr.io/kubeflow-examples/code-search-gpu:latest"],
      "waitFor": ["-"],
    },
    {
      "id": "build-gpu",
      "name": "gcr.io/cloud-builders/docker",
      "args": ["build", "-t", "gcr.io/kubeflow-examples/code-search-gpu:" + std.extVar("tag"),
               "--label=git-versions=" + std.extVar("gitVersion"),
               "--build-arg", "BASE_IMAGE_TAG=1.11.0-gpu",
               "--file=docker/t2t/Dockerfile",
               "--cache-from=gcr.io/kubeflow-examples/code-search-gpu:latest",
               "."],
      "waitFor": ["pull-gpu"],
    },
    {
      "id": "tag-gpu",
      "name": "gcr.io/cloud-builders/docker",
      "args": ["tag", "gcr.io/kubeflow-examples/code-search-gpu:" + std.extVar("tag"),
               "gcr.io/kubeflow-examples/code-search-gpu:latest",],
      "waitFor": ["build-gpu"],
    },
    {
      "id": "pull-dataflow",
      "name": "gcr.io/cloud-builders/docker",
      "args": ["pull", "gcr.io/kubeflow-examples/code-search-dataflow:latest"],
      "waitFor": ["-"],
    },
    {
      "id": "build-dataflow",
      "name": "gcr.io/cloud-builders/docker",
      "args": ["build", "-t", "gcr.io/kubeflow-examples/code-search-dataflow:" + std.extVar("tag"),
               "--label=git-versions=" + std.extVar("gitVersion"),
               "--file=docker/t2t/Dockerfile.dataflow",
               "--cache-from=gcr.io/kubeflow-examples/code-search-dataflow:latest",
               "."],
      "waitFor": ["pull-dataflow"],
    },
    {
      "id": "tag-dataflow",
      "name": "gcr.io/cloud-builders/docker",
      "args": ["tag", "gcr.io/kubeflow-examples/code-search-dataflow:" + std.extVar("tag"),
               "gcr.io/kubeflow-examples/code-search-dataflow:latest",],
      "waitFor": ["build-dataflow"],
    },
  ],
  "images": ["gcr.io/kubeflow-examples/code-search:" + std.extVar("tag"),
             "gcr.io/kubeflow-examples/code-search:latest",
             "gcr.io/kubeflow-examples/code-search-gpu:" + std.extVar("tag"),
             "gcr.io/kubeflow-examples/code-search-gpu:latest",
             "gcr.io/kubeflow-examples/code-search-dataflow:" + std.extVar("tag"),
             "gcr.io/kubeflow-examples/code-search-dataflow:latest"],
}

// Convert non-boolean types like string,number to a boolean.
// This is primarily intended for dealing with parameters that should be booleans.
local toBool = function(x) {
  result::
    if std.type(x) == "boolean" then
      x
    else if std.type(x) == "string" then
      std.asciiUpper(x) == "TRUE"
    else if std.type(x) == "number" then
      x != 0
    else
      false,
}.result,

local useImageCache = toBool(std.extVar("useImageCache")),

// A template for defining the steps for building each image.
local subGraphTemplate = {
  // following variables must be set
  name: null,

  dockerFile: null,
  buildArg: null,

  local template = self,

  local pullStep = if useImageCache then [
    {
      id: "pull-" + template.name,
      name: "gcr.io/cloud-builders/docker",
      args: ["pull", std.extVar("imageBase") + "/" + template.name + ":latest"],
      waitFor: ["-"],
    },
  ] else [],

  local image = std.extVar("imageBase") + "/" + template.name + ":" + std.extVar("tag"),
  local imageLatest = std.extVar("imageBase") + "/" + template.name + ":latest",


  images: [image, imageLatest],
  steps: pullStep +
    [
      {
        local buildArgList = if template.buildArg != null then ["--build-arg", template.buildArg] else [],
        local cacheList = if useImageCache then ["--cache-from=" + imageLatest] else [],

        id: "build-" + template.name,
        name: "gcr.io/cloud-builders/docker",
        args: [
                "build",
                "-t",
                image,
                "--label=git-versions=" + std.extVar("gitVersion"),
              ]
              + buildArgList
              + [
                "--file=" + template.dockerFile,
              ]
              + cacheList + ["."],
        waitFor: if useImageCache then ["pull-" + template.name] else ["-"],
      },
      {
        id: "tag-" + template.name,
        name: "gcr.io/cloud-builders/docker",
        args: ["tag", image, imageLatest],
        waitFor: ["build-" + template.name],
      },
    ],
},

local cpuSteps = subGraphTemplate {
  name: "t2t-cpu",
  dockerFile: "docker/t2t/Dockerfile",
  buildArg: "BASE_IMAGE_TAG=1.11.0",
},

local gpuSteps = subGraphTemplate {
  name: "t2t-gpu",
  dockerFile: "docker/t2t/Dockerfile",
  buildArg: "BASE_IMAGE_TAG=1.11.0-gpu",
},

local dataflowSteps = subGraphTemplate {
  name: "dataflow",
  dockerFile: "docker/t2t/Dockerfile.dataflow",
},

steps: cpuSteps.steps + gpuSteps.steps + dataflowSteps.steps,
images: cpuSteps.images + gpuSteps.images + dataflowSteps.images,
}
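For readers less familiar with jsonnet, a rough Python equivalent of the `toBool` coercion above — an illustrative sketch only, not part of the repository:

```
def to_bool(x):
    """Coerce strings and numbers to a boolean, mirroring the jsonnet toBool."""
    if isinstance(x, bool):
        return x
    if isinstance(x, str):
        return x.upper() == "TRUE"
    if isinstance(x, (int, float)):
        return x != 0
    return False

assert to_bool("true") and to_bool(1) and not to_bool("no")
```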
@@ -1,17 +1,21 @@
local params = std.extVar("__ksonnet/params");
local globals = import "globals.libsonnet";
local params = std.extVar('__ksonnet/params');
local globals = import 'globals.libsonnet';
local envParams = params + {
  components +: {
    // Insert component parameter overrides here. Ex:
    // guestbook +: {
    //   name: "guestbook-dev",
    //   replicas: params.global.replicas,
    // },
  components+: {
    "search-index-server"+: {
      dataDir: 'gs://code-search-demo/models/20181107-dist-sync-gpu',
      indexFile: 'gs://code-search-demo/20181104/code-embeddings-index/embeddings.index',
      lookupFile: 'gs://code-search-demo/20181104/code-embeddings-index/embedding-to-info.csv',
    },
    "query-embed-server"+: {
      modelBasePath: 'gs://code-search-demo/models/20181107-dist-sync-gpu/export/',
    },
  },
};

{
  components: {
    [x]: envParams.components[x] + globals, for x in std.objectFields(envParams.components)
    [x]: envParams.components[x] + globals
    for x in std.objectFields(envParams.components)
  },
}
}
@@ -18,6 +18,7 @@
    //tokenPairsBQTable: self.project,
    //functionEmbeddingsBQTable: "someothervalue",
    tokenPairsBQTable: self.project + ":" + self.bqDataset + ".token_pairs",
    failedTokenizeBQTable: self.project + ":" + self.bqDataset + ".failed_tokenize",
    jobNameSuffix: "20181201-1530",
    bqSuffix: std.strReplace(self.jobNameSuffix, "-", "_"),
    functionEmbeddingsBQTable: self.project + ":" + self.bqDataset + ".code_embeddings_" + self.bqSuffix,
@@ -34,7 +35,5 @@
    name: "pipeline",
    problem: "kf_github_function_docstring",
    project: "code-search-demo",
    bqDataset: "code_search",
    tokenPairsBQTable: self.project + ":" + self.bqDataset + ".token_pairs",
  },
}
@@ -15,7 +15,7 @@
  // are not picked up by the individual components.
  // Need to see if we can find a way to fix this.

  local imageTag = "v20181201-ae61193-dirty-d11191",
  local imageTag = "v20181204-ee47a49-dirty-f4045c",

  "t2t-job": {
    jobType: "trainer",
@@ -121,7 +121,6 @@
      workingDir: $.components["t2t-code-search"].workingDir,
      dataDir: self.workingDir + "/data",
      functionEmbeddingsDir: self.workingDir + "/code_embeddings",
      tokenPairsBQTable: "",
      functionEmbeddingsBQTable: "",
    },
@@ -22,6 +22,7 @@ local jobSpec = {
  },
  spec: {
    replicas: 1,
    backoffLimit: 0,
    template: {
      metadata: {
        labels: {
@@ -13,6 +13,7 @@
  },
  spec: {
    replicas: 1,
    backoffLimit: 0,
    template: {
      metadata: {
        labels: {
@@ -32,7 +33,6 @@
            "code_search.dataflow.cli.create_function_embeddings",
            "--runner=DataflowRunner",
            "--project=" + params.project,
            "--token_pairs_table=" + params.tokenPairsBQTable,
            "--function_embeddings_table=" + params.functionEmbeddingsBQTable,
            "--output_dir=" + params.functionEmbeddingsDir,
            "--data_dir=" + params.dataDir,
@@ -33,7 +33,7 @@ local jobSpec = {
            "python2",
            "-m",
            "code_search.dataflow.cli.preprocess_github_dataset",
            "--runner=DataflowRunner",
            "--runner=DataflowRunner",
            "--project=" + params.project,
            "--target_dataset=" + params.targetDataset,
            "--data_dir=" + params.dataDir,
@@ -50,7 +50,7 @@ local jobSpec = {
              value: "/secret/gcp-credentials/user-gcp-sa.json",
            },
          ],
          workingDir: "/src",
          workingDir: "/src",
          volumeMounts: [
            {
              mountPath: "/secret/gcp-credentials",
@@ -0,0 +1,198 @@
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Install Kubeflow Pipelines SDK"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Collecting https://storage.googleapis.com/ml-pipeline/release/0.1.3/kfp.tar.gz\n",
      "\u001b[?25l Downloading https://storage.googleapis.com/ml-pipeline/release/0.1.3/kfp.tar.gz (69kB)\n",
      "\u001b[K 100% |████████████████████████████████| 71kB 4.0MB/s eta 0:00:01\n",
      "\u001b[?25hRequirement already satisfied, skipping upgrade: urllib3>=1.15 in /opt/conda/lib/python3.6/site-packages (from kfp==0.1) (1.22)\n",
      "Requirement already satisfied, skipping upgrade: six>=1.10 in /opt/conda/lib/python3.6/site-packages (from kfp==0.1) (1.11.0)\n",
      "Requirement already satisfied, skipping upgrade: certifi in /opt/conda/lib/python3.6/site-packages (from kfp==0.1) (2018.11.29)\n",
      "Requirement already satisfied, skipping upgrade: python-dateutil in /opt/conda/lib/python3.6/site-packages (from kfp==0.1) (2.7.5)\n",
      "Requirement already satisfied, skipping upgrade: PyYAML in /opt/conda/lib/python3.6/site-packages (from kfp==0.1) (3.13)\n",
      "Requirement already satisfied, skipping upgrade: google-cloud-storage==1.13.0 in /opt/conda/lib/python3.6/site-packages (from kfp==0.1) (1.13.0)\n",
      "Collecting kubernetes==8.0.0 (from kfp==0.1)\n",
      "\u001b[?25l Downloading https://files.pythonhosted.org/packages/6c/44/f8286fb7a25a4ff29a4dec1b5baa49571eedc2b2edf6ec4b51e4b511ac0f/kubernetes-8.0.0-py2.py3-none-any.whl (1.3MB)\n",
      "\u001b[K 100% |████████████████████████████████| 1.4MB 14.9MB/s ta 0:00:01\n",
      "\u001b[?25hRequirement already satisfied, skipping upgrade: google-resumable-media>=0.3.1 in /opt/conda/lib/python3.6/site-packages (from google-cloud-storage==1.13.0->kfp==0.1) (0.3.1)\n",
      "Requirement already satisfied, skipping upgrade: google-api-core<2.0.0dev,>=0.1.1 in /opt/conda/lib/python3.6/site-packages (from google-cloud-storage==1.13.0->kfp==0.1) (1.6.0)\n",
      "Requirement already satisfied, skipping upgrade: google-cloud-core<0.29dev,>=0.28.0 in /opt/conda/lib/python3.6/site-packages (from google-cloud-storage==1.13.0->kfp==0.1) (0.28.1)\n",
      "Requirement already satisfied, skipping upgrade: requests in /opt/conda/lib/python3.6/site-packages (from kubernetes==8.0.0->kfp==0.1) (2.18.4)\n",
      "Requirement already satisfied, skipping upgrade: google-auth>=1.0.1 in /opt/conda/lib/python3.6/site-packages (from kubernetes==8.0.0->kfp==0.1) (1.6.1)\n",
      "Requirement already satisfied, skipping upgrade: setuptools>=21.0.0 in /opt/conda/lib/python3.6/site-packages (from kubernetes==8.0.0->kfp==0.1) (38.4.0)\n",
      "Requirement already satisfied, skipping upgrade: websocket-client!=0.40.0,!=0.41.*,!=0.42.*,>=0.32.0 in /opt/conda/lib/python3.6/site-packages (from kubernetes==8.0.0->kfp==0.1) (0.54.0)\n",
      "Requirement already satisfied, skipping upgrade: requests-oauthlib in /opt/conda/lib/python3.6/site-packages (from kubernetes==8.0.0->kfp==0.1) (1.0.0)\n",
      "Collecting adal>=1.0.2 (from kubernetes==8.0.0->kfp==0.1)\n",
      "\u001b[?25l Downloading https://files.pythonhosted.org/packages/2d/2f/14882b8dae0977e85577abde3065c141fb94dbb242adfb80e21797e4f7c9/adal-1.2.0-py2.py3-none-any.whl (52kB)\n",
      "\u001b[K 100% |████████████████████████████████| 61kB 20.2MB/s ta 0:00:01\n",
      "\u001b[?25hRequirement already satisfied, skipping upgrade: pytz in /opt/conda/lib/python3.6/site-packages (from google-api-core<2.0.0dev,>=0.1.1->google-cloud-storage==1.13.0->kfp==0.1) (2018.7)\n",
      "Requirement already satisfied, skipping upgrade: protobuf>=3.4.0 in /opt/conda/lib/python3.6/site-packages (from google-api-core<2.0.0dev,>=0.1.1->google-cloud-storage==1.13.0->kfp==0.1) (3.6.1)\n",
      "Requirement already satisfied, skipping upgrade: googleapis-common-protos!=1.5.4,<2.0dev,>=1.5.3 in /opt/conda/lib/python3.6/site-packages (from google-api-core<2.0.0dev,>=0.1.1->google-cloud-storage==1.13.0->kfp==0.1) (1.5.5)\n",
      "Requirement already satisfied, skipping upgrade: chardet<3.1.0,>=3.0.2 in /opt/conda/lib/python3.6/site-packages (from requests->kubernetes==8.0.0->kfp==0.1) (3.0.4)\n",
      "Requirement already satisfied, skipping upgrade: idna<2.7,>=2.5 in /opt/conda/lib/python3.6/site-packages (from requests->kubernetes==8.0.0->kfp==0.1) (2.6)\n",
      "Requirement already satisfied, skipping upgrade: cachetools>=2.0.0 in /opt/conda/lib/python3.6/site-packages (from google-auth>=1.0.1->kubernetes==8.0.0->kfp==0.1) (3.0.0)\n",
      "Requirement already satisfied, skipping upgrade: pyasn1-modules>=0.2.1 in /opt/conda/lib/python3.6/site-packages (from google-auth>=1.0.1->kubernetes==8.0.0->kfp==0.1) (0.2.2)\n",
      "Requirement already satisfied, skipping upgrade: rsa>=3.1.4 in /opt/conda/lib/python3.6/site-packages (from google-auth>=1.0.1->kubernetes==8.0.0->kfp==0.1) (4.0)\n",
      "Requirement already satisfied, skipping upgrade: oauthlib>=0.6.2 in /opt/conda/lib/python3.6/site-packages (from requests-oauthlib->kubernetes==8.0.0->kfp==0.1) (2.1.0)\n",
      "Collecting PyJWT>=1.0.0 (from adal>=1.0.2->kubernetes==8.0.0->kfp==0.1)\n",
      " Downloading https://files.pythonhosted.org/packages/87/8b/6a9f14b5f781697e51259d81657e6048fd31a113229cf346880bb7545565/PyJWT-1.7.1-py2.py3-none-any.whl\n",
      "Requirement already satisfied, skipping upgrade: cryptography>=1.1.0 in /opt/conda/lib/python3.6/site-packages (from adal>=1.0.2->kubernetes==8.0.0->kfp==0.1) (2.1.4)\n",
      "Requirement already satisfied, skipping upgrade: pyasn1<0.5.0,>=0.4.1 in /opt/conda/lib/python3.6/site-packages (from pyasn1-modules>=0.2.1->google-auth>=1.0.1->kubernetes==8.0.0->kfp==0.1) (0.4.4)\n",
      "Requirement already satisfied, skipping upgrade: asn1crypto>=0.21.0 in /opt/conda/lib/python3.6/site-packages (from cryptography>=1.1.0->adal>=1.0.2->kubernetes==8.0.0->kfp==0.1) (0.24.0)\n",
      "Requirement already satisfied, skipping upgrade: cffi>=1.7 in /opt/conda/lib/python3.6/site-packages (from cryptography>=1.1.0->adal>=1.0.2->kubernetes==8.0.0->kfp==0.1) (1.11.4)\n",
      "Requirement already satisfied, skipping upgrade: pycparser in /opt/conda/lib/python3.6/site-packages (from cffi>=1.7->cryptography>=1.1.0->adal>=1.0.2->kubernetes==8.0.0->kfp==0.1) (2.18)\n",
      "Building wheels for collected packages: kfp\n",
      " Running setup.py bdist_wheel for kfp ... \u001b[?25ldone\n",
      "\u001b[?25h Stored in directory: /tmp/pip-ephem-wheel-cache-yt23ripq/wheels/00/ab/c6/a055a8d8730d1b5b508b0d9ac42f12b531b1bbf575b31efe73\n",
      "Successfully built kfp\n",
      "\u001b[31mfairing 0.0.3 has requirement kubernetes==6.0.0, but you'll have kubernetes 8.0.0 which is incompatible.\u001b[0m\n",
      "Installing collected packages: PyJWT, adal, kubernetes, kfp\n",
      " Found existing installation: kubernetes 6.0.0\n",
      " Uninstalling kubernetes-6.0.0:\n",
      " Successfully uninstalled kubernetes-6.0.0\n",
      " Found existing installation: kfp 0.1\n",
      " Uninstalling kfp-0.1:\n",
      " Successfully uninstalled kfp-0.1\n",
      "Successfully installed PyJWT-1.7.1 adal-1.2.0 kfp-0.1 kubernetes-8.0.0\n"
     ]
    }
   ],
   "source": [
    "!pip3 install https://storage.googleapis.com/ml-pipeline/release/0.1.3/kfp.tar.gz --upgrade"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Import required library"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "import kfp\n",
    "from kfp import compiler\n",
    "from kubernetes import client as k8s_client"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Import pipeline definition"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "import index_update_pipeline"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Create an experiment first"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Found experiment with name: jlewi-notebook-test\n"
     ]
    }
   ],
   "source": [
    "client = kfp.Client()\n",
    "name = 'jlewi-notebook-test'\n",
    "\n",
    "exp = None\n",
    "experiments = client.list_experiments()\n",
    "for e in experiments.experiments:\n",
    " if e.name == name:\n",
    " print(\"Found experiment with name: %s\" % name)\n",
    " exp = e\n",
    " break\n",
    " \n",
    "if not exp: \n",
    " print(\"Creating new experiment\")\n",
    " exp = client.create_experiment(name=name)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "Run link <a href=\"/pipeline/#/runs/details/dd14a369-fcb0-11e8-a5c5-42010a8e0036\" target=\"_blank\" >here</a>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Compile it into a tar package.\n",
    "compiler.Compiler().compile(index_update_pipeline.function_embedding_update, 'function_embedding_update.tar.gz')\n",
    "\n",
    "# Submit a run.\n",
    "# inputs - experiment id, run name, tarball file\n",
    "run = client.run_pipeline(exp.id, 'code-search-function-embedding', 'function_embedding_update.tar.gz')"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
@@ -1,127 +1,93 @@
from typing import Dict
# Example Pipeline to update code search UI configuration
# To compile, use Kubeflow Pipelines V0.1.3 SDK or above.

import uuid
from kubernetes import client as k8s_client
import kfp.dsl as dsl
import kfp.gcp as gcp


# disable max arg lint check
# pylint: disable=R0913


def default_gcp_op(name: str, image: str, command: str = None,
                   arguments: str = None, file_inputs: Dict[dsl.PipelineParam, str] = None,
                   file_outputs: Dict[str, str] = None, is_exit_handler=False):
  """An operator that mounts the default GCP service account to the container.

  The user-gcp-sa secret is created as part of the kubeflow deployment that
  stores the access token for kubeflow user service account.

  With this service account, the container has a range of GCP APIs to
  access to. This service account is automatically created as part of the
  kubeflow deployment.

  For the list of the GCP APIs this service account can access to, check
  https://github.com/kubeflow/kubeflow/blob/7b0db0d92d65c0746ac52b000cbc290dac7c62b1/deployment/gke/deployment_manager_configs/iam_bindings_template.yaml#L18

  If you want to call the GCP APIs in a different project, grant the kf-user
  service account access permission.
  """

  return (
    dsl.ContainerOp(
      name,
      image,
      command,
      arguments,
      file_inputs,
      file_outputs,
      is_exit_handler,
    )
    .add_volume(
      k8s_client.V1Volume(
        name='gcp-credentials',
        secret=k8s_client.V1SecretVolumeSource(
          secret_name='user-gcp-sa'
        )
      )
    )
    .add_volume_mount(
      k8s_client.V1VolumeMount(
        mount_path='/secret/gcp-credentials',
        name='gcp-credentials',
      )
    )
    .add_env_variable(
      k8s_client.V1EnvVar(
        name='GOOGLE_APPLICATION_CREDENTIALS',
        value='/secret/gcp-credentials/user-gcp-sa.json'
      )
    )
  )

def dataflow_function_embedding_op(
    project: 'GcpProject', cluster_name: str, token_pairs_bq_table: str,
    function_embeddings_bq_table: str, data_dir: 'GcsUri',
    function_embeddings_dir: str, saved_model_dir: 'GcsUri', workflow_id: str,
    worker_machine_type: str, num_workers: int, working_dir: str, namespace: str):
  return default_gcp_op(
    cluster_name: str,
    function_embeddings_bq_table: str,
    function_embeddings_dir: str,
    namespace: str,
    num_workers: int,
    project: 'GcpProject',
    saved_model_dir: 'GcsUri',
    worker_machine_type: str,
    workflow_id: str,
    working_dir: str,):
  return dsl.ContainerOp(
    name='dataflow_function_embedding',
    image='gcr.io/kubeflow-examples/code-search/ks:v20181202-fbf5905-dirty-a8480a',
    image='gcr.io/kubeflow-examples/code-search/ks:v20181210-d7487dd-dirty-eb371e',
    command=['/usr/local/src/submit_code_embeddings_job.sh'],
    arguments=[
      "--workflowId=%s" % workflow_id,
      "--modelDir=%s" % saved_model_dir,
      "--dataDir=%s" % data_dir,
      "--cluster=%s" % cluster_name,
      "--dataDir=%s" % 'gs://code-search-demo/20181104/data',
      "--functionEmbeddingsDir=%s" % function_embeddings_dir,
      "--functionEmbeddingsBQTable=%s" % function_embeddings_bq_table,
      "--modelDir=%s" % saved_model_dir,
      "--namespace=%s" % namespace,
      "--numWorkers=%s" % num_workers,
      "--project=%s" % project,
      "--tokenPairsBQTable=%s" % token_pairs_bq_table,
      "--functionEmbeddingsBQTable=%s" % function_embeddings_bq_table,
      "--workerMachineType=%s" % worker_machine_type,
      "--workflowId=%s" % workflow_id,
      "--workingDir=%s" % working_dir,
      "--cluster=%s" % cluster_name,
      "--namespace=%s" % namespace,
    ]
  )
  ).apply(gcp.use_gcp_secret('user-gcp-sa'))


def search_index_creator_op(
    index_file: str, lookup_file: str, function_embeddings_dir: str,
    workflow_id: str, cluster_name: str, namespace: str):
    cluster_name: str,
    function_embeddings_dir: str,
    index_file: str,
    lookup_file: str,
    namespace: str,
    workflow_id: str):
  return dsl.ContainerOp(
    # use component name as step name
    name='search_index_creator',
    image='gcr.io/kubeflow-examples/code-search/ks:v20181202-fbf5905-dirty-a8480a',
    image='gcr.io/kubeflow-examples/code-search/ks:v20181210-d7487dd-dirty-eb371e',
    command=['/usr/local/src/launch_search_index_creator_job.sh'],
    arguments=[
      '--cluster=%s' % cluster_name,
      '--functionEmbeddingsDir=%s' % function_embeddings_dir,
      '--indexFile=%s' % index_file,
      '--lookupFile=%s' % lookup_file,
      '--functionEmbeddingsDir=%s' % function_embeddings_dir,
      '--workflowId=%s' % workflow_id,
      '--cluster=%s' % cluster_name,
      '--namespace=%s' % namespace,
      '--workflowId=%s' % workflow_id,
    ]
  )


def update_index_op(
    base_git_repo: str, base_branch: str, app_dir: str, fork_git_repo: str,
    index_file: str, lookup_file: str, workflow_id: str, bot_email: str):
    app_dir: str,
    base_branch: str,
    base_git_repo: str,
    bot_email: str,
    fork_git_repo: str,
    index_file: str,
    lookup_file: str,
    workflow_id: str):
  return (
    dsl.ContainerOp(
      name='update_index',
      image='gcr.io/kubeflow-examples/code-search/ks:v20181202-fbf5905-dirty-a8480a',
      image='gcr.io/kubeflow-examples/code-search/ks:v20181210-d7487dd-dirty-eb371e',
      command=['/usr/local/src/update_index.sh'],
      arguments=[
        '--baseGitRepo=%s' % base_git_repo,
        '--baseBranch=%s' % base_branch,
        '--appDir=%s' % app_dir,
        '--baseBranch=%s' % base_branch,
        '--baseGitRepo=%s' % base_git_repo,
        '--botEmail=%s' % bot_email,
        '--forkGitRepo=%s' % fork_git_repo,
        '--env=%s' % 'pipeline',
        '--indexFile=%s' % index_file,
        '--lookupFile=%s' % lookup_file,
        '--workflowId=%s' % workflow_id,
        '--botEmail=%s' % bot_email,
      ],
    )
    .add_volume(
@@ -148,50 +114,69 @@ def update_index_op(

# The pipeline definition
@dsl.pipeline(
  name='function_embedding',
  description='Example function embedding pipeline'
  name='github_code_index_update',
  description='Example of pipeline to update github code index'
)
def function_embedding_update(
def github_code_index_update(
    project='code-search-demo',
    cluster_name='cs-demo-1103',
    namespace='kubeflow',
    working_dir='gs://code-search-demo/pipeline',
    data_dir='gs://code-search-demo/20181104/data',
    saved_model_dir='gs://code-search-demo/models/20181107-dist-sync-gpu/export/1541712907/',
    target_dataset='code_search',
    worker_machine_type='n1-highcpu-32',
    function_embedding_num_workers=5,
    num_workers=5,
    base_git_repo='kubeflow/examples',
    base_branch='master',
    app_dir='code_search/ks-web-app',
    fork_git_repo='IronPan/examples',
    bot_email='kf.sample.bot@gmail.com'):
    bot_email='kf.sample.bot@gmail.com',
    # Can't use workflow name as bq_suffix since BQ table doesn't accept '-' and
    # workflow name is assigned at runtime. Pipeline might need to support
    # replacing characters in workflow name.
    # For recurrent pipeline, pass in '[[Index]]' instead, for unique naming.
    bq_suffix=uuid.uuid4().hex[:6].upper()):
  workflow_name = '{{workflow.name}}'
  # Can't use workflow name as bq_suffix since BQ table doesn't accept '-' and
  # workflow name is assigned at runtime. Pipeline might need to support
  # replacing characters in workflow name.
  bq_suffix = uuid.uuid4().hex[:6].upper()
  working_dir = '%s/%s' % (working_dir, workflow_name)
  lookup_file = '%s/code-embeddings-index/embedding-to-info.csv' % working_dir
  index_file = '%s/code-embeddings-index/embeddings.index' % working_dir
  function_embeddings_dir = '%s/%s' % (working_dir, "/code_embeddings")
  token_pairs_bq_table = '%s:%s.token_pairs' % (project, target_dataset)
  function_embeddings_dir = '%s/%s' % (working_dir, "code_embeddings")
  function_embeddings_bq_table = \
    '%s:%s.function_embeddings_%s' % (project, target_dataset, bq_suffix)

  function_embedding = dataflow_function_embedding_op(
    project, cluster_name, token_pairs_bq_table, function_embeddings_bq_table,
    data_dir, function_embeddings_dir, saved_model_dir, workflow_name,
    worker_machine_type, function_embedding_num_workers, working_dir, namespace)
    cluster_name,
    function_embeddings_bq_table,
    function_embeddings_dir,
    namespace,
    num_workers,
    project,
    saved_model_dir,
    worker_machine_type,
    workflow_name,
    working_dir)

  search_index_creator = search_index_creator_op(
    index_file, lookup_file, function_embeddings_dir, workflow_name, cluster_name, namespace)
    cluster_name,
    function_embeddings_dir,
    index_file,
    lookup_file,
    namespace,
    workflow_name)
  search_index_creator.after(function_embedding)

  update_index_op(
    base_git_repo, base_branch, app_dir, fork_git_repo,
    index_file, lookup_file, workflow_name, bot_email).after(search_index_creator)
    app_dir,
    base_branch,
    base_git_repo,
    bot_email,
    fork_git_repo,
    index_file,
    lookup_file,
    workflow_name).after(search_index_creator)


if __name__ == '__main__':
  import kfp.compiler as compiler

  compiler.Compiler().compile(function_embedding_update, __file__ + '.tar.gz')
  compiler.Compiler().compile(github_code_index_update, __file__ + '.tar.gz')
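The pipeline above sequences its steps with `.after()`. A minimal, self-contained sketch of that pattern (step names and image are hypothetical, not from the repository):

```
import kfp.dsl as dsl

@dsl.pipeline(name='sequencing_demo', description='Two steps run in order')
def sequencing_demo():
    step_a = dsl.ContainerOp(name='step-a', image='alpine', command=['echo', 'a'])
    step_b = dsl.ContainerOp(name='step-b', image='alpine', command=['echo', 'b'])
    # No data dependency links the two ops, so .after() imposes explicit ordering.
    step_b.after(step_a)
```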
@@ -26,7 +26,7 @@ class PipelineCLIOptions(pipeline_options.StandardOptions,
def add_parser_arguments(parser):
  additional_args_parser = parser.add_argument_group('Custom Arguments')
  additional_args_parser.add_argument('--target_dataset', metavar='', type=str,
                    help='BigQuery dataset for output results')
                                      help='BigQuery dataset for output results')
  additional_args_parser.add_argument('--pre_transformed', action='store_true',
                                      help='Use a pre-transformed BigQuery dataset')
  additional_args_parser.add_argument('--wait_until_finished', action='store_true',
@@ -42,6 +42,10 @@ def add_parser_arguments(parser):
                                      help=('If specified read the entire GitHub dataset '
                                            'specified as PROJECT:DATASET.TABLE. If not '
                                            'specified we run a query to filter the data.'))
  additional_args_parser.add_argument('--failed_tokenize_table', metavar='', type=str,
                                      help='The BigQuery table containing the '
                                           'failed tokenize entry. This should be '
                                           'of the form PROJECT:DATASET.TABLE.')

  predict_args_parser = parser.add_argument_group('Batch Prediction Arguments')
  predict_args_parser.add_argument('--token_pairs_table', metavar='', type=str,
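The hunk above follows the standard argparse argument-group pattern; a self-contained sketch (values hypothetical):

```
import argparse

parser = argparse.ArgumentParser()
group = parser.add_argument_group('Custom Arguments')
group.add_argument('--failed_tokenize_table', metavar='', type=str,
                   help='Table of the form PROJECT:DATASET.TABLE.')
# Groups only affect --help formatting; parsing behavior is unchanged.
args = parser.parse_args(['--failed_tokenize_table', 'proj:ds.failed'])
print(args.failed_tokenize_table)  # proj:ds.failed
```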
@@ -6,6 +6,7 @@ import apache_beam as beam
import code_search.dataflow.cli.arguments as arguments
from code_search.dataflow.transforms import bigquery
import code_search.dataflow.transforms.github_bigquery as gh_bq
import code_search.dataflow.transforms.github_dataset as github_dataset
import code_search.dataflow.transforms.function_embeddings as func_embed
import code_search.dataflow.do_fns.dict_to_csv as dict_to_csv

@@ -30,12 +31,21 @@ def create_function_embeddings(argv=None):

  pipeline = beam.Pipeline(options=pipeline_opts)

  token_pairs_query = gh_bq.ReadTransformedGithubDatasetQuery(
    args.token_pairs_table)
  token_pairs_source = beam.io.BigQuerySource(
    query=token_pairs_query.query_string, use_standard_sql=True)
  embeddings = (pipeline
    | "Read Transformed Github Dataset" >> beam.io.Read(token_pairs_source)
  if args.token_pairs_table:
    token_pairs_query = gh_bq.ReadTransformedGithubDatasetQuery(
      args.token_pairs_table)
    token_pairs_source = beam.io.BigQuerySource(
      query=token_pairs_query.query_string, use_standard_sql=True)
    token_pairs = (pipeline
      | "Read Transformed Github Dataset" >> beam.io.Read(token_pairs_source)
    )
  else:
    token_pairs = (pipeline
      | "Read Github Dataset" >> gh_bq.ReadGithubDataset(args.project)
      | "Transform Github Dataset" >> github_dataset.TransformGithubDataset(None, None)
    )

  embeddings = (token_pairs
    | "Compute Function Embeddings" >> func_embed.FunctionEmbeddings(args.problem,
                                                                     args.data_dir,
                                                                     args.saved_model_dir)
@@ -59,7 +69,7 @@ def create_function_embeddings(argv=None):
  )

  (embeddings  # pylint: disable=expression-not-assigned
    | "Format for CSV Write" >> beam.ParDo(dict_to_csv.DictToCSVString(
    | "Format for Embeddings CSV Write" >> beam.ParDo(dict_to_csv.DictToCSVString(
      ['nwo', 'path', 'function_name', 'lineno', 'original_function', 'function_embedding']))
    | "Write Embeddings to CSV" >> beam.io.WriteToText('{}/func-index'.format(args.output_dir),
                                                       file_name_suffix='.csv',
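The change above picks the token-pairs source at pipeline-construction time and then applies the same downstream transforms. A minimal sketch of that conditional-source pattern (transform names hypothetical, not from the repository):

```
import apache_beam as beam

def build_source(pipeline, rows=None):
    # Choose the input PCollection when the graph is built, not at run time.
    if rows is not None:
        return pipeline | "FromRows" >> beam.Create(rows)
    return pipeline | "DefaultRows" >> beam.Create([{"docstring_tokens": "a b c"}])

with beam.Pipeline() as p:
    (build_source(p)
     | "KeepLongDocstrings" >> beam.Filter(
         lambda row: len(row["docstring_tokens"].split(" ")) >= 2))
```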
@@ -24,7 +24,6 @@ def preprocess_github_dataset(argv=None):
    - If Github Python files have already been processed, use the
      pre-processed table instead (using flag `--pre-transformed`)
    - Tokenize files into pairs of function definitions and docstrings
    - All results are stored in a BigQuery dataset (`args.target_dataset`)
    - See `transforms.github_dataset.TransformGithubDataset` for details of tables created
    - Additionally, store pairs of docstring and function tokens in a CSV file
      for training
@@ -59,8 +58,8 @@ def preprocess_github_dataset(argv=None):
  input_records = (pipeline
    | "Read Github Dataset" >> gh_bq.ReadGithubDataset(args.project))
  token_pairs = (input_records
    | "Transform Github Dataset" >> github_dataset.TransformGithubDataset(args.project,
                                                                          args.target_dataset)
    | "Transform Github Dataset" >> github_dataset.TransformGithubDataset(
      args.token_pairs_table, args.failed_tokenize_table)
  )

  (token_pairs  # pylint: disable=expression-not-assigned
@@ -73,7 +72,7 @@ def preprocess_github_dataset(argv=None):

  result = pipeline.run()
  logging.info("Submitted Dataflow job: %s", result)
  if args.wait_until_finish:
  if args.wait_until_finished:
    result.wait_until_finish()

  return result
@@ -77,12 +77,10 @@ class BigQueryWrite(beam.PTransform):
    ]
  """

  def __init__(self, project, dataset, table, batch_size=500,
  def __init__(self, table, batch_size=500,
               write_disposition=bigquery.BigQueryDisposition.WRITE_TRUNCATE):
    super(BigQueryWrite, self).__init__()

    self.project = project
    self.dataset = dataset
    self.table = table
    self.write_disposition = write_disposition
    self.batch_size = batch_size
@@ -97,9 +95,7 @@ class BigQueryWrite(beam.PTransform):

  def expand(self, input_or_inputs):
    return (input_or_inputs
      | beam.io.WriteToBigQuery(project=self.project,
                                dataset=self.dataset,
                                table=self.table,
      | beam.io.WriteToBigQuery(table=self.table,
                                schema=self.output_schema,
                                batch_size=self.batch_size,
                                write_disposition=self.write_disposition)
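The refactor above leans on `beam.io.WriteToBigQuery` accepting a fully qualified `PROJECT:DATASET.TABLE` string, which makes the separate project/dataset arguments redundant. A hedged sketch (table and schema hypothetical; running it requires GCP credentials):

```
import apache_beam as beam

with beam.Pipeline() as p:
    (p
     | "MakeRow" >> beam.Create([{"nwo": "kubeflow/examples", "path": "README.md", "content": ""}])
     | "Write" >> beam.io.WriteToBigQuery(
         table="my-project:my_dataset.token_pairs",  # hypothetical table
         schema="nwo:STRING,path:STRING,content:STRING",
         create_disposition=beam.io.BigQueryDisposition.CREATE_IF_NEEDED,
         write_disposition=beam.io.BigQueryDisposition.WRITE_APPEND))
```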
@@ -74,30 +74,6 @@ class ReadGithubDataset(bq_transform.BigQueryRead):
    return query


class WriteFailedTokenizedData(bq_transform.BigQueryWrite):
  @property
  def column_list(self):
    return [
      ('nwo', 'STRING'),
      ('path', 'STRING'),
      ('content', 'STRING')
    ]


class WriteTokenizedData(bq_transform.BigQueryWrite):
  @property
  def column_list(self):
    return [
      ('nwo', 'STRING'),
      ('path', 'STRING'),
      ('function_name', 'STRING'),
      ('lineno', 'STRING'),
      ('original_function', 'STRING'),
      ('function_tokens', 'STRING'),
      ('docstring_tokens', 'STRING'),
    ]


class ReadTransformedGithubDatasetQuery(object):

  def __init__(self, table, limit=None):
@ -2,8 +2,8 @@ import logging
|
|||
|
||||
import apache_beam as beam
|
||||
|
||||
from code_search.dataflow.transforms import bigquery
|
||||
import code_search.dataflow.do_fns.github_dataset as gh_do_fns
|
||||
import code_search.dataflow.transforms.github_bigquery as gh_bq
|
||||
|
||||
|
||||
class TransformGithubDataset(beam.PTransform):
|
||||
|
|
@ -15,19 +15,15 @@ class TransformGithubDataset(beam.PTransform):
|
|||
All tiny docstrings (smaller than `self.min_docstring_tokens`)
|
||||
are filtered out.
|
||||
|
||||
This transform creates following tables in the `target_dataset`
|
||||
This transform creates following tables
|
||||
which are defined as properties for easy modification.
|
||||
- `self.failed_tokenize_table`
|
||||
- `self.pairs_table`
|
||||
"""
|
||||
|
||||
def __init__(self, project, target_dataset,
|
||||
pairs_table=gh_bq.PAIRS_TABLE,
|
||||
failed_tokenize_table=gh_bq.FAILED_TOKENIZE_TABLE):
|
||||
def __init__(self, pairs_table, failed_tokenize_table):
|
||||
super(TransformGithubDataset, self).__init__()
|
||||
|
||||
self.project = project
|
||||
self.target_dataset = target_dataset
|
||||
self.pairs_table = pairs_table
|
||||
self.failed_tokenize_table = failed_tokenize_table
|
||||
|
||||
|
|
@ -44,10 +40,19 @@ class TransformGithubDataset(beam.PTransform):
|
|||
|
||||
pairs, tokenize_errors = tokenize_result.rows, tokenize_result.err
|
||||
|
||||
if self.target_dataset:
|
||||
if self.failed_tokenize_table:
|
||||
failed_tokenize_table_schema = bigquery.BigQuerySchema([
|
||||
('nwo', 'STRING'),
|
||||
('path', 'STRING'),
|
||||
('content', 'STRING')
|
||||
])
|
||||
|
||||
(tokenize_errors # pylint: disable=expression-not-assigned
|
||||
| "Failed Tokenization" >> gh_bq.WriteFailedTokenizedData(self.project, self.target_dataset,
|
||||
self.failed_tokenize_table)
|
||||
| "Failed Tokenization" >> beam.io.WriteToBigQuery(table=self.failed_tokenize_table,
|
||||
schema=failed_tokenize_table_schema,
|
||||
create_disposition=beam.io.BigQueryDisposition.CREATE_IF_NEEDED,
|
||||
write_disposition=beam.io.BigQueryDisposition.WRITE_EMPTY)
|
||||
|
||||
)
|
||||
else:
|
||||
logging.info("No bigquery dataset provided; tokenization errors will "
|
||||
|
|
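`BigQuerySchema` is this repository's own helper. Assuming it maps the `(name, type)` pairs above to the `name:TYPE,...` schema string that `beam.io.WriteToBigQuery` accepts, a plain-Python sketch of that mapping:

```python
# Assumed behavior of the BigQuerySchema helper, shown as a plain function.
def schema_string(columns):
    return ",".join("{}:{}".format(name, col_type) for name, col_type in columns)

print(schema_string([("nwo", "STRING"), ("path", "STRING"), ("content", "STRING")]))
# -> "nwo:STRING,path:STRING,content:STRING"
```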
@@ -59,13 +64,23 @@ class TransformGithubDataset(beam.PTransform):
|
|||
lambda row: len(row['docstring_tokens'].split(' ')) > self.min_docstring_tokens)
|
||||
)
|
||||
|
||||
if self.target_dataset:
|
||||
logging.info("Writing results to BigQuery %s:%s.%s",
|
||||
self.project, self.target_dataset, self.pairs_table)
|
||||
if self.pairs_table:
|
||||
logging.info("Writing results to BigQuery %s", self.pairs_table)
|
||||
tokenize_table_schema = bigquery.BigQuerySchema([
|
||||
('nwo', 'STRING'),
|
||||
('path', 'STRING'),
|
||||
('function_name', 'STRING'),
|
||||
('lineno', 'STRING'),
|
||||
('original_function', 'STRING'),
|
||||
('function_tokens', 'STRING'),
|
||||
('docstring_tokens', 'STRING'),
|
||||
])
|
||||
(flat_rows # pylint: disable=expression-not-assigned
|
||||
| "Save Tokens" >> gh_bq.WriteTokenizedData(self.project, self.target_dataset,
|
||||
self.pairs_table)
|
||||
| "Save Tokens" >> beam.io.WriteToBigQuery(table=self.pairs_table,
|
||||
schema=tokenize_table_schema,
|
||||
create_disposition=beam.io.BigQueryDisposition.CREATE_IF_NEEDED,
|
||||
write_disposition=beam.io.BigQueryDisposition.WRITE_EMPTY)
|
||||
)
|
||||
else:
|
||||
logging.info("target_dataset not set will not write to BigQuery")
|
||||
logging.info("pairs_table not set will not write to BigQuery")
|
||||
return flat_rows
|
||||
|
|
|
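A standalone sketch of the docstring-length filter used in this hunk; the threshold below is an assumed stand-in for `self.min_docstring_tokens`:

```python
import apache_beam as beam

MIN_DOCSTRING_TOKENS = 5  # assumed threshold

with beam.Pipeline() as p:
    (p
     | beam.Create([
         {"function_name": "add", "docstring_tokens": "add two numbers and return the sum"},
         {"function_name": "tmp", "docstring_tokens": "todo"},
     ])
     # Keep only rows whose docstring has enough tokens; drops the second row.
     | beam.Filter(lambda row: len(row["docstring_tokens"].split(" ")) > MIN_DOCSTRING_TOKENS)
     | beam.Map(print))
```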
|||
|
|
@@ -0,0 +1,20 @@
|
|||
local env = std.extVar("__ksonnet/environments");
|
||||
local params = std.extVar("__ksonnet/params").components.serving;
|
||||
local k = import "k.libsonnet";
|
||||
|
||||
// ksonnet appears to require that name be a parameter of the prototype, which is why we handle it differently.
|
||||
local name = params.name;
|
||||
|
||||
// updatedParams includes the namespace from env by default.
|
||||
// We can override namespace in params if needed
|
||||
local updatedParams = env + params;
|
||||
|
||||
local tfServingBase = import "kubeflow/tf-serving/tf-serving.libsonnet";
|
||||
local tfServing = tfServingBase {
|
||||
// Override parameters with user supplied parameters.
|
||||
params+: updatedParams {
|
||||
name: name,
|
||||
},
|
||||
};
|
||||
|
||||
std.prune(k.core.v1.list.new(tfServing.components))
|
||||
|
|
@@ -0,0 +1,198 @@
|
|||
local env = std.extVar("__ksonnet/environments");
|
||||
local params = std.extVar("__ksonnet/params").components["t2tcpu"];
|
||||
|
||||
local k = import "k.libsonnet";
|
||||
|
||||
local name = params.name;
|
||||
local namespace = env.namespace;
|
||||
|
||||
local updatedParams = {
|
||||
cloud: "gke",
|
||||
|
||||
sync: "0",
|
||||
|
||||
dataDir: "gs://kubeflow-demo-base/featurization/yelp-data",
|
||||
usrDir: "./yelp_sentiment",
|
||||
problem: "yelp_sentiment",
|
||||
|
||||
model: "transformer_encoder",
|
||||
hparams: "transformer_yelp_sentiment",
|
||||
hparamsSet: "transformer_yelp_sentiment",
|
||||
|
||||
outputGCSPath: "gs://kubeflow-demo-base/kubeflow-demo-base-demo/CPU/training/yelp-model",
|
||||
|
||||
gpuImage: "gcr.io/kubeflow-demo-base/kubeflow-yelp-demo-gpu:latest",
|
||||
cpuImage: "gcr.io/kubeflow-demo-base/kubeflow-yelp-demo-cpu:latest",
|
||||
|
||||
trainSteps: 1000,
|
||||
evalSteps: 10,
|
||||
|
||||
psGpu: 0,
|
||||
workerGpu: 0,
|
||||
|
||||
workers: 3,
|
||||
masters: 1,
|
||||
ps: 1,
|
||||
|
||||
jobName: "t2tcpu",
|
||||
} + params;
|
||||
|
||||
local baseCommand = [
|
||||
"bash",
|
||||
"/home/jovyan/yelp_sentiment/worker_launcher.sh",
|
||||
"--train_steps=" + updatedParams.trainSteps,
|
||||
"--hparams_set=" + updatedParams.hparams,
|
||||
"--model=" + updatedParams.model,
|
||||
"--problem=" + updatedParams.problem,
|
||||
"--t2t_usr_dir=" + updatedParams.usrDir,
|
||||
"--data_dir=" + updatedParams.dataDir,
|
||||
"--output_dir=" + updatedParams.outputGCSPath,
|
||||
];
|
||||
|
||||
local psCommand = baseCommand + [
|
||||
"--schedule=run_std_server",
|
||||
];
|
||||
|
||||
local totalWorkerReplicas = updatedParams.workers + updatedParams.masters;
|
||||
|
||||
local workerBaseCommand = baseCommand + [
|
||||
"--schedule=train",
|
||||
"--sync=" + updatedParams.sync,
|
||||
"--ps_gpu=" + updatedParams.psGpu,
|
||||
"--worker_gpu=" + updatedParams.workerGpu,
|
||||
"--worker_replicas=" + totalWorkerReplicas,
|
||||
"--ps_replicas=" + updatedParams.ps,
|
||||
"--eval_steps=" + updatedParams.evalSteps,
|
||||
];
|
||||
|
||||
local workerCommand = workerBaseCommand + [
|
||||
"--worker_job=/job:worker",
|
||||
];
|
||||
|
||||
local masterCommand = workerBaseCommand + [
|
||||
"--worker_job=/job:master",
|
||||
];
|
||||
|
||||
local gpuResources = {
|
||||
limits: {
|
||||
"nvidia.com/gpu": updatedParams.workerGpu,
|
||||
},
|
||||
};
|
||||
|
||||
local cloud = std.toString(updatedParams.cloud);
|
||||
|
||||
local baseEnv = [
|
||||
{
|
||||
name: "PYTHONPATH",
|
||||
value: "/home/jovyan",
|
||||
},
|
||||
];
|
||||
|
||||
local nonGkeEnv = baseEnv + [
|
||||
{
|
||||
name: "GOOGLE_APPLICATION_CREDENTIALS",
|
||||
value: "/secret/gcp-credentials/key.json"
|
||||
},
|
||||
];
|
||||
|
||||
local nonGkeVolumes = [
|
||||
{
|
||||
name: "gcp-credentials",
|
||||
secret: {
|
||||
secretName: "gcp-credentials",
|
||||
},
|
||||
},
|
||||
];
|
||||
|
||||
local nonGkeImagePullSecrets = [
|
||||
{
|
||||
name: "gcp-registry-credentials",
|
||||
},
|
||||
];
|
||||
|
||||
local nonGkeVolumeMounts = [
|
||||
{
|
||||
mountPath: "/secret/gcp-credentials",
|
||||
name: "gcp-credentials",
|
||||
},
|
||||
];
|
||||
|
||||
local tfjob = {
|
||||
apiVersion: "kubeflow.org/v1alpha2",
|
||||
kind: "TFJob",
|
||||
metadata: {
|
||||
name: updatedParams.jobName,
|
||||
namespace: namespace,
|
||||
},
|
||||
spec: {
|
||||
tfReplicaSpecs: {
|
||||
Master: {
|
||||
replicas: 1,
|
||||
template: {
|
||||
spec: {
|
||||
containers: [
|
||||
{
|
||||
command: masterCommand,
|
||||
env: if cloud != "gke" then nonGkeEnv else baseEnv,
|
||||
image: if updatedParams.workerGpu > 0 then updatedParams.gpuImage else updatedParams.cpuImage,
|
||||
name: "tensorflow",
|
||||
[if updatedParams.workerGpu > 0 then "resources"]: gpuResources,
|
||||
[if cloud != "gke" then "volumeMounts"]: nonGkeVolumeMounts,
|
||||
},
|
||||
],
|
||||
[if cloud != "gke" then "imagePullSecrets"]: nonGkeImagePullSecrets,
|
||||
restartPolicy: "OnFailure",
|
||||
[if cloud != "gke" then "volumes"]: nonGkeVolumes,
|
||||
},
|
||||
},
|
||||
}, // Master
|
||||
|
||||
Worker: {
|
||||
replicas: updatedParams.workers,
|
||||
template: {
|
||||
spec: {
|
||||
containers: [
|
||||
{
|
||||
command: workerCommand,
|
||||
env: if cloud != "gke" then nonGkeEnv else baseEnv,
|
||||
image: if updatedParams.workerGpu > 0 then updatedParams.gpuImage else updatedParams.cpuImage,
|
||||
name: "tensorflow",
|
||||
[if updatedParams.workerGpu > 0 then "resources"]: gpuResources,
|
||||
[if cloud != "gke" then "volumeMounts"]: nonGkeVolumeMounts,
|
||||
},
|
||||
],
|
||||
[if cloud != "gke" then "imagePullSecrets"]: nonGkeImagePullSecrets,
|
||||
restartPolicy: "OnFailure",
|
||||
[if cloud != "gke" then "volumes"]: nonGkeVolumes,
|
||||
},
|
||||
},
|
||||
}, // Worker
|
||||
Ps: {
|
||||
replicas: updatedParams.ps,
|
||||
template: {
|
||||
spec: {
|
||||
containers: [
|
||||
{
|
||||
command: psCommand,
|
||||
env: if cloud != "gke" then nonGkeEnv else baseEnv,
|
||||
image: updatedParams.cpuImage,
|
||||
name: "tensorflow",
|
||||
[if cloud != "gke" then "volumeMounts"]: nonGkeVolumeMounts,
|
||||
},
|
||||
],
|
||||
[if cloud != "gke" then "imagePullSecrets"]: nonGkeImagePullSecrets,
|
||||
restartPolicy: "OnFailure",
|
||||
[if cloud != "gke" then "volumes"]: nonGkeVolumes,
|
||||
},
|
||||
},
|
||||
}, // Ps
|
||||
}, // tfReplicaSpecs
|
||||
}, // Spec
|
||||
}; // tfJob
|
||||
|
||||
k.core.v1.list.new([
|
||||
tfjob,
|
||||
])
|
||||
|
||||
|
||||
|
||||
|
|
@@ -0,0 +1,197 @@
|
|||
local env = std.extVar("__ksonnet/environments");
|
||||
local params = std.extVar("__ksonnet/params").components["t2tgpu"];
|
||||
|
||||
local k = import "k.libsonnet";
|
||||
|
||||
local name = params.name;
|
||||
local namespace = env.namespace;
|
||||
|
||||
local updatedParams = {
|
||||
cloud: "gke",
|
||||
sync: "0",
|
||||
|
||||
dataDir: "gs://kubeflow-demo-base/featurization/yelp-data",
|
||||
usrDir: "./yelp_sentiment",
|
||||
problem: "yelp_sentiment",
|
||||
|
||||
model: "transformer_encoder",
|
||||
hparams: "transformer_yelp_sentiment",
|
||||
hparamsSet: "transformer_yelp_sentiment",
|
||||
|
||||
outputGCSPath: "gs://kubeflow-demo-base/kubeflow-demo-base-demo/GPU/training/yelp-model",
|
||||
|
||||
gpuImage: "gcr.io/kubeflow-demo-base/kubeflow-yelp-demo-gpu:latest",
|
||||
cpuImage: "gcr.io/kubeflow-demo-base/kubeflow-yelp-demo-cpu:latest",
|
||||
|
||||
trainSteps: 1000,
|
||||
evalSteps: 10,
|
||||
|
||||
psGpu: 0,
|
||||
workerGpu: 1,
|
||||
|
||||
workers: 3,
|
||||
masters: 1,
|
||||
ps: 1,
|
||||
|
||||
jobName: "t2tgpu",
|
||||
} + params;
|
||||
|
||||
local baseCommand = [
|
||||
"bash",
|
||||
"/home/jovyan/yelp_sentiment/worker_launcher.sh",
|
||||
"--train_steps=" + updatedParams.trainSteps,
|
||||
"--hparams_set=" + updatedParams.hparams,
|
||||
"--model=" + updatedParams.model,
|
||||
"--problem=" + updatedParams.problem,
|
||||
"--t2t_usr_dir=" + updatedParams.usrDir,
|
||||
"--data_dir=" + updatedParams.dataDir,
|
||||
"--output_dir=" + updatedParams.outputGCSPath,
|
||||
];
|
||||
|
||||
local psCommand = baseCommand + [
|
||||
"--schedule=run_std_server",
|
||||
];
|
||||
|
||||
local totalWorkerReplicas = updatedParams.workers + updatedParams.masters;
|
||||
|
||||
local workerBaseCommand = baseCommand + [
|
||||
"--schedule=train",
|
||||
"--sync=" + updatedParams.sync,
|
||||
"--ps_gpu=" + updatedParams.psGpu,
|
||||
"--worker_gpu=" + updatedParams.workerGpu,
|
||||
"--worker_replicas=" + totalWorkerReplicas,
|
||||
"--ps_replicas=" + updatedParams.ps,
|
||||
"--eval_steps=" + updatedParams.evalSteps,
|
||||
];
|
||||
|
||||
local workerCommand = workerBaseCommand + [
|
||||
"--worker_job=/job:worker",
|
||||
];
|
||||
|
||||
local masterCommand = workerBaseCommand + [
|
||||
"--worker_job=/job:master",
|
||||
];
|
||||
|
||||
local gpuResources = {
|
||||
limits: {
|
||||
"nvidia.com/gpu": updatedParams.workerGpu,
|
||||
},
|
||||
};
|
||||
|
||||
local cloud = std.toString(updatedParams.cloud);
|
||||
|
||||
local baseEnv = [
|
||||
{
|
||||
name: "PYTHONPATH",
|
||||
value: "/home/jovyan",
|
||||
},
|
||||
];
|
||||
|
||||
local nonGkeEnv = baseEnv + [
|
||||
{
|
||||
name: "GOOGLE_APPLICATION_CREDENTIALS",
|
||||
value: "/secret/gcp-credentials/key.json"
|
||||
},
|
||||
];
|
||||
|
||||
local nonGkeVolumes = [
|
||||
{
|
||||
name: "gcp-credentials",
|
||||
secret: {
|
||||
secretName: "gcp-credentials",
|
||||
},
|
||||
},
|
||||
];
|
||||
|
||||
local nonGkeImagePullSecrets = [
|
||||
{
|
||||
name: "gcp-registry-credentials",
|
||||
},
|
||||
];
|
||||
|
||||
local nonGkeVolumeMounts = [
|
||||
{
|
||||
mountPath: "/secret/gcp-credentials",
|
||||
name: "gcp-credentials",
|
||||
},
|
||||
];
|
||||
|
||||
local tfjob = {
|
||||
apiVersion: "kubeflow.org/v1alpha2",
|
||||
kind: "TFJob",
|
||||
metadata: {
|
||||
name: updatedParams.jobName,
|
||||
namespace: namespace,
|
||||
},
|
||||
spec: {
|
||||
tfReplicaSpecs: {
|
||||
Master: {
|
||||
replicas: 1,
|
||||
template: {
|
||||
spec: {
|
||||
containers: [
|
||||
{
|
||||
command: masterCommand,
|
||||
env: if cloud != "gke" then nonGkeEnv else baseEnv,
|
||||
image: if updatedParams.workerGpu > 0 then updatedParams.gpuImage else updatedParams.cpuImage,
|
||||
name: "tensorflow",
|
||||
[if updatedParams.workerGpu > 0 then "resources"]: gpuResources,
|
||||
[if cloud != "gke" then "volumeMounts"]: nonGkeVolumeMounts,
|
||||
},
|
||||
],
|
||||
[if cloud != "gke" then "imagePullSecrets"]: nonGkeImagePullSecrets,
|
||||
restartPolicy: "OnFailure",
|
||||
[if cloud != "gke" then "volumes"]: nonGkeVolumes,
|
||||
},
|
||||
},
|
||||
}, // Master
|
||||
|
||||
Worker: {
|
||||
replicas: updatedParams.workers,
|
||||
template: {
|
||||
spec: {
|
||||
containers: [
|
||||
{
|
||||
command: workerCommand,
|
||||
env: if cloud != "gke" then nonGkeEnv else baseEnv,
|
||||
image: if updatedParams.workerGpu > 0 then updatedParams.gpuImage else updatedParams.cpuImage,
|
||||
name: "tensorflow",
|
||||
[if updatedParams.workerGpu > 0 then "resources"]: gpuResources,
|
||||
[if cloud != "gke" then "volumeMounts"]: nonGkeVolumeMounts,
|
||||
},
|
||||
],
|
||||
[if cloud != "gke" then "imagePullSecrets"]: nonGkeImagePullSecrets,
|
||||
restartPolicy: "OnFailure",
|
||||
[if cloud != "gke" then "volumes"]: nonGkeVolumes,
|
||||
},
|
||||
},
|
||||
}, // Worker
|
||||
Ps: {
|
||||
replicas: updatedParams.ps,
|
||||
template: {
|
||||
spec: {
|
||||
containers: [
|
||||
{
|
||||
command: psCommand,
|
||||
env: if cloud != "gke" then nonGkeEnv else baseEnv,
|
||||
image: updatedParams.cpuImage,
|
||||
name: "tensorflow",
|
||||
[if cloud != "gke" then "volumeMounts"]: nonGkeVolumeMounts,
|
||||
},
|
||||
],
|
||||
[if cloud != "gke" then "imagePullSecrets"]: nonGkeImagePullSecrets,
|
||||
restartPolicy: "OnFailure",
|
||||
[if cloud != "gke" then "volumes"]: nonGkeVolumes,
|
||||
},
|
||||
},
|
||||
}, // Ps
|
||||
}, // tfReplicaSpecs
|
||||
}, // Spec
|
||||
}; // tfJob
|
||||
|
||||
k.core.v1.list.new([
|
||||
tfjob,
|
||||
])
|
||||
|
||||
|
||||
|
||||
|
|
@@ -0,0 +1,95 @@
|
|||
local env = std.extVar("__ksonnet/environments");
|
||||
local params = std.extVar("__ksonnet/params").components["t2ttpu"];
|
||||
|
||||
local k = import "k.libsonnet";
|
||||
|
||||
local name = params.name;
|
||||
local namespace = env.namespace;
|
||||
|
||||
local updatedParams = {
|
||||
cloud: "gke",
|
||||
|
||||
dataDir: "gs://kubeflow-demo-base/featurization/yelp-data",
|
||||
usrDir: "./yelp_sentiment",
|
||||
problem: "yelp_sentiment",
|
||||
|
||||
model: "transformer_encoder",
|
||||
hparams: "transformer_yelp_sentiment",
|
||||
hparamsSet: "transformer_yelp_sentiment",
|
||||
|
||||
outputGCSPath: "gs://kubeflow-demo-base/training/yelp-model-TPU",
|
||||
|
||||
cpuImage: "gcr.io/kubeflow-demo-base/kubeflow-yelp-demo-cpu:latest",
|
||||
gpuImage: "gcr.io/kubeflow-demo-base/kubeflow-yelp-demo-gpu:latest",
|
||||
|
||||
trainSteps: 1000,
|
||||
evalSteps: 10,
|
||||
|
||||
tpus: 8,
|
||||
|
||||
jobName: "t2ttpu",
|
||||
|
||||
tpuEndpoint: "$(KUBE_GOOGLE_CLOUD_TPU_ENDPOINTS)",
|
||||
} + params;
|
||||
|
||||
local cloud = std.toString(updatedParams.cloud);
|
||||
|
||||
local tfjob = {
|
||||
apiVersion: "kubeflow.org/v1alpha2",
|
||||
kind: "TFJob",
|
||||
metadata: {
|
||||
name: updatedParams.jobName,
|
||||
namespace: namespace,
|
||||
},
|
||||
spec: {
|
||||
tfReplicaSpecs: {
|
||||
Master: {
|
||||
replicas: 1,
|
||||
template: {
|
||||
metadata: {
|
||||
annotations: {
|
||||
"tf-version.cloud-tpus.google.com": "1.9",
|
||||
},
|
||||
},
|
||||
spec: {
|
||||
containers: [
|
||||
{
|
||||
args: [
|
||||
"--model=" + updatedParams.model,
|
||||
"--hparams_set=" + updatedParams.hparamsSet,
|
||||
"--problem=" + updatedParams.problem,
|
||||
"--t2t_usr_dir=" + updatedParams.usrDir,
|
||||
"--train_steps=" + updatedParams.trainSteps,
|
||||
"--eval_steps=" + updatedParams.evalSteps,
|
||||
"--data_dir=" + updatedParams.dataDir,
|
||||
"--output_dir=" + updatedParams.outputGCSPath,
|
||||
"--use_tpu",
|
||||
"--master=" + updatedParams.tpuEndpoint,
|
||||
],
|
||||
command: [
|
||||
"t2t-trainer",
|
||||
],
|
||||
image: updatedParams.cpuImage,
|
||||
name: "tensorflow",
|
||||
resources: {
|
||||
"limits": {
|
||||
"cloud-tpus.google.com/v2": updatedParams.tpus,
|
||||
},
|
||||
requests: {
|
||||
memory: "1Gi",
|
||||
},
|
||||
},
|
||||
},
|
||||
],
|
||||
restartPolicy: "OnFailure",
|
||||
}, // spec
|
||||
}, // template
|
||||
}, // Master
|
||||
}, // tfReplicaSpecs
|
||||
}, // Spec
|
||||
}; // tfJob
|
||||
|
||||
k.core.v1.list.new([
|
||||
tfjob,
|
||||
])
|
||||
|
||||
|
|
@@ -4,4 +4,4 @@ local k = import "k.libsonnet";
|
|||
|
||||
local ui = import "ui.libsonnet";
|
||||
|
||||
std.prune(k.core.v1.list.new(ui.all(params, env)))
|
||||
std.prune(k.core.v1.list.new(ui.parts(params, env)))
|
||||
|
|
@@ -0,0 +1,102 @@
|
|||
{
|
||||
parts(params, env):: [
|
||||
{
|
||||
apiVersion: "v1",
|
||||
kind: "Service",
|
||||
metadata: {
|
||||
name: "kubeflow-demo-ui",
|
||||
namespace: env.namespace,
|
||||
annotations: {
|
||||
"getambassador.io/config":
|
||||
std.join("\n", [
|
||||
"---",
|
||||
"apiVersion: ambassador/v0",
|
||||
"kind: Mapping",
|
||||
"name: kubeflow_demo_ui",
|
||||
"prefix: /kubeflow_demo/",
|
||||
"rewrite: /",
|
||||
"service: kubeflow-demo-ui:80",
|
||||
]),
|
||||
},
|
||||
},
|
||||
spec: {
|
||||
ports: [
|
||||
{
|
||||
port: 80,
|
||||
targetPort: 80,
|
||||
},
|
||||
],
|
||||
selector: {
|
||||
app: "kubeflow-demo-ui",
|
||||
},
|
||||
type: "ClusterIP",
|
||||
},
|
||||
},
|
||||
{
|
||||
apiVersion: "apps/v1beta1",
|
||||
kind: "Deployment",
|
||||
metadata: {
|
||||
name: "kubeflow-demo-ui",
|
||||
namespace: env.namespace,
|
||||
},
|
||||
spec: {
|
||||
replicas: 1,
|
||||
template: {
|
||||
metadata: {
|
||||
labels: {
|
||||
app: "kubeflow-demo-ui",
|
||||
},
|
||||
},
|
||||
spec: {
|
||||
containers: [
|
||||
{
|
||||
args: [
|
||||
"app.py",
|
||||
"--model_url",
|
||||
"http://serving:8000/model/serving:predict",
|
||||
"--data_dir",
|
||||
"gs://kubeflow-demo-base/featurization/yelp-data-1000000",
|
||||
],
|
||||
command: [
|
||||
"python",
|
||||
],
|
||||
image: params.image,
|
||||
name: "kubeflow-demo-ui",
|
||||
ports: [
|
||||
{
|
||||
containerPort: 80,
|
||||
},
|
||||
],
|
||||
"env": [
|
||||
{
|
||||
name: "GOOGLE_APPLICATION_CREDENTIALS",
|
||||
value: "/secret/gcp-credentials/key.json"
|
||||
},
|
||||
],
|
||||
"volumeMounts": [
|
||||
{
|
||||
mountPath: "/secret/gcp-credentials",
|
||||
name: "gcp-credentials",
|
||||
},
|
||||
],
|
||||
},
|
||||
],
|
||||
"imagePullSecrets": [
|
||||
{
|
||||
name: "gcp-registry-credentials",
|
||||
},
|
||||
],
|
||||
"volumes": [
|
||||
{
|
||||
name: "gcp-credentials",
|
||||
secret: {
|
||||
secretName: "gcp-credentials",
|
||||
},
|
||||
},
|
||||
],
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
],
|
||||
}
|
||||
|
|
@@ -115,7 +115,7 @@ ks generate tf-job-simple train
|
|||
```
|
||||
This ksonnet prototype needs to be modified slightly for our needs; you can simply copy the updated version from the repository.
|
||||
```
|
||||
cp ../tensorflow-model/CPU/train.jsonnet ./components/train.jsonnet
|
||||
cp ../tensorflow_model/CPU/train.jsonnet ./components/train.jsonnet
|
||||
```
|
||||
|
||||
Now we need to define the parameters which are currently set as placeholders in the training job prototype.
|
||||
|
|
@@ -252,7 +252,7 @@ We will create a separate pool and install the necessary NVIDIA GPU device drive
|
|||
For more instruction on how to handle GPUs on Kubernetes, see https://cloud.google.com/kubernetes-engine/docs/how-to/gpus.
|
||||
|
||||
```
|
||||
cloud container node-pools create gpu-pool --accelerator type=nvidia-tesla-k80,count=1 --zone europe-west1-b --cluster kubeflow --num-nodes 1 --min-nodes 1 --max-nodes 1 --enable-autoscaling --scopes=https://www.googleapis.com/auth/cloud-platform
|
||||
gcloud container node-pools create gpu-pool --accelerator type=nvidia-tesla-k80,count=1 --zone europe-west1-b --cluster kubeflow --num-nodes 1 --min-nodes 1 --max-nodes 1 --enable-autoscaling --scopes=https://www.googleapis.com/auth/cloud-platform
|
||||
kubectl apply -f https://raw.githubusercontent.com/GoogleCloudPlatform/container-engine-accelerators/stable/nvidia-driver-installer/cos/daemonset-preloaded.yaml
|
||||
```
|
||||
|
||||
|
|
|
|||
|
|
@@ -44,22 +44,6 @@ local tfjob = {
|
|||
},
|
||||
},
|
||||
},
|
||||
Ps: {
|
||||
template: {
|
||||
spec: {
|
||||
containers: [
|
||||
{
|
||||
args: args,
|
||||
image: params.cpuImage,
|
||||
name: "tensorflow",
|
||||
workingDir: "/opt/workdir",
|
||||
},
|
||||
],
|
||||
restartPolicy: "OnFailure",
|
||||
},
|
||||
},
|
||||
tfReplicaType: "PS",
|
||||
},
|
||||
},
|
||||
},
|
||||
};
|
||||
|
|
|
|||
|
|
@@ -0,0 +1 @@
|
|||
build/**
|
||||
|
|
@@ -1,74 +1,114 @@
|
|||
# Setup Kubeflow
|
||||
|
||||
In this part, you will setup kubeflow on an existing kubernetes cluster.
|
||||
In this section, you will set up Kubeflow on an existing Kubernetes cluster.
|
||||
|
||||
## Requirements
|
||||
|
||||
* A kubernetes cluster
|
||||
* To create a managed cluster run
|
||||
```commandline
|
||||
gcloud container clusters create kubeflow-examples-cluster
|
||||
```
|
||||
or use kubeadm: [docs](https://kubernetes.io/docs/setup/independent/create-cluster-kubeadm/)
|
||||
* `kubectl` CLI (command line interface) pointing to the kubernetes cluster
|
||||
* A Kubernetes cluster
|
||||
* To create a cluster, follow the instructions on the
|
||||
[Set up Kubernetes](https://www.kubeflow.org/docs/started/getting-started/#set-up-kubernetes)
|
||||
section of the Kubeflow Getting Started guide. We recommend using a
|
||||
managed service such as Google Kubernetes Engine (GKE).
|
||||
[This link](https://www.kubeflow.org/docs/started/getting-started-gke/)
|
||||
guides you through the process of using either
|
||||
[Click-to-Deploy](https://deploy.kubeflow.cloud/#/deploy) (a web-based UI) or
|
||||
[`kfctl`](https://github.com/kubeflow/kubeflow/blob/master/scripts/kfctl.sh)
|
||||
(a CLI tool) to generate a GKE cluster with all Kubeflow components
|
||||
installed. Note that there is no need to complete the Deploy Kubeflow steps
|
||||
below if you use either of these two tools.
|
||||
* The Kubernetes CLI `kubectl` pointing to the Kubernetes cluster
|
||||
* Make sure that you can run `kubectl get nodes` from your terminal
|
||||
successfully
|
||||
* The ksonnet CLI, v0.9.2 or higher: [ks](https://ksonnet.io/#get-started)
|
||||
* The ksonnet CLI [`ks`](https://ksonnet.io/#get-started), v0.9.2 or higher:
|
||||
* In case you want to install a particular version of ksonnet, you can run
|
||||
|
||||
```commandline
|
||||
export KS_VER=ks_0.11.0_linux_amd64
|
||||
wget -O /tmp/$KS_VER.tar.gz https://github.com/ksonnet/ksonnet/releases/download/v0.11.0/$KS_VER.tar.gz
|
||||
```bash
|
||||
export KS_VER=0.13.1
|
||||
export KS_BIN=ks_${KS_VER}_linux_amd64
|
||||
wget -O /tmp/${KS_BIN}.tar.gz https://github.com/ksonnet/ksonnet/releases/download/v${KS_VER}/${KS_BIN}.tar.gz
|
||||
mkdir -p ${HOME}/bin
|
||||
tar -xvf /tmp/$KS_VER.tar.gz -C ${HOME}/bin
|
||||
export PATH=$PATH:${HOME}/bin/$KS_VER
|
||||
tar -xvf /tmp/${KS_BIN}.tar.gz -C ${HOME}/bin
|
||||
export PATH=$PATH:${HOME}/bin/${KS_BIN}
|
||||
```
|
||||
|
||||
## Kubeflow setup
|
||||
|
||||
Refer to the [user
|
||||
guide](https://github.com/kubeflow/kubeflow/blob/master/user_guide.md) for
|
||||
detailed instructions on how to setup kubeflow on your kubernetes cluster.
|
||||
Refer to the [guide](https://www.kubeflow.org/docs/started/getting-started/) for
|
||||
detailed instructions on how to set up Kubeflow on your Kubernetes cluster.
|
||||
Specifically, complete the following sections:
|
||||
|
||||
* [Deploy
|
||||
Kubeflow](https://github.com/kubeflow/kubeflow/blob/master/user_guide.md#deploy-kubeflow)
|
||||
* The `ks-kubeflow` directory can be used instead of creating a ksonnet
|
||||
app from scratch.
|
||||
* If you run into
|
||||
[API rate limiting errors](https://github.com/ksonnet/ksonnet/blob/master/docs/troubleshooting.md#github-rate-limiting-errors),
|
||||
ensure you have a `${GITHUB_TOKEN}` environment variable set.
|
||||
* If you run into
|
||||
[RBAC permissions issues](https://github.com/kubeflow/kubeflow/blob/master/user_guide.md#rbac-clusters)
|
||||
running `ks apply` commands, be sure you have created a `cluster-admin` ClusterRoleBinding for your username.
|
||||
* [Setup a persistent disk](https://www.kubeflow.org/docs/other-guides/advanced/)
|
||||
* We need a shared persistent disk to store our training data since
|
||||
containers' filesystems are ephemeral and don't have a lot of storage space.
|
||||
* For this example, provision a `10GB` cluster-wide shared NFS mount with the
|
||||
name `github-issues-data`.
|
||||
* After the NFS is ready, delete the `tf-hub-0` pod so that it gets recreated and
|
||||
picks up the NFS mount. You can delete it by running `kubectl delete pod
|
||||
tf-hub-0 -n=${NAMESPACE}`
|
||||
* [Bringing up a
|
||||
Notebook](https://github.com/kubeflow/kubeflow/blob/master/user_guide.md#bringing-up-a-jupyter-notebook)
|
||||
* When choosing an image for your cluster in the JupyterHub UI, use the
|
||||
image from this example:
|
||||
[`gcr.io/kubeflow-dev/issue-summarization-notebook-cpu:latest`](https://github.com/kubeflow/examples/blob/master/github_issue_summarization/workflow/Dockerfile).
|
||||
|
||||
* [Deploy Kubeflow](https://www.kubeflow.org/docs/other-guides/advanced/)
|
||||
* The latest version that was tested with this walkthrough was v0.4.0-rc.2.
|
||||
* The [`kfctl`](https://github.com/kubeflow/kubeflow/blob/master/scripts/kfctl.sh)
|
||||
CLI tool can be used to install Kubeflow on an existing cluster. Follow
|
||||
[this guide](https://www.kubeflow.org/docs/started/getting-started/#kubeflow-quick-start)
|
||||
to use `kfctl` to generate a ksonnet app, create Kubeflow manifests, and
|
||||
install all default components onto an existing Kubernetes cluster. Note
|
||||
that you can likely skip this step if you used
|
||||
[Click-to-Deploy](https://deploy.kubeflow.cloud/#/deploy)
|
||||
or `kfctl` to generate your cluster.
|
||||
|
||||
* [Setup a persistent disk](https://www.kubeflow.org/docs/guides/advanced/)
|
||||
|
||||
* We need a shared persistent disk to store our training data since
|
||||
containers' filesystems are ephemeral and don't have a lot of storage space.
|
||||
|
||||
* For this example, provision a `10GB` cluster-wide shared NFS mount with the
|
||||
name `github-issues-data`.
|
||||
|
||||
* After the NFS is ready, delete the `jupyter-0` pod so that it gets recreated and
|
||||
picks up the NFS mount. You can delete it by running `kubectl delete pod
|
||||
jupyter-0 -n=${NAMESPACE}`
|
||||
|
||||
* [Bringing up a
|
||||
Notebook](https://www.kubeflow.org/docs/guides/components/jupyter/)
|
||||
|
||||
* When choosing an image for your cluster in the JupyterHub UI, use the
|
||||
image from this example:
|
||||
[`gcr.io/kubeflow-dev/issue-summarization-notebook-cpu:latest`](https://github.com/kubeflow/examples/blob/master/github_issue_summarization/workflow/Dockerfile).
|
||||
|
||||
|
||||
After completing that, you should have the following ready:
|
||||
|
||||
* A ksonnet app in a directory named `ks-kubeflow`
|
||||
* An output similar to this for `kubectl get pods` command
|
||||
* A ksonnet app in a directory named `ks_app`
|
||||
* An output similar to this for the `kubectl -n kubeflow get pods` command
|
||||
|
||||
```commandline
|
||||
NAME READY STATUS RESTARTS AGE
|
||||
ambassador-75bb54594-dnxsd 2/2 Running 0 3m
|
||||
ambassador-75bb54594-hjj6m 2/2 Running 0 3m
|
||||
ambassador-75bb54594-z948h 2/2 Running 0 3m
|
||||
jupyter-chasm 1/1 Running 0 49s
|
||||
spartakus-volunteer-565b99cd69-knjf2 1/1 Running 0 3m
|
||||
tf-hub-0 1/1 Running 0 3m
|
||||
tf-job-dashboard-6c757d8684-d299l 1/1 Running 0 3m
|
||||
tf-job-operator-77776c8446-lpprm 1/1 Running 0 3m
|
||||
```bash
|
||||
NAME READY STATUS RESTARTS AGE
|
||||
ambassador-5cf8cd97d5-6qlpz 1/1 Running 0 3m
|
||||
ambassador-5cf8cd97d5-rqzkx 1/1 Running 0 3m
|
||||
ambassador-5cf8cd97d5-wz9hl 1/1 Running 0 3m
|
||||
argo-ui-7c9c69d464-xpphz 1/1 Running 0 3m
|
||||
centraldashboard-6f47d694bd-7jfmw 1/1 Running 0 3m
|
||||
cert-manager-5cb7b9fb67-qjd9p 1/1 Running 0 3m
|
||||
cm-acme-http-solver-2jr47 1/1 Running 0 3m
|
||||
ingress-bootstrap-x6whr 1/1 Running 0 3m
|
||||
jupyter-0 1/1 Running 0 3m
|
||||
jupyter-chasm 1/1 Running 0 49s
|
||||
katib-ui-54b4667bc6-cg4jk 1/1 Running 0 3m
|
||||
metacontroller-0 1/1 Running 0 3m
|
||||
minio-7bfcc6c7b9-qrshc 1/1 Running 0 3m
|
||||
ml-pipeline-b59b58dd6-bwm8t 1/1 Running 0 3m
|
||||
ml-pipeline-persistenceagent-9ff99498c-v4k8f 1/1 Running 0 3m
|
||||
ml-pipeline-scheduledworkflow-78794fd86f-4tzxp 1/1 Running 0 3m
|
||||
ml-pipeline-ui-9884fd997-7jkdk 1/1 Running 0 3m
|
||||
ml-pipelines-load-samples-668gj 0/1 Completed 0 3m
|
||||
mysql-6f6b5f7b64-qgbkz 1/1 Running 0 3m
|
||||
pytorch-operator-6f87db67b7-nld5h 1/1 Running 0 3m
|
||||
spartakus-volunteer-7c77dc796-7jgtd 1/1 Running 0 3m
|
||||
studyjob-controller-68c6fc5bc8-jkc9q 1/1 Running 0 3m
|
||||
tf-job-dashboard-5f986cf99d-kb6gp 1/1 Running 0 3m
|
||||
tf-job-operator-v1beta1-5876c48976-q96nh 1/1 Running 0 3m
|
||||
vizier-core-78f57695d6-5t8z7 1/1 Running 0 3m
|
||||
vizier-core-rest-7d7dd7dbb8-dbr7n 1/1 Running 0 3m
|
||||
vizier-db-777675b958-c46qh 1/1 Running 0 3m
|
||||
vizier-suggestion-bayesianoptimization-7f46d8cb47-wlltt 1/1 Running 0 3m
|
||||
vizier-suggestion-grid-64c5f8bdf-2bznv 1/1 Running 0 3m
|
||||
vizier-suggestion-hyperband-8546bf5885-54hr6 1/1 Running 0 3m
|
||||
vizier-suggestion-random-c4c8d8667-l96vs 1/1 Running 0 3m
|
||||
whoami-app-7b575b555d-85nb8 1/1 Running 0 3m
|
||||
workflow-controller-5c95f95f58-hprd5 1/1 Running 0 3m
|
||||
```
|
||||
|
||||
* A Jupyter Notebook accessible at http://127.0.0.1:8000
|
||||
|
|
@@ -77,10 +117,14 @@ tf-job-operator-77776c8446-lpprm 1/1 Running 0
|
|||
|
||||
## Summary
|
||||
|
||||
* We created a ksonnet app for our kubeflow deployment
|
||||
* We deployed the kubeflow-core component to our kubernetes cluster
|
||||
* We created a disk for storing our training data
|
||||
* We connected to JupyterHub and spawned a new Jupyter notebook
|
||||
* For additional details and playground visit [katacoda](https://www.katacoda.com/kubeflow/scenarios/deploying-github-issue-summarization)
|
||||
* We created a ksonnet app for our kubeflow deployment: `ks_app`.
|
||||
* We deployed the default Kubeflow components to our Kubernetes cluster.
|
||||
* We created a disk for storing our training data.
|
||||
* We connected to JupyterHub and spawned a new Jupyter notebook.
|
||||
* For additional details and self-paced learning scenarios related to this
|
||||
example, see the
|
||||
[Resources](https://www.kubeflow.org/docs/started/getting-started/#resources)
|
||||
section of the
|
||||
[Getting Started Guide](https://www.kubeflow.org/docs/started/getting-started/).
|
||||
|
||||
*Next*: [Training the model](02_training_the_model.md)
|
||||
*Next*: [Training the model with a notebook](02_training_the_model.md)
|
||||
|
|
|
|||
|
|
@@ -1,23 +1,26 @@
|
|||
# Distributed training using Estimator
|
||||
|
||||
Distributed training with keras currently doesn't work; see
|
||||
Distributed training with Keras currently does not work. Do not follow this guide
|
||||
until these issues have been resolved:
|
||||
|
||||
* kubeflow/examples#280
|
||||
* kubeflow/examples#96
|
||||
* [kubeflow/examples#280](https://github.com/kubeflow/examples/issues/280)
|
||||
* [kubeflow/examples#196](https://github.com/kubeflow/examples/issues/196)
|
||||
|
||||
Requires Tensorflow 1.9 or later.
|
||||
Requires TensorFlow 1.9 or later.
|
||||
Requires a [StorageClass](https://kubernetes.io/docs/concepts/storage/storage-classes/) capable of creating ReadWriteMany persistent volumes.
|
||||
|
||||
On GKE, you can follow the [GCFS documentation](https://master.kubeflow.org/docs/started/getting-started-gke/#using-gcfs-with-kubeflow) to enable it.
|
||||
|
||||
Estimator and Keras are both part of Tensorflow. These high level APIs are designed
|
||||
to make building models easier. In our distributed training example we will show how both
|
||||
Estimator and Keras are both part of TensorFlow. These high-level APIs are designed
|
||||
to make building models easier. In our distributed training example, we will show how both
|
||||
APIs work together to help build models that are trainable in both single-node and
|
||||
distributed settings.
|
||||
|
||||
## Keras and Estimators
|
||||
|
||||
Code required to run this example can be found in [distributed](https://github.com/kubeflow/examples/tree/master/github_issue_summarization/distributed) directory.
|
||||
Code required to run this example can be found in the
|
||||
[distributed](https://github.com/kubeflow/examples/tree/master/github_issue_summarization/distributed)
|
||||
directory.
|
||||
|
||||
You can read more about Estimators [here](https://www.tensorflow.org/guide/estimators).
|
||||
In our example, we will leverage the `model_to_estimator` function, which allows us to turn an existing tf.keras model into an estimator, and therefore allow it to
|
||||
|
|
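A minimal sketch of that conversion (TensorFlow 1.9+). The toy network is illustrative only, not the example's actual model:

```python
import tensorflow as tf

model = tf.keras.Sequential([
    tf.keras.layers.Dense(64, activation="relu", input_shape=(100,)),
    tf.keras.layers.Dense(1, activation="sigmoid"),
])
model.compile(optimizer="adam", loss="binary_crossentropy")

# The resulting Estimator can be driven by tf.estimator.train_and_evaluate,
# which supports both single-node and distributed execution via TF_CONFIG.
estimator = tf.keras.estimator.model_to_estimator(keras_model=model)
```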
@@ -93,3 +96,7 @@ tool for us. Please refer to [documentation](https://www.tensorflow.org/guide/pr
|
|||
## Model
|
||||
|
||||
After training is complete, our model can be found in the "model" PVC.
|
||||
|
||||
*Next*: [Serving the model](03_serving_the_model.md)
|
||||
|
||||
*Back*: [Setup a kubeflow cluster](01_setup_a_kubeflow_cluster.md)
|
||||
|
|
|
|||
|
|
@@ -1,14 +1,14 @@
|
|||
# Training the model
|
||||
# Training the model with a notebook
|
||||
|
||||
By this point, you should have a Jupyter Notebook running at http://127.0.0.1:8000.
|
||||
By this point, you should have a Jupyter notebook running at http://127.0.0.1:8000.
|
||||
|
||||
## Download training files
|
||||
|
||||
Open the Jupyter Notebook interface and create a new Terminal by clicking on
|
||||
menu, *New -> Terminal*. In the Terminal, clone this git repo by executing: `
|
||||
Open the Jupyter notebook interface and create a new Terminal by clicking on
|
||||
menu, *New -> Terminal*. In the Terminal, clone this git repo by executing:
|
||||
|
||||
```commandline
|
||||
git clone https://github.com/kubeflow/examples.git`
|
||||
```bash
|
||||
git clone https://github.com/kubeflow/examples.git
|
||||
```
|
||||
|
||||
Now you should have all the code required to complete training in the `examples/github_issue_summarization/notebooks` folder. Navigate to this folder.
|
||||
|
|
@@ -19,7 +19,7 @@ Here you should see two files:
|
|||
|
||||
## Perform training
|
||||
|
||||
Open th `Training.ipynb` notebook. This contains a complete walk-through of
|
||||
Open the `Training.ipynb` notebook. This contains a complete walk-through of
|
||||
downloading the training data, preprocessing it, and training it.
|
||||
|
||||
Run the `Training.ipynb` notebook, viewing the output at each step to confirm
|
||||
|
|
@@ -44,9 +44,9 @@ kubectl --namespace=${NAMESPACE} cp ${PODNAME}:/home/jovyan/examples/github_issu
|
|||
kubectl --namespace=${NAMESPACE} cp ${PODNAME}:/home/jovyan/examples/github_issue_summarization/notebooks/title_pp.dpkl .
|
||||
```
|
||||
|
||||
For information on:
|
||||
_(Optional)_ You can also perform training with two alternate methods:
|
||||
- [Training the model using TFJob](02_training_the_model_tfjob.md)
|
||||
- [Distributed training using tensor2tensor](02_tensor2tensor_training.md)
|
||||
- [Distributed training using Estimator](02_distributed_training.md)
|
||||
|
||||
*Next*: [Serving the model](03_serving_the_model.md)
|
||||
|
||||
|
|
|
|||
|
|
@@ -1,32 +1,35 @@
|
|||
# Training the model using TFJob
|
||||
|
||||
Kubeflow offers a TensorFlow job controller for kubernetes. This allows you to run your distributed Tensorflow training
|
||||
job on a kubernetes cluster. For this training job, we will read our training data from GCS and write our output model
|
||||
Kubeflow offers a TensorFlow job controller for Kubernetes. This allows you to run your distributed TensorFlow training
|
||||
job on a Kubernetes cluster. For this training job, we will read our training
|
||||
data from Google Cloud Storage (GCS) and write our output model
|
||||
back to GCS.
|
||||
|
||||
## Create the image for training
|
||||
|
||||
The [notebooks](notebooks) directory contains the necessary files to create a image for training. The [train.py](notebooks/train.py) file contains the training code. Here is how you can create an image and push it to gcr.
|
||||
The [notebooks](notebooks) directory contains the necessary files to create an
|
||||
image for training. The [train.py](notebooks/train.py) file contains the
|
||||
training code. Here is how you can create an image and push it to Google
|
||||
Container Registry (GCR):
|
||||
|
||||
```commandline
|
||||
```bash
|
||||
cd notebooks/
|
||||
make PROJECT=${PROJECT} set-image
|
||||
```
|
||||
## Train Using PVC
|
||||
|
||||
If you don't have access to GCS or don't want to use GCS you
|
||||
can use a persistent volume to store the data and model.
|
||||
If you don't have access to GCS or do not wish to use GCS, you
|
||||
can use a Persistent Volume Claim (PVC) to store the data and model.
|
||||
|
||||
Create a pvc
|
||||
Note: your cluster must have a default storage class defined for this to work.
|
||||
Create a PVC:
|
||||
|
||||
```
|
||||
ks apply --env=${KF_ENV} -c data-pvc
|
||||
```
|
||||
|
||||
* Your cluster must have a default storage class defined for
|
||||
this to work.
|
||||
|
||||
Run the job to download the data to the PVC.
|
||||
|
||||
Run the job to download the data to the PVC:
|
||||
|
||||
```
|
||||
ks apply --env=${KF_ENV} -c data-downloader
|
||||
|
|
@@ -38,24 +41,24 @@ Submit the training job
|
|||
ks apply --env=${KF_ENV} -c tfjob-pvc
|
||||
```
|
||||
|
||||
The resulting model will be stored on PVC so to access it you will
|
||||
need to run a pod and attach the PVC. For serving you can just
|
||||
attach it the pod serving the model.
|
||||
The resulting model will be stored on the PVC, so to access it you will
|
||||
need to run a pod and attach the PVC. For serving, you can just
|
||||
attach it to the pod serving the model.
|
||||
|
||||
## Training Using GCS
|
||||
|
||||
If you are running on GCS you can train using GCS to store the input
|
||||
If you are using GCS, you can train using GCS to store the input
|
||||
and the resulting model.
|
||||
|
||||
### GCS Service account
|
||||
### GCS service account
|
||||
|
||||
* Create a service account which will be used to read and write data from the GCS Bucket.
|
||||
* Create a service account that will be used to read and write data from the GCS bucket.
|
||||
|
||||
* Give the storage account `roles/storage.admin` role so that it can access GCS Buckets.
|
||||
* Give the service account the `roles/storage.admin` role so that it can access GCS buckets.
|
||||
|
||||
* Download its key as a json file and create a secret named `user-gcp-sa` with the key `user-gcp-sa.json`
|
||||
|
||||
```commandline
|
||||
```bash
|
||||
SERVICE_ACCOUNT=github-issue-summarization
|
||||
PROJECT=kubeflow-example-project # The GCP Project name
|
||||
gcloud iam service-accounts --project=${PROJECT} create ${SERVICE_ACCOUNT} \
|
||||
|
|
@@ -74,12 +77,12 @@ kubectl --namespace=${NAMESPACE} create secret generic user-gcp-sa --from-file=u
|
|||
|
||||
### Run the TFJob using your image
|
||||
|
||||
[ks-kubeflow](ks-kubeflow) contains a ksonnet app to deploy the TFJob.
|
||||
[ks_app](ks_app) contains a ksonnet app to deploy the TFJob.
|
||||
|
||||
Set the appropriate params for the tfjob component
|
||||
Set the appropriate params for the tfjob component:
|
||||
|
||||
```commandline
|
||||
cd ks-kubeflow
|
||||
```bash
|
||||
cd ks_app
|
||||
ks param set tfjob namespace ${NAMESPACE} --env=${KF_ENV}
|
||||
|
||||
# The image pushed in the previous step
|
||||
|
|
@@ -97,30 +100,31 @@ ks param set tfjob output_model_gcs_path "github-issue-summarization-data/output
|
|||
|
||||
Deploy the app:
|
||||
|
||||
```commandline
|
||||
```bash
|
||||
ks apply ${KF_ENV} -c tfjob
|
||||
```
|
||||
|
||||
After a while, you should see a new pod with the label `tf_job_name=tf-job-issue-summarization`:
|
||||
```commandline
|
||||
kubectl get pods -n=${NAMESPACE} -ltf_job_name=tf-job-issue-summarization
|
||||
```bash
|
||||
kubectl get pods -n=${NAMESPACE} tfjob-issue-summarization-master-0
|
||||
```
|
||||
|
||||
You can view the training logs using
|
||||
|
||||
```bash
|
||||
kubectl logs -f -n=${NAMESPACE} tfjob-issue-summarization-master-0
|
||||
```
|
||||
|
||||
You can view the logs of the tf-job operator using
|
||||
|
||||
```commandline
|
||||
kubectl logs -f $(kubectl get pods -n=${NAMESPACE} -lname=tf-job-operator -o=jsonpath='{.items[0].metadata.name}')
|
||||
```bash
|
||||
kubectl logs -f -n=${NAMESPACE} $(kubectl get pods -n=${NAMESPACE} -lname=tf-job-operator -o=jsonpath='{.items[0].metadata.name}')
|
||||
```
|
||||
|
||||
You can view the actual training logs using
|
||||
|
||||
```commandline
|
||||
kubectl logs -f $(kubectl get pods -n=${NAMESPACE} -ltf_job_name=tf-job-issue-summarization -o=jsonpath='{.items[0].metadata.name}')
|
||||
```
|
||||
|
||||
For information on:
|
||||
- [Training the model](02_training_the_model.md)
|
||||
- [Distributed training using tensor2tensor](02_tensor2tensor_training.md)
|
||||
_(Optional)_ You can also perform training with two alternate methods:
|
||||
- [Training the model with a notebook](02_training_the_model.md)
|
||||
- [Distributed training using Estimator](02_distributed_training.md)
|
||||
|
||||
*Next*: [Serving the model](03_serving_the_model.md)
|
||||
|
||||
|
|
|
|||
|
|
@@ -1,8 +1,8 @@
|
|||
# Serving the model
|
||||
|
||||
We are going to use [seldon-core](https://github.com/SeldonIO/seldon-core) to serve the model. [IssueSummarization.py](notebooks/IssueSummarization.py) contains the code for this model. We will wrap this class into a seldon-core microservice which we can then deploy as a REST or GRPC API server.
|
||||
We are going to use [Seldon Core](https://github.com/SeldonIO/seldon-core) to serve the model. [IssueSummarization.py](notebooks/IssueSummarization.py) contains the code for this model. We will wrap this class into a seldon-core microservice which we can then deploy as a REST or GRPC API server.
|
||||
|
||||
> The model is written in Keras and when exported as a TensorFlow model seems to be incompatible with TensorFlow Serving. So we're using seldon-core to serve this model since seldon-core allows you to serve any arbitrary model. More details [here](https://github.com/kubeflow/examples/issues/11#issuecomment-371005885).
|
||||
> The model is written in Keras and when exported as a TensorFlow model seems to be incompatible with TensorFlow Serving. So we're using Seldon Core to serve this model since seldon-core allows you to serve any arbitrary model. More details [here](https://github.com/kubeflow/examples/issues/11#issuecomment-371005885).
|
||||
|
||||
# Building a model server
|
||||
|
||||
|
|
@@ -14,42 +14,60 @@ You have two options for getting a model server
|
|||
* So you can just run this image to get a pre-trained model
|
||||
* Serving your own model using this server is discussed below
|
||||
|
||||
1. You can build your own model server as discussed below
|
||||
1. You can build your own model server as discussed below. For this you will need to install the [Source2Image executable s2i](https://github.com/openshift/source-to-image).
|
||||
|
||||
|
||||
## Wrap the model into a seldon-core microservice
|
||||
## Wrap the model into a Seldon Core microservice
|
||||
|
||||
cd into the notebooks directory and run the following docker command. This will create a build/ directory.
|
||||
Set a couple of environment variables to specify the GCP Project and the TAG you want to build the image for:
|
||||
|
||||
```
|
||||
PROJECT=my-gcp-project
|
||||
TAG=0.1
|
||||
```
|
||||
|
||||
cd into the notebooks directory and run the following command (you will need [s2i](https://github.com/openshift/source-to-image) installed):
|
||||
|
||||
```
|
||||
cd notebooks/
|
||||
docker run -v $(pwd):/my_model seldonio/core-python-wrapper:0.7 /my_model IssueSummarization 0.1 gcr.io --base-image=python:3.6 --image-name=gcr-repository-name/issue-summarization
|
||||
make build-model-image PROJECT=${PROJECT} TAG=${TAG}
|
||||
```
|
||||
|
||||
The build/ directory contains all the necessary files to build the seldon-core microservice image
|
||||
This will use [S2I](https://github.com/openshift/source-to-image) to wrap the inference code in `IssueSummarization.py` so it can be run and managed by Seldon Core.
|
||||
|
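For orientation, the wrapper expects `IssueSummarization.py` to expose a class of the same name with a `predict` method. An assumed outline (the real file also loads the trained model and the body/title preprocessors):

```python
class IssueSummarization(object):
    def __init__(self):
        # In the real class this loads seq2seq_model_tutorial.h5 together
        # with body_pp.dpkl and title_pp.dpkl from the training step.
        pass

    def predict(self, X, feature_names=None):
        # X is a 2-D array of issue bodies; return one generated title per row.
        return [["generated title placeholder"] for _ in X]
```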
||||
|
||||
Now you should see an image named `gcr.io/<gcr-repository-name>/issue-summarization:0.1` in your docker images. To test the model, you can run it locally using:
|
||||
|
||||
```
|
||||
cd build/
|
||||
./build_image.sh
|
||||
make start-docker-model-image PROJECT=${PROJECT} TAG=${TAG}
|
||||
```
|
||||
|
||||
Now you should see an image named `gcr.io/gcr-repository-name/issue-summarization:0.1` in your docker images. To test the model, you can run it locally using
|
||||
To send an example payload to the server, run:
|
||||
|
||||
`docker run -p 5000:5000 gcr.io/gcr-repository-name/issue-summarization:0.1`
|
||||
```
|
||||
make test-model-image_local
|
||||
```
|
||||
|
||||
You can push the image by running `gcloud docker -- push gcr.io/gcr-repository-name/issue-summarization:0.1`
|
||||
or you can run a curl command explicitly such as:
|
||||
|
||||
```
|
||||
curl -g http://localhost:5000/predict --data-urlencode 'json={"data":{"ndarray":[["try to stop flask from using multiple threads"]]}}'
|
||||
```
|
||||
|
||||
To stop the running server, run:
|
||||
|
||||
```
|
||||
make stop-docker-model-image
|
||||
```
|
||||
|
||||
You can push the image by running:
|
||||
|
||||
```
|
||||
make push-model-image PROJECT=${PROJECT} TAG=${TAG}
|
||||
```
|
||||
|
||||
> You can find more details about wrapping a model with seldon-core [here](https://github.com/SeldonIO/seldon-core/blob/master/docs/wrappers/python.md)
|
||||
|
||||
### Storing a model in the Docker image
|
||||
|
||||
If you want to store a copy of the model in the Docker image, make sure the following files are available in the directory in which you run
|
||||
the commands in the previous steps. These files are produced by the [training](training_the_model.md) step in your `notebooks` directory:
|
||||
|
||||
* `seq2seq_model_tutorial.h5` - the Keras model
|
||||
* `body_pp.dpkl` - the serialized body preprocessor
|
||||
* `title_pp.dpkl` - the serialized title preprocessor
|
||||
|
||||
|
||||
# Deploying the model to your kubernetes cluster
|
||||
|
||||
|
|
@@ -58,41 +76,57 @@ Now that we have an image with our model server, we can deploy it to our kuberne
|
|||
## Deploy Seldon Core
|
||||
|
||||
|
||||
Install the CRD and it's controller using the seldon prototype
|
||||
Install the CRD and its controller using the seldon prototype. If you used
|
||||
`kfctl` to install Kubeflow, seldon is already included and you can run
|
||||
the following commands (if not, follow the
|
||||
[quick start](https://www.kubeflow.org/docs/started/getting-started/#kubeflow-quick-start)
|
||||
instructions to generate the k8s manifests first):
|
||||
|
||||
```bash
|
||||
cd ks-kubeflow
|
||||
# Gives cluster-admin role to the default service account in the ${NAMESPACE}
|
||||
kubectl create clusterrolebinding seldon-admin --clusterrole=cluster-admin --serviceaccount=${NAMESPACE}:default
|
||||
# Install the kubeflow/seldon package
|
||||
ks pkg install kubeflow/seldon
|
||||
cd ks_app
|
||||
# Generate the seldon component and deploy it
|
||||
ks generate seldon seldon --name=seldon --namespace=${NAMESPACE}
|
||||
ks generate seldon seldon --namespace=${NAMESPACE}
|
||||
ks apply ${KF_ENV} -c seldon
|
||||
```
|
||||
|
||||
Seldon Core should now be running on your cluster. You can verify it by running `kubectl get pods -n${NAMESPACE}`. You should see a pod named `seldon-cluster-manager-*`
|
||||
Seldon Core should now be running on your cluster. You can verify it by running
|
||||
`kubectl get pods -n${NAMESPACE}`. You should see two pods named
|
||||
`seldon-seldon-cluster-manager-*` and `seldon-redis-*`.
|
||||
|
||||
## Deploying the actual model
|
||||
|
||||
Now that you have seldon core deployed, you can deploy the model using the `kubeflow/seldon-serve-simple` prototype.
|
||||
Now that you have Seldon Core deployed, you can deploy the model using the
|
||||
`seldon-serve-simple-v1alpha2` prototype.
|
||||
|
||||
```bash
|
||||
ks generate seldon-serve-simple issue-summarization-model-serving \
|
||||
ks generate seldon-serve-simple-v1alpha2 issue-summarization-model \
|
||||
--name=issue-summarization \
|
||||
--image=gcr.io/gcr-repository-name/issue-summarization:0.1 \
|
||||
--namespace=${NAMESPACE} \
|
||||
--image=gcr.io/${PROJECT}/issue-summarization-model:${TAG} \
|
||||
--replicas=2
|
||||
ks apply ${KF_ENV} -c issue-summarization-model-serving
|
||||
ks apply ${KF_ENV} -c issue-summarization-model
|
||||
```
|
||||
|
||||
The model can take quite some time to become ready because of model loading, and it may be restarted if it fails the default liveness probe. If this happens, you can add a custom livenessProbe to the issue-summarization.jsonnet file. Add the following to the container section:
|
||||
|
||||
```
|
||||
"livenessProbe": {
|
||||
"failureThreshold": 3,
|
||||
"initialDelaySeconds": 30,
|
||||
"periodSeconds": 5,
|
||||
"successThreshold": 1,
|
||||
"handler" : {
|
||||
"tcpSocket": {
|
||||
"port": "http"
|
||||
}
|
||||
}
},
|
||||
```
|
||||
|
||||
# Sample request and response
|
||||
|
||||
Seldon Core uses ambassador to route it's requests. To send requests to the model, you can port-forward the ambassador container locally:
|
||||
Seldon Core uses ambassador to route its requests. To send requests to the model, you can port-forward the ambassador container locally:
|
||||
|
||||
```
|
||||
kubectl port-forward $(kubectl get pods -n ${NAMESPACE} -l service=ambassador -o jsonpath='{.items[0].metadata.name}') -n ${NAMESPACE} 8080:80
|
||||
kubectl port-forward svc/ambassador -n ${NAMESPACE} 8080:80
|
||||
```
|
||||
|
||||
|
||||
|
|
|
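With the port-forward in place, you can send a prediction request from Python. The payload mirrors the earlier curl example; the URL path is an assumption based on Seldon's default ambassador mapping, so adjust it to your deployment:

```python
import requests

payload = {"data": {"ndarray": [["try to stop flask from using multiple threads"]]}}
response = requests.post(
    "http://localhost:8080/seldon/issue-summarization/api/v0.1/predictions",  # assumed path
    json=payload)
print(response.json())
```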
|||
|
|
@@ -4,7 +4,7 @@ In this section, you will setup a barebones web server that displays the
|
|||
prediction provided by the previously deployed model.
|
||||
|
||||
The following steps describe how to build a docker image and deploy it locally,
|
||||
where it accepts as input any arbitrary text and displays amachine-generated
|
||||
where it accepts as input any arbitrary text and displays a machine-generated
|
||||
summary.
|
||||
|
||||
|
||||
|
|
@@ -18,7 +18,7 @@ Ensure that your model is live and listening for HTTP requests as described in
|
|||
|
||||
To build the front-end docker image, issue the following commands:
|
||||
|
||||
```commandline
|
||||
```bash
|
||||
cd docker
|
||||
docker build -t gcr.io/gcr-repository-name/issue-summarization-ui:0.1 .
|
||||
```
|
||||
|
|
@@ -29,24 +29,31 @@ To store the docker image in a location accessible to GKE, push it to the
|
|||
container registry of your choice. Here, it is pushed to Google Container
|
||||
Registry.
|
||||
|
||||
```commandline
|
||||
```bash
|
||||
gcloud docker -- push gcr.io/gcr-repository-name/issue-summarization-ui:0.1
|
||||
```
|
||||
|
||||
## Deploy the front-end docker image to your kubernetes cluster
|
||||
## Deploy the front-end docker image to your Kubernetes cluster
|
||||
|
||||
The folder [ks-kubeflow](ks-kubeflow) contains a ksonnet app. The ui component
|
||||
in the `ks-kubeflow` app contains the frontend image deployment.
|
||||
The folder [`ks_app`](ks_app) contains a ksonnet app. The
|
||||
[ui component](ks_app/components/ui.jsonnet)
|
||||
in `ks_app` contains the frontend deployment.
|
||||
|
||||
To avoid rate-limiting by the GitHub API, you will need an [authentication token](https://github.com/ksonnet/ksonnet/blob/master/docs/troubleshooting.md) stored in the form of an environment variable `${GITHUB_TOKEN}`. The token does not require any permissions and is only used to prevent anonymous API calls.
|
||||
|
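A quick sanity check that the token is set and accepted; an authenticated call to the `rate_limit` endpoint reports the higher 5000 requests/hour quota:

```python
import os
import requests

token = os.environ["GITHUB_TOKEN"]
resp = requests.get(
    "https://api.github.com/rate_limit",
    headers={"Authorization": "token " + token})
print(resp.json()["resources"]["core"])  # e.g. {'limit': 5000, ...}
```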
||||
To use this token, set it as a parameter in the ui component:
|
||||
|
||||
```commandline
|
||||
cd ks-kubeflow
|
||||
```bash
|
||||
cd ks_app
|
||||
ks param set ui github_token ${GITHUB_TOKEN} --env ${KF_ENV}
|
||||
```
|
||||
|
||||
To set the URL of your trained model, add it as a parameter:
|
||||
|
||||
```bash
|
||||
ks param set ui modelUrl "http://issue-summarization.${NAMESPACE}.svc.cluster.local:8000/api/v0.1/predictions" --env ${KF_ENV}
|
||||
```
|
||||
|
||||
To serve the frontend interface, apply the `ui` component of the ksonnet app:
|
||||
|
||||
```
|
||||
|
|
@@ -58,13 +65,14 @@ ks apply ${KF_ENV} -c ui
|
|||
We use `ambassador` to route requests to the frontend. You can port-forward the
|
||||
ambassador container locally:
|
||||
|
||||
```commandline
|
||||
kubectl port-forward $(kubectl get pods -n ${NAMESPACE} -l service=ambassador -o jsonpath='{.items[0].metadata.name}') -n ${NAMESPACE} 8080:80
|
||||
```bash
|
||||
kubectl port-forward svc/ambassador -n ${NAMESPACE} 8080:80
|
||||
```
|
||||
|
||||
In a browser, navigate to `http://localhost:8080/issue-summarization/`, where
|
||||
you will be greeted by "Issuetext". Enter text into the input box and click
|
||||
"Submit". You should see a summary that was provided by your trained model.
|
||||
you will be greeted by a basic website. Press the *Populate Random Issue*
|
||||
button, then click *Generate Title* to view
|
||||
a summary that was provided by your trained model.
|
||||
|
||||
*Next*: [Teardown](05_teardown.md)
|
||||
|
||||
|
|
|
|||
|
|
@@ -1,21 +1,13 @@
|
|||
# Teardown
|
||||
|
||||
Delete the kubernetes `namespace`.
|
||||
If you created a cluster with Click-to-Deploy or `kfctl`, delete the deployment
|
||||
using the [GCP console](https://console.cloud.google.com/dm/deployments). The
|
||||
default deployment name is `kubeflow`.
|
||||
|
||||
```commandline
|
||||
kubectl delete namespace ${NAMESPACE}
|
||||
```
|
||||
Delete the PD (persistent disk) backing the NFS mount.
|
||||
|
||||
Delete the PD (persistent disk) backing the NFS mount.
|
||||
|
||||
```commandline
|
||||
```bash
|
||||
gcloud --project=${PROJECT} compute disks delete --zone=${ZONE} ${PD_DISK_NAME}
|
||||
```
|
||||
|
||||
Delete the `my-kubeflow` directory.
|
||||
|
||||
```commandline
|
||||
rm -rf my-kubeflow
|
||||
```
|
||||
|
||||
*Back*: [Querying the model](04_querying_the_model.md)
|
||||
|
|
|
|||
|
|
@@ -15,12 +15,23 @@
|
|||
# Requirements:
|
||||
# https://github.com/mattrobenolt/jinja2-cli
|
||||
# pip install jinja2-cli
|
||||
IMG = gcr.io/kubeflow-examples/issue-summarization-ui
|
||||
#
|
||||
# To override variables do
|
||||
# make ${TARGET} ${VAR}=${VALUE}
|
||||
#
|
||||
|
||||
# IMG is the base path for images.
|
||||
# Individual images will be
|
||||
# $(IMG)/$(NAME):$(TAG)
|
||||
IMG ?= gcr.io/kubeflow-examples/github-issue-summarization
|
||||
|
||||
# List any changed files. We only include files in the notebooks directory.
|
||||
# because that is the code in the docker image.
|
||||
# In particular we exclude changes to the ksonnet configs.
|
||||
CHANGED_FILES := $(shell git diff-files --relative=github_issue_summarization/docker)
|
||||
CHANGED_FILES := $(shell git diff-files --relative=github_issue_summarization/)
|
||||
|
||||
# Whether to use cached images with GCB
|
||||
USE_IMAGE_CACHE ?= true
|
||||
|
||||
ifeq ($(strip $(CHANGED_FILES)),)
|
||||
# Changed files is empty; not dirty
|
||||
|
|
@@ -37,11 +48,11 @@ all: build
|
|||
# To build without the cache set the environment variable
|
||||
# export DOCKER_BUILD_OPTS=--no-cache
|
||||
build:
|
||||
docker build ${DOCKER_BUILD_OPTS} -t $(IMG):$(TAG) . \
|
||||
docker build ${DOCKER_BUILD_OPTS} -t $(IMG)/ui:$(TAG) . \
|
||||
--label=git-version=$(GIT_VERSION)
|
||||
docker tag $(IMG):$(TAG) $(IMG):latest
|
||||
@echo Built $(IMG):latest
|
||||
@echo Built $(IMG):$(TAG)
|
||||
docker tag $(IMG)/ui:$(TAG) $(IMG)/ui:latest
|
||||
@echo Built $(IMG)/ui:latest
|
||||
@echo Built $(IMG)/ui:$(TAG)
|
||||
|
||||
|
||||
# Build but don't attach the latest tag. This allows manual testing/inspection of the image
|
||||
|
|
@@ -49,3 +60,22 @@ build:
|
|||
push: build
|
||||
gcloud docker -- push $(IMG):$(TAG)
|
||||
@echo Pushed $(IMG):$(TAG)
|
||||
|
||||
# Build the GCB workflow
|
||||
build-gcb-spec:
|
||||
rm -rf ./build
|
||||
mkdir -p build
|
||||
jsonnet ./image_build.jsonnet --ext-str imageBase=$(IMG) \
|
||||
--ext-str gitVersion=$(GIT_VERSION) --ext-str tag=$(TAG) \
|
||||
--ext-str useImageCache=$(USE_IMAGE_CACHE) \
|
||||
> ./build/image_build.json
|
||||
|
||||
# Build using GCB. This is useful if we are on a slow internet connection
|
||||
# and don't want to pull images locally.
|
||||
# It's also used to build from our CI system.
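# e.g. make build-gcb USE_IMAGE_CACHE=false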
|
||||
build-gcb: build-gcb-spec
|
||||
cp -r ./docker ./build/
|
||||
cp -r ./notebooks ./build/
|
||||
gcloud builds submit --machine-type=n1-highcpu-32 --project=kubeflow-ci \
|
||||
--config=./build/image_build.json \
|
||||
--timeout=3600 ./build
|
||||
|
|
@ -16,8 +16,8 @@ There are two primary goals for this tutorial:
|
|||
By the end of this tutorial, you should learn how to:
|
||||
|
||||
* Set up a Kubeflow cluster on an existing Kubernetes deployment
|
||||
* Spawn up a Jupyter Notebook on the cluster
|
||||
* Spawn up a shared-persistent storage across the cluster to store large
|
||||
* Spawn a Jupyter Notebook on the cluster
|
||||
* Spawn a shared-persistent storage across the cluster to store large
|
||||
datasets
|
||||
* Train a Sequence-to-Sequence model using TensorFlow and GPUs on the cluster
|
||||
* Serve the model using [Seldon Core](https://github.com/SeldonIO/seldon-core/)
|
||||
|
|
|
|||
|
|
@ -83,7 +83,8 @@ Here are the instructions for setting up the demo.
|
|||
|
||||
### Training and Deploying the model
|
||||
|
||||
We use the ksonnet app in [github/kubeflow/examples/github_issue_summarization/ks-kubeflow](https://github.com/kubeflow/examples/tree/master/github_issue_summarization/ks-kubeflow)
|
||||
We use the ksonnet app in
|
||||
[github/kubeflow/examples/github_issue_summarization/ks_app](https://github.com/kubeflow/examples/tree/master/github_issue_summarization/ks_app)
|
||||
|
||||
The current environment is
|
||||
|
||||
|
|
@ -94,14 +95,14 @@ export ENV=gh-demo-1003
|
|||
Set a bucket for the job output
|
||||
```
|
||||
DAY=$(date +%Y%m%d)
|
||||
ks param set --env=${ENV} tfjob-v1alpha2 output_model_gcs_bucket kubecon-gh-demo
|
||||
ks param set --env=${ENV} tfjob-v1alpha2 output_model_gcs_path gh-demo/${DAY}/output
|
||||
ks param set --env=${ENV} tfjob output_model_gcs_bucket kubecon-gh-demo
|
||||
ks param set --env=${ENV} tfjob output_model_gcs_path gh-demo/${DAY}/output
|
||||
```
|
||||
|
||||
Run the job
|
||||
|
||||
```
|
||||
ks apply ${ENV} -c tfjob-v1alpha2
|
||||
ks apply ${ENV} -c tfjob
|
||||
```
|
||||
|
||||
|
||||
|
|
@ -128,4 +129,4 @@ ks apply ${ENV} -c tfjob-v1alpha2
|
|||
|
||||
```
|
||||
ks apply ${ENV} -c tensorboard-pvc-tb
|
||||
```
|
||||
```
|
||||
|
|
|
|||
|
|
@ -1,4 +0,0 @@
|
|||
FROM golang:1.9
|
||||
|
||||
RUN mkdir -p /opt/kubeflow
|
||||
COPY ./build/git-issue-summarize-demo /opt/kubeflow
|
||||
|
|
@ -1,100 +0,0 @@
|
|||
# This file is autogenerated, do not edit; changes may be undone by the next 'dep ensure'.
|
||||
|
||||
|
||||
[[projects]]
|
||||
name = "github.com/golang/protobuf"
|
||||
packages = [
|
||||
"proto",
|
||||
"ptypes",
|
||||
"ptypes/any",
|
||||
"ptypes/duration",
|
||||
"ptypes/timestamp"
|
||||
]
|
||||
revision = "b4deda0973fb4c70b50d226b1af49f3da59f5265"
|
||||
version = "v1.1.0"
|
||||
|
||||
[[projects]]
|
||||
name = "github.com/kubeflow/katib"
|
||||
packages = ["pkg/api"]
|
||||
revision = "f24b520cc52920ae511aeea235636462ebc21d21"
|
||||
version = "v0.1.2-alpha"
|
||||
|
||||
[[projects]]
|
||||
branch = "master"
|
||||
name = "golang.org/x/net"
|
||||
packages = [
|
||||
"context",
|
||||
"http/httpguts",
|
||||
"http2",
|
||||
"http2/hpack",
|
||||
"idna",
|
||||
"internal/timeseries",
|
||||
"trace"
|
||||
]
|
||||
revision = "4cb1c02c05b0e749b0365f61ae859a8e0cfceed9"
|
||||
|
||||
[[projects]]
|
||||
name = "golang.org/x/text"
|
||||
packages = [
|
||||
"collate",
|
||||
"collate/build",
|
||||
"internal/colltab",
|
||||
"internal/gen",
|
||||
"internal/tag",
|
||||
"internal/triegen",
|
||||
"internal/ucd",
|
||||
"language",
|
||||
"secure/bidirule",
|
||||
"transform",
|
||||
"unicode/bidi",
|
||||
"unicode/cldr",
|
||||
"unicode/norm",
|
||||
"unicode/rangetable"
|
||||
]
|
||||
revision = "f21a4dfb5e38f5895301dc265a8def02365cc3d0"
|
||||
version = "v0.3.0"
|
||||
|
||||
[[projects]]
|
||||
branch = "master"
|
||||
name = "google.golang.org/genproto"
|
||||
packages = ["googleapis/rpc/status"]
|
||||
revision = "ff3583edef7de132f219f0efc00e097cabcc0ec0"
|
||||
|
||||
[[projects]]
|
||||
name = "google.golang.org/grpc"
|
||||
packages = [
|
||||
".",
|
||||
"balancer",
|
||||
"balancer/base",
|
||||
"balancer/roundrobin",
|
||||
"codes",
|
||||
"connectivity",
|
||||
"credentials",
|
||||
"encoding",
|
||||
"encoding/proto",
|
||||
"grpclog",
|
||||
"internal",
|
||||
"internal/backoff",
|
||||
"internal/channelz",
|
||||
"internal/grpcrand",
|
||||
"keepalive",
|
||||
"metadata",
|
||||
"naming",
|
||||
"peer",
|
||||
"resolver",
|
||||
"resolver/dns",
|
||||
"resolver/passthrough",
|
||||
"stats",
|
||||
"status",
|
||||
"tap",
|
||||
"transport"
|
||||
]
|
||||
revision = "168a6198bcb0ef175f7dacec0b8691fc141dc9b8"
|
||||
version = "v1.13.0"
|
||||
|
||||
[solve-meta]
|
||||
analyzer-name = "dep"
|
||||
analyzer-version = 1
|
||||
inputs-digest = "3d9f4c7de4665d6a45accfb3d5a5a6a6ae9b98229cea14e0a8dfba942a4e49f8"
|
||||
solver-name = "gps-cdcl"
|
||||
solver-version = 1
|
||||
|
|
@ -1,38 +0,0 @@
|
|||
# Gopkg.toml example
|
||||
#
|
||||
# Refer to https://github.com/golang/dep/blob/master/docs/Gopkg.toml.md
|
||||
# for detailed Gopkg.toml documentation.
|
||||
#
|
||||
# required = ["github.com/user/thing/cmd/thing"]
|
||||
# ignored = ["github.com/user/project/pkgX", "bitbucket.org/user/project/pkgA/pkgY"]
|
||||
#
|
||||
# [[constraint]]
|
||||
# name = "github.com/user/project"
|
||||
# version = "1.0.0"
|
||||
#
|
||||
# [[constraint]]
|
||||
# name = "github.com/user/project2"
|
||||
# branch = "dev"
|
||||
# source = "github.com/myfork/project2"
|
||||
#
|
||||
# [[override]]
|
||||
# name = "github.com/x/y"
|
||||
# version = "2.4.0"
|
||||
#
|
||||
# [prune]
|
||||
# non-go = false
|
||||
# go-tests = true
|
||||
# unused-packages = true
|
||||
|
||||
|
||||
[[constraint]]
|
||||
name = "github.com/kubeflow/katib"
|
||||
version = "0.1.2-alpha"
|
||||
|
||||
[[constraint]]
|
||||
name = "google.golang.org/grpc"
|
||||
version = "1.13.0"
|
||||
|
||||
[prune]
|
||||
go-tests = true
|
||||
unused-packages = true
|
||||
|
|
@ -1,53 +0,0 @@
|
|||
# Copyright 2017 The Kubernetes Authors.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
# Requirements:
|
||||
# Make sure ${GOPATH}/src/github.com/kubeflow/examples
|
||||
# points at a checked out version of the examples repository.
|
||||
IMG = gcr.io/kubeflow-examples/gh-issue-hp-tuner
|
||||
DIR := ${CURDIR}
|
||||
|
||||
# List any changed files.
|
||||
CHANGED_FILES := $(shell git diff-files --relative=examples/GKEDemo)
|
||||
|
||||
ifeq ($(strip $(CHANGED_FILES)),)
|
||||
# Changed files is empty; not dirty
|
||||
# Don't include --dirty because it could be dirty if files outside the ones we care
|
||||
# about changed.
|
||||
TAG := $(shell date +v%Y%m%d)-$(shell git describe --always)
|
||||
else
|
||||
TAG := $(shell date +v%Y%m%d)-$(shell git describe --always --dirty)-$(shell git diff | shasum -a256 | cut -c -6)
|
||||
endif
|
||||
|
||||
all: build
|
||||
|
||||
# To build without the cache set the environment variable
|
||||
# export DOCKER_BUILD_OPTS=--no-cache
|
||||
build: Dockerfile git-issue-summarize-demo.go
|
||||
mkdir -p build
|
||||
dep ensure
|
||||
go build -i -o ./build/git-issue-summarize-demo ${GOPATH}/src/github.com/kubeflow/examples/github_issue_summarization/hp-tune/git-issue-summarize-demo.go
|
||||
docker build ${DOCKER_BUILD_OPTS} -t $(IMG):$(TAG) .
|
||||
docker tag $(IMG):$(TAG) $(IMG):latest
|
||||
@echo Built $(IMG):$(TAG)
|
||||
|
||||
# Build but don't attach the latest tag. This allows manual testing/inspection of the image
|
||||
# first.
|
||||
push: build
|
||||
gcloud docker -- push $(IMG):$(TAG)
|
||||
@echo Pushed $(IMG) with :$(TAG) tags
|
||||
|
||||
push-latest: push
|
||||
gcloud container images add-tag --quiet $(IMG):$(TAG) $(IMG):latest --verbosity=info
|
||||
echo created $(IMG):latest
|
||||
|
|
@ -1,134 +0,0 @@
|
|||
# Experimental: HP Tuning for GitHub Issue Summarization
|
||||
|
||||
This directory contains experimental code for adding hyperparameter
|
||||
tuning support to the GitHub issue summarization example using Katib.
|
||||
|
||||
## Instructions
|
||||
|
||||
1. Deploy Kubeflow
|
||||
1. [Deploy Katib](https://github.com/kubeflow/kubeflow/blob/master/kubeflow/katib/README.md)
|
||||
1. Create the katib namespace
|
||||
|
||||
```
|
||||
kubectl create namespace katib
|
||||
```
|
||||
|
||||
* This is a known issue [kubeflow/katib#134](https://github.com/kubeflow/katib/issues/134)
|
||||
|
||||
1. Deploy the hyperparameter tuning job
|
||||
|
||||
```
|
||||
cd kubeflow/examples/github_issue_summarization/ks-kubeflow
|
||||
ks apply ${ENVIRONMENT} -c hp-tune
|
||||
```
|
||||
|
||||
## UI
|
||||
|
||||
You can check your model with the Katib Web UI.
|
||||
|
||||
Go to `http://${ENDPOINT}/katib/projects`.
|
||||
|
||||
* If you are using GKE and IAP, then ENDPOINT is the endpoint you
|
||||
are serving Kubeflow on.
|
||||
|
||||
* Otherwise you can port-forward to one of the Ambassador pods
|
||||
and set ENDPOINT as follows:
|
||||
|
||||
```
|
||||
kubectl port-forward `kubectl get pods --selector=service=ambassador -o jsonpath='{.items[0].metadata.name}'` 8080:80
|
||||
ENDPOINT=localhost:8080
|
||||
```
|
||||
|
||||
The results will be saved automatically.
|
||||
|
||||
## Description of git-issue-summarize-demo.go
|
||||
You can generate hyperparameters and evaluate them through the Katib API.
|
||||
The Katib APIs are gRPC, so you can use any language that gRPC supports (e.g. Go, Python, C++).
|
||||
In a typical case you will call the APIs in the order described below; each section includes a short sketch adapted from git-issue-summarize-demo.go.
|
||||
git-issue-summarize-demo.go additionally waits until the status of all workers is Completed.
|
||||
|
||||
### CreateStudy
|
||||
First, create a Study.
|
||||
The input is a StudyConfig.
|
||||
It holds the study name, owner, optimization info, and parameter configs (parameter name, min, and max).
|
||||
This call generates a unique ID for your study and stores the config in the DB; a sketch follows the field list below.
|
||||
Input:
|
||||
* StudyConfig:
|
||||
* Name: string
|
||||
* Owner: string
|
||||
* OptimizationType: enum(OptimizationType_MAXIMIZE, OptimizationType_MINIMIZE)
|
||||
* OptimizationGoal: float
|
||||
* DefaultSuggestionAlgorithm: string
|
||||
* DefaultEarlyStoppingAlgorithm: string
|
||||
* ObjectiveValueName: string
|
||||
* Metrics: List of Metrics name
|
||||
* ParameterConfigs: List of parameter config.
|
||||
Return:
|
||||
* StudyID
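A minimal sketch of this call, adapted from the full git-issue-summarize-demo.go program below (it assumes the `c` manager client, `ctx`, and `studyConfig` defined there):

```go
// Create the study from a StudyConfig and keep the returned ID
// for all later API calls.
createStudyreq := &api.CreateStudyRequest{
	StudyConfig: &studyConfig,
}
createStudyreply, err := c.CreateStudy(ctx, createStudyreq)
if err != nil {
	log.Fatalf("StudyConfig Error %v", err)
}
studyId := createStudyreply.StudyId
log.Printf("Study ID %s", studyId)
```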
|
||||
|
||||
### SetSuggestionParameters
|
||||
Hyperparameters are generated by suggestion services from the study's parameter configs.
|
||||
You can set a suggestion-specific config for each algorithm (see the sketch after the field list).
|
||||
Input:
|
||||
* StudyID: ID of your study.
|
||||
* SuggestionAlgorithm: name of suggestion service (e.g. random, grid)
|
||||
* SuggestionParameters: key-value parameter pairs for the suggestion service. The expected keys differ for each suggestion algorithm.
|
||||
Return:
|
||||
* ParameterID
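A sketch adapted from the program below, registering the grid suggestion parameters (`gridConfig` is defined there):

```go
// Register suggestion-specific parameters for this study; the
// returned ParamId identifies this parameter set in later calls.
setSuggestionParameterRequest := &api.SetSuggestionParametersRequest{
	StudyId:              studyId,
	SuggestionAlgorithm:  "grid",
	SuggestionParameters: gridConfig,
}
setSuggestionParameterReply, err := c.SetSuggestionParameters(ctx, setSuggestionParameterRequest)
if err != nil {
	log.Fatalf("SetConfig Error %v", err)
}
log.Printf("Grid Parameter ID %s", setSuggestionParameterReply.ParamId)
```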
|
||||
|
||||
### GetSuggestions
|
||||
This function creates Trials (sets of parameters); a sketch follows the list below.
|
||||
Input:
|
||||
* StudyID: ID of your study.
|
||||
* SuggestionAlgorithm: name of suggestion service (e.g. random, grid)
|
||||
* RequestNumber: the number of trials you want to evaluate (for grid, 0 means all grid points).
|
||||
* ParamID: ParameterID you got from SetSuggestionParameters func.
|
||||
Return:
|
||||
* List of Trials
|
||||
* TrialID
|
||||
* Parameter Sets
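A sketch adapted from the program below:

```go
// Ask the grid suggestion service for trials; RequestNumber=0
// means return every point in the grid.
getGridSuggestRequest := &api.GetSuggestionsRequest{
	StudyId:             studyId,
	SuggestionAlgorithm: "grid",
	RequestNumber:       0,
	ParamId:             setSuggestionParameterReply.ParamId,
}
getGridSuggestReply, err := c.GetSuggestions(ctx, getGridSuggestRequest)
if err != nil {
	log.Fatalf("GetSuggestion Error %v", err)
}
for _, t := range getGridSuggestReply.Trials {
	log.Printf("%v", t)
}
```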
|
||||
|
||||
### RunTrial
|
||||
Starts evaluating a Trial.
|
||||
When you use the kubernetes runtime, pods are created with the specified config (see the sketch after the list).
|
||||
Input:
|
||||
* StudyId: ID of your study.
|
||||
* TrialId: ID of Trial.
|
||||
* Runtime: worker type (e.g. kubernetes)
|
||||
* WorkerConfig: runtime config
|
||||
* Image: name of docker image
|
||||
* Command: running commands
|
||||
* GPU: number of GPU
|
||||
* Scheduler: scheduler name
|
||||
Return:
|
||||
* List of WorkerID
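A sketch adapted from the program below; `t` is one trial from GetSuggestions and `workerConfig` is defined in the program:

```go
// Launch one worker for the trial on the kubernetes runtime,
// appending the suggested parameters to the training command.
ws := workerConfig
rtr := &api.RunTrialRequest{
	StudyId:      studyId,
	TrialId:      t.TrialId,
	Runtime:      "kubernetes",
	WorkerConfig: &ws,
}
for _, p := range t.ParameterSet {
	rtr.WorkerConfig.Command = append(rtr.WorkerConfig.Command, p.Name, p.Value)
}
workerReply, err := c.RunTrial(ctx, rtr)
if err != nil {
	log.Fatalf("RunTrial Error %v", err)
}
log.Printf("WorkerID %s start", workerReply.WorkerId)
```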
|
||||
|
||||
### GetMetrics
|
||||
Gets the metrics of running workers; a sketch follows the list below.
|
||||
Input:
|
||||
* StudyId: ID of your study.
|
||||
* WorkerIDs: List of worker ID you want to get metrics from.
|
||||
Return:
|
||||
* List of Metrics
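A sketch adapted from the polling loop in the program below:

```go
// Fetch the latest reported value of each metric for the
// listed workers.
getMetricsRequest := &api.GetMetricsRequest{
	StudyId:   studyId,
	WorkerIds: workerIds,
}
getMetricsReply, err := c.GetMetrics(ctx, getMetricsRequest)
if err != nil {
	log.Printf("GetMetrics Error %v", err)
} else {
	for _, mls := range getMetricsReply.MetricsLogSets {
		for _, ml := range mls.MetricsLogs {
			if len(ml.Values) > 0 {
				log.Printf("WorkerID %s: %s = %v", mls.WorkerId, ml.Name, ml.Values[len(ml.Values)-1])
			}
		}
	}
}
```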
|
||||
|
||||
### SaveModel
|
||||
Saves the model data to KatibDB. After you call this function, you can view the model info in the Katib UI.
|
||||
When you call this API multiple times, only the Metrics are updated (see the sketch below).
|
||||
Input:
|
||||
* ModelInfo
|
||||
* StudyName
|
||||
* WorkerId
|
||||
* Parameters: List of Parameter
|
||||
* Metrics: List of Metrics
|
||||
* ModelPath: path where the model is saved (PVCname:mountpath)
|
||||
* DataSet: information about the input data
|
||||
* Name
|
||||
* Path: path to the input data (PVCname:mountpath)
|
||||
|
||||
Return:
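A sketch adapted from the program below; `workerId` and `t.ParameterSet` come from RunTrial and GetSuggestions:

```go
// Record the model for one worker so it appears in the Katib UI;
// calling this again for the same worker updates only the Metrics.
saveModelRequest := &api.SaveModelRequest{
	Model: &api.ModelInfo{
		StudyName:  studyConfig.Name,
		WorkerId:   workerId,
		Parameters: t.ParameterSet,
		Metrics:    []*api.Metrics{},
		ModelPath:  "pvc:/Path/to/Model",
	},
	DataSet: &api.DataSetInfo{
		Name: "GitHub",
		Path: "/path/to/data",
	},
}
if _, err := c.SaveModel(ctx, saveModelRequest); err != nil {
	log.Fatalf("SaveModel Error %v", err)
}
```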
|
||||
|
||||
### GetWorkers
|
||||
Returns the list of workers and their status; a sketch follows.
|
||||
Input:
|
||||
Return:
|
||||
* List of worker information
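A sketch adapted from the completion check in the program below:

```go
// List all workers for the study and count how many have
// reached the Completed state.
getWorkerRequest := &api.GetWorkersRequest{StudyId: studyId}
getWorkerReply, err := c.GetWorkers(ctx, getWorkerRequest)
if err != nil {
	log.Fatalf("GetWorker Error %v", err)
}
completeCount := 0
for _, w := range getWorkerReply.Workers {
	if w.Status == api.State_COMPLETED {
		completeCount++
	}
}
log.Printf("%d of %d workers completed", completeCount, len(getWorkerReply.Workers))
```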
|
||||
|
|
@ -1,210 +0,0 @@
|
|||
package main
|
||||
|
||||
import (
|
||||
"context"
|
||||
"flag"
|
||||
"log"
|
||||
"time"
|
||||
|
||||
"github.com/kubeflow/katib/pkg/api"
|
||||
"google.golang.org/grpc"
|
||||
)
|
||||
|
||||
var studyConfig = api.StudyConfig{
|
||||
Name: "grid-demo",
|
||||
Owner: "katib",
|
||||
OptimizationType: api.OptimizationType_MAXIMIZE,
|
||||
OptimizationGoal: 0.99,
|
||||
ObjectiveValueName: "Validation-accuracy",
|
||||
Metrics: []string{
|
||||
"accuracy",
|
||||
},
|
||||
ParameterConfigs: &api.StudyConfig_ParameterConfigs{
|
||||
Configs: []*api.ParameterConfig{
|
||||
&api.ParameterConfig{
|
||||
Name: "--learning_rate",
|
||||
ParameterType: api.ParameterType_DOUBLE,
|
||||
Feasible: &api.FeasibleSpace{
|
||||
Min: "0.005",
|
||||
Max: "0.5",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
var gridConfig = []*api.SuggestionParameter{
|
||||
&api.SuggestionParameter{
|
||||
Name: "DefaultGrid",
|
||||
Value: "4",
|
||||
},
|
||||
&api.SuggestionParameter{
|
||||
Name: "--learning_rate",
|
||||
Value: "2",
|
||||
},
|
||||
}
|
||||
|
||||
var managerAddr = flag.String("katib_endpoint", "127.0.0.1:6789", "Endpoint of manager default 127.0.0.1:6789")
|
||||
var trainerImage = flag.String("trainer_image", "gcr.io/kubeflow-dev/tf-job-issue-summarization:v20180425-e79f888", "The docker image containing the training code")
|
||||
|
||||
func main() {
|
||||
flag.Parse()
|
||||
conn, err := grpc.Dial(*managerAddr, grpc.WithInsecure())
|
||||
if err != nil {
|
||||
log.Fatalf("could not connect: %v", err)
|
||||
}
|
||||
|
||||
workerConfig := api.WorkerConfig{
|
||||
Image: *trainerImage,
|
||||
Command: []string{
|
||||
"python",
|
||||
"/workdir/train.py",
|
||||
"--sample_size",
|
||||
"20000",
|
||||
// "--input_data_gcs_bucket",
|
||||
// "katib-gi-example",
|
||||
// "--input_data_gcs_path",
|
||||
// "github-issue-summarization-data/github-issues.zip",
|
||||
// "--output_model_gcs_bucket",
|
||||
// "katib-gi-example",
|
||||
},
|
||||
Gpu: 0,
|
||||
Scheduler: "default-scheduler",
|
||||
}
|
||||
|
||||
defer conn.Close()
|
||||
ctx := context.Background()
|
||||
c := api.NewManagerClient(conn)
|
||||
createStudyreq := &api.CreateStudyRequest{
|
||||
StudyConfig: &studyConfig,
|
||||
}
|
||||
createStudyreply, err := c.CreateStudy(ctx, createStudyreq)
|
||||
if err != nil {
|
||||
log.Fatalf("StudyConfig Error %v", err)
|
||||
}
|
||||
studyId := createStudyreply.StudyId
|
||||
log.Printf("Study ID %s", studyId)
|
||||
getStudyreq := &api.GetStudyRequest{
|
||||
StudyId: studyId,
|
||||
}
|
||||
getStudyReply, err := c.GetStudy(ctx, getStudyreq)
|
||||
if err != nil {
|
||||
log.Fatalf("GetConfig Error %v", err)
|
||||
}
|
||||
log.Printf("Study ID %s StudyConf%v", studyId, getStudyReply.StudyConfig)
|
||||
setSuggestionParameterRequest := &api.SetSuggestionParametersRequest{
|
||||
StudyId: studyId,
|
||||
SuggestionAlgorithm: "grid",
|
||||
SuggestionParameters: gridConfig,
|
||||
}
|
||||
setSuggestionParameterReply, err := c.SetSuggestionParameters(ctx, setSuggestionParameterRequest)
|
||||
if err != nil {
|
||||
log.Fatalf("SetConfig Error %v", err)
|
||||
}
|
||||
log.Printf("Grid Prameter ID %s", setSuggesitonParameterReply.ParamId)
|
||||
getGridSuggestRequest := &api.GetSuggestionsRequest{
|
||||
StudyId: studyId,
|
||||
SuggestionAlgorithm: "grid",
|
||||
RequestNumber: 0,
|
||||
//RequestNumber=0 means get all grids.
|
||||
ParamId: setSuggestionParameterReply.ParamId,
|
||||
}
|
||||
getGridSuggestReply, err := c.GetSuggestions(ctx, getGridSuggestRequest)
|
||||
if err != nil {
|
||||
log.Fatalf("GetSuggestion Error %v", err)
|
||||
}
|
||||
log.Println("Get Grid Suggestions:")
|
||||
for _, t := range getGridSuggestReply.Trials {
|
||||
log.Printf("%v", t)
|
||||
}
|
||||
workerIds := make([]string, len(getGridSuggestReply.Trials))
|
||||
workerParameter := make(map[string][]*api.Parameter)
|
||||
for i, t := range getGridSuggestReply.Trials {
|
||||
ws := workerConfig
|
||||
rtr := &api.RunTrialRequest{
|
||||
StudyId: studyId,
|
||||
TrialId: t.TrialId,
|
||||
Runtime: "kubernetes",
|
||||
WorkerConfig: &ws,
|
||||
}
|
||||
rtr.WorkerConfig.Command = append(rtr.WorkerConfig.Command, "--output_model_gcs_path")
|
||||
rtr.WorkerConfig.Command = append(rtr.WorkerConfig.Command, "github-issue-summarization-data/"+t.TrialId+"output_model.h5")
|
||||
for _, p := range t.ParameterSet {
|
||||
rtr.WorkerConfig.Command = append(rtr.WorkerConfig.Command, p.Name)
|
||||
rtr.WorkerConfig.Command = append(rtr.WorkerConfig.Command, p.Value)
|
||||
}
|
||||
workerReply, err := c.RunTrial(ctx, rtr)
|
||||
if err != nil {
|
||||
log.Fatalf("RunTrial Error %v", err)
|
||||
}
|
||||
workerIds[i] = workerReply.WorkerId
|
||||
workerParameter[workerReply.WorkerId] = t.ParameterSet
|
||||
saveModelRequest := &api.SaveModelRequest{
|
||||
Model: &api.ModelInfo{
|
||||
StudyName: studyConfig.Name,
|
||||
WorkerId: workerReply.WorkerId,
|
||||
Parameters: t.ParameterSet,
|
||||
Metrics: []*api.Metrics{},
|
||||
ModelPath: "pvc:/Path/to/Model",
|
||||
},
|
||||
DataSet: &api.DataSetInfo{
|
||||
Name: "GitHub",
|
||||
Path: "/path/to/data",
|
||||
},
|
||||
}
|
||||
_, err = c.SaveModel(ctx, saveModelRequest)
|
||||
if err != nil {
|
||||
log.Fatalf("SaveModel Error %v", err)
|
||||
}
|
||||
log.Printf("WorkerID %s start\n", workerReply.WorkerId)
|
||||
}
|
||||
for {
|
||||
time.Sleep(10 * time.Second)
|
||||
getMetricsRequest := &api.GetMetricsRequest{
|
||||
StudyId: studyId,
|
||||
WorkerIds: workerIds,
|
||||
}
|
||||
getMetricsReply, err := c.GetMetrics(ctx, getMetricsRequest)
|
||||
if err != nil {
|
||||
log.Printf("GetMetErr %v", err)
|
||||
continue
|
||||
}
|
||||
for _, mls := range getMetricsReply.MetricsLogSets {
|
||||
if len(mls.MetricsLogs) > 0 {
|
||||
//Only Metrics can be updated.
|
||||
saveModelRequest := &api.SaveModelRequest{
|
||||
Model: &api.ModelInfo{
|
||||
StudyName: studyConfig.Name,
|
||||
WorkerId: mls.WorkerId,
|
||||
Metrics: []*api.Metrics{},
|
||||
},
|
||||
}
|
||||
for _, ml := range mls.MetricsLogs {
|
||||
if len(ml.Values) > 0 {
|
||||
log.Printf("WorkerID %s :\t Metrics Name %s Value %v", mls.WorkerId, ml.Name, ml.Values[len(ml.Values)-1])
|
||||
saveModelRequest.Model.Metrics = append(saveModelRequest.Model.Metrics, &api.Metrics{Name: ml.Name, Value: ml.Values[len(ml.Values)-1]})
|
||||
}
|
||||
}
|
||||
_, err = c.SaveModel(ctx, saveModelRequest)
|
||||
if err != nil {
|
||||
log.Fatalf("SaveModel Error %v", err)
|
||||
}
|
||||
}
|
||||
}
|
||||
getWorkerRequest := &api.GetWorkersRequest{StudyId: studyId}
|
||||
getWorkerReply, err := c.GetWorkers(ctx, getWorkerRequest)
|
||||
if err != nil {
|
||||
log.Fatalf("GetWorker Error %v", err)
|
||||
}
|
||||
completeCount := 0
|
||||
for _, w := range getWorkerReply.Workers {
|
||||
if w.Status == api.State_COMPLETED {
|
||||
completeCount++
|
||||
}
|
||||
}
|
||||
if completeCount == len(getWorkerReply.Workers) {
|
||||
log.Printf("All Worker Completed!")
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,96 @@
|
|||
// TODO(jlewi): We should tag the image latest and then
|
||||
// use latest as a cache so that rebuilds are fast
|
||||
// https://cloud.google.com/cloud-build/docs/speeding-up-builds#using_a_cached_docker_image
|
||||
{
|
||||
|
||||
// Convert non-boolean types like string,number to a boolean.
|
||||
// This is primarily intended for dealing with parameters that should be booleans.
|
||||
local toBool = function(x) {
|
||||
result::
|
||||
if std.type(x) == "boolean" then
|
||||
x
|
||||
else if std.type(x) == "string" then
|
||||
std.asciiUpper(x) == "TRUE"
|
||||
else if std.type(x) == "number" then
|
||||
x != 0
|
||||
else
|
||||
false,
|
||||
}.result,
|
||||
|
||||
local useImageCache = toBool(std.extVar("useImageCache")),
|
||||
|
||||
// A template for defining the steps for building each image.
|
||||
local subGraphTemplate = {
|
||||
// following variables must be set
|
||||
name: null,
|
||||
|
||||
dockerFile: null,
|
||||
buildArg: null,
|
||||
contextDir: ".",
|
||||
|
||||
local template = self,
|
||||
|
||||
local pullStep = if useImageCache then [
|
||||
{
|
||||
id: "pull-" + template.name,
|
||||
name: "gcr.io/cloud-builders/docker",
|
||||
args: ["pull", std.extVar("imageBase") + "/" + template.name + ":latest"],
|
||||
waitFor: ["-"],
|
||||
},
|
||||
] else [],
|
||||
|
||||
local image = std.extVar("imageBase") + "/" + template.name + ":" + std.extVar("tag"),
|
||||
local imageLatest = std.extVar("imageBase") + "/" + template.name + ":latest",
|
||||
|
||||
images: [image, imageLatest],
|
||||
steps: pullStep +
|
||||
[
|
||||
{
|
||||
local buildArgList = if template.buildArg != null then ["--build-arg", template.buildArg] else [],
|
||||
local cacheList = if useImageCache then ["--cache-from=" + imageLatest] else [],
|
||||
|
||||
id: "build-" + template.name,
|
||||
name: "gcr.io/cloud-builders/docker",
|
||||
args: [
|
||||
"build",
|
||||
"-t",
|
||||
image,
|
||||
"--label=git-versions=" + std.extVar("gitVersion"),
|
||||
]
|
||||
+ buildArgList
|
||||
+ [
|
||||
"--file=" + template.dockerFile,
|
||||
]
|
||||
+ cacheList + [template.contextDir],
|
||||
waitFor: if useImageCache then ["pull-" + template.name] else ["-"],
|
||||
},
|
||||
{
|
||||
id: "tag-" + template.name,
|
||||
name: "gcr.io/cloud-builders/docker",
|
||||
args: ["tag", image, imageLatest],
|
||||
waitFor: ["build-" + template.name],
|
||||
},
|
||||
],
|
||||
},
|
||||
|
||||
local uiSteps = subGraphTemplate {
|
||||
name: "ui",
|
||||
dockerFile: "./docker/Dockerfile",
|
||||
contextDir: "./docker"
|
||||
},
|
||||
|
||||
local trainerSteps = subGraphTemplate {
|
||||
name: "trainer",
|
||||
dockerFile: "./notebooks/Dockerfile",
|
||||
contextDir: "./notebooks"
|
||||
},
|
||||
|
||||
local trainerEstimatorSteps = subGraphTemplate {
|
||||
name: "trainer-estimator",
|
||||
dockerFile: "./notebooks/Dockerfile.estimator",
|
||||
contextDir: "./notebooks"
|
||||
},
|
||||
|
||||
steps: uiSteps.steps + trainerSteps.steps + trainerEstimatorSteps.steps,
|
||||
images: uiSteps.images + trainerSteps.images + trainerEstimatorSteps.images,
|
||||
}
|
||||
|
|
@ -1,6 +0,0 @@
|
|||
/lib
|
||||
/.ksonnet/registries
|
||||
/app.override.yaml
|
||||
/.ks_environment
|
||||
# Ignore all environments? Do we want to check in dev.kubeflow.org?
|
||||
/environments
|
||||
|
|
@ -1,39 +0,0 @@
|
|||
apiVersion: 0.1.0
|
||||
gitVersion:
|
||||
commitSha: 40285d8a14f1ac5787e405e1023cf0c07f6aa28c
|
||||
refSpec: master
|
||||
kind: ksonnet.io/registry
|
||||
libraries:
|
||||
apache:
|
||||
path: apache
|
||||
version: master
|
||||
efk:
|
||||
path: efk
|
||||
version: master
|
||||
mariadb:
|
||||
path: mariadb
|
||||
version: master
|
||||
memcached:
|
||||
path: memcached
|
||||
version: master
|
||||
mongodb:
|
||||
path: mongodb
|
||||
version: master
|
||||
mysql:
|
||||
path: mysql
|
||||
version: master
|
||||
nginx:
|
||||
path: nginx
|
||||
version: master
|
||||
node:
|
||||
path: node
|
||||
version: master
|
||||
postgres:
|
||||
path: postgres
|
||||
version: master
|
||||
redis:
|
||||
path: redis
|
||||
version: master
|
||||
tomcat:
|
||||
path: tomcat
|
||||
version: master
|
||||
|
|
@ -1,39 +0,0 @@
|
|||
apiVersion: 0.1.0
|
||||
gitVersion:
|
||||
commitSha: 3be196cfa1d68d9a33e0674c133ffbbcc3e57d46
|
||||
refSpec: v0.2.0-rc.1
|
||||
kind: ksonnet.io/registry
|
||||
libraries:
|
||||
argo:
|
||||
path: argo
|
||||
version: master
|
||||
automation:
|
||||
path: automation
|
||||
version: master
|
||||
core:
|
||||
path: core
|
||||
version: master
|
||||
katib:
|
||||
path: katib
|
||||
version: master
|
||||
mpi-job:
|
||||
path: mpi-job
|
||||
version: master
|
||||
new-package-stub:
|
||||
path: new-package-stub
|
||||
version: master
|
||||
openmpi:
|
||||
path: openmpi
|
||||
version: master
|
||||
pachyderm:
|
||||
path: pachyderm
|
||||
version: master
|
||||
pytorch-job:
|
||||
path: pytorch-job
|
||||
version: master
|
||||
tf-job:
|
||||
path: tf-job
|
||||
version: master
|
||||
tf-serving:
|
||||
path: tf-serving
|
||||
version: master
|
||||
|
|
@ -1,61 +0,0 @@
|
|||
apiVersion: 0.1.0
|
||||
environments:
|
||||
cloud:
|
||||
destination:
|
||||
namespace: namespace
|
||||
server: https://35.188.73.10
|
||||
k8sVersion: v1.7.0
|
||||
path: cloud
|
||||
default:
|
||||
destination:
|
||||
namespace: default
|
||||
server: https://35.188.73.10
|
||||
k8sVersion: v1.7.0
|
||||
path: default
|
||||
gh-demo-1003:
|
||||
destination:
|
||||
namespace: kubeflow
|
||||
server: https://104.196.134.59
|
||||
k8sVersion: v1.10.7
|
||||
path: gh-demo-1003
|
||||
jlewi:
|
||||
destination:
|
||||
namespace: kubeflow
|
||||
server: https://35.196.4.129
|
||||
k8sVersion: v1.9.6
|
||||
path: jlewi
|
||||
kubecon-gh-demo-1:
|
||||
destination:
|
||||
namespace: kubeflow
|
||||
server: https://35.231.60.188
|
||||
k8sVersion: v1.7.0
|
||||
path: kubecon-gh-demo-1
|
||||
kind: ksonnet.io/app
|
||||
libraries:
|
||||
core:
|
||||
gitVersion:
|
||||
commitSha: 3da7eb254cefd6a8a79ed7db57a30adfb91b734e
|
||||
refSpec: master
|
||||
name: core
|
||||
registry: kubeflow
|
||||
examples:
|
||||
gitVersion:
|
||||
commitSha: 3da7eb254cefd6a8a79ed7db57a30adfb91b734e
|
||||
refSpec: master
|
||||
name: examples
|
||||
registry: kubeflow
|
||||
seldon:
|
||||
gitVersion:
|
||||
commitSha: 3da7eb254cefd6a8a79ed7db57a30adfb91b734e
|
||||
refSpec: master
|
||||
name: seldon
|
||||
registry: kubeflow
|
||||
name: ks-kubeflow
|
||||
registries:
|
||||
kubeflow:
|
||||
gitVersion:
|
||||
commitSha: 3be196cfa1d68d9a33e0674c133ffbbcc3e57d46
|
||||
refSpec: v0.2.0-rc.1
|
||||
protocol: github
|
||||
uri: github.com/kubeflow/kubeflow/tree/v0.2.0-rc.1/kubeflow
|
||||
version: 0.0.1
|
||||
|
|
@ -1,13 +0,0 @@
|
|||
local env = std.extVar("__ksonnet/environments");
|
||||
local params = std.extVar("__ksonnet/params").components["kubeflow-core"];
|
||||
|
||||
local k = import "k.libsonnet";
|
||||
local all = import "kubeflow/core/all.libsonnet";
|
||||
|
||||
// updatedParams uses the environment namespace if
|
||||
// the namespace parameter is not explicitly set
|
||||
local updatedParams = params {
|
||||
namespace: if params.namespace == "null" then env.namespace else params.namespace,
|
||||
};
|
||||
|
||||
std.prune(k.core.v1.list.new(all.parts(updatedParams).all))
|
||||
|
|
@ -1,82 +0,0 @@
|
|||
{
|
||||
global: {},
|
||||
components: {
|
||||
// Component-level parameters, defined initially from 'ks prototype use ...'
|
||||
// Each object below should correspond to a component in the components/ directory
|
||||
"data-pvc": {},
|
||||
seldon: {
|
||||
apifeImage: "seldonio/apife:0.1.5",
|
||||
apifeServiceType: "NodePort",
|
||||
engineImage: "seldonio/engine:0.1.5",
|
||||
name: "seldon",
|
||||
namespace: "null",
|
||||
operatorImage: "seldonio/cluster-manager:0.1.5",
|
||||
operatorJavaOpts: "null",
|
||||
operatorSpringOpts: "null",
|
||||
withApife: "false",
|
||||
withRbac: "true",
|
||||
},
|
||||
"issue-summarization-model-serving": {
|
||||
endpoint: "REST",
|
||||
image: "gcr.io/kubeflow-examples/issue-summarization-model:v20180427-e2aa113",
|
||||
name: "issue-summarization",
|
||||
namespace: "null",
|
||||
replicas: 2,
|
||||
},
|
||||
tensorboard: {
|
||||
image: "tensorflow/tensorflow:1.7.0",
|
||||
// logDir needs to be overwritten based on where the data is
|
||||
// actually stored.
|
||||
logDir: "",
|
||||
name: "gh",
|
||||
},
|
||||
ui: {
|
||||
namespace: "null",
|
||||
githubToken: "",
|
||||
image: "gcr.io/kubeflow-examples/issue-summarization-ui:v20180629-v0.1-2-g98ed4b4-dirty-182929",
|
||||
},
|
||||
"tfjob-v1alpha2": {
|
||||
name: "tfjob-issue-summarization",
|
||||
image: "gcr.io/kubeflow-examples/tf-job-issue-summarization:v20180629-v0.1-2-g98ed4b4-dirty-182929",
|
||||
input_data_gcs_bucket: "kubeflow-examples",
|
||||
input_data_gcs_path: "github-issue-summarization-data/github-issues.zip",
|
||||
output_model_gcs_bucket: "kubeflow-examples",
|
||||
output_model_gcs_path: "github-issue-summarization-data",
|
||||
sample_size: "100000",
|
||||
gcpSecretName: "user-gcp-sa",
|
||||
gcpSecretFile: "user-gcp-sa.json",
|
||||
},
|
||||
"kubeflow-core": {
|
||||
AmbassadorImage: "quay.io/datawire/ambassador:0.30.1",
|
||||
AmbassadorServiceType: "ClusterIP",
|
||||
StatsdImage: "quay.io/datawire/statsd:0.30.1",
|
||||
centralUiImage: "gcr.io/kubeflow-images-public/centraldashboard:v20180618-v0.2.0-rc.0-5-g715aafc8-e3b0c4",
|
||||
cloud: "null",
|
||||
disks: "null",
|
||||
jupyterHubAuthenticator: "null",
|
||||
jupyterHubImage: "gcr.io/kubeflow/jupyterhub-k8s:v20180531-3bb991b1",
|
||||
jupyterHubServiceType: "ClusterIP",
|
||||
jupyterNotebookPVCMount: "/home/jovyan",
|
||||
jupyterNotebookRegistry: "gcr.io",
|
||||
jupyterNotebookRepoName: "kubeflow-images-public",
|
||||
name: "kubeflow-core",
|
||||
namespace: "null",
|
||||
reportUsage: "false",
|
||||
tfDefaultImage: "null",
|
||||
tfJobImage: "gcr.io/kubeflow-images-public/tf_operator:v0.2.0",
|
||||
tfJobUiServiceType: "ClusterIP",
|
||||
tfJobVersion: "v1alpha2",
|
||||
usageId: "unknown_cluster",
|
||||
},
|
||||
"tensor2tensor-v1alpha2": {
|
||||
name: "tensor2tensor-v1alpha2",
|
||||
},
|
||||
"data-downloader": {},
|
||||
"tfjob-pvc-v1alpha2": {
|
||||
name: "tfjob-pvc-v1alpha2",
|
||||
},
|
||||
"hp-tune": {},
|
||||
// Run tensorboard with pvc.
|
||||
// This is intended for use with tfjob-estimator
|
||||
},
|
||||
}
|
||||
|
|
@ -1,57 +0,0 @@
|
|||
local env = std.extVar("__ksonnet/environments");
|
||||
local params = std.extVar("__ksonnet/params").components.seldon;
|
||||
local k = import "k.libsonnet";
|
||||
local core = import "kubeflow/seldon/core.libsonnet";
|
||||
|
||||
// updatedParams uses the environment namespace if
|
||||
// the namespace parameter is not explicitly set
|
||||
local updatedParams = params {
|
||||
namespace: if params.namespace == "null" then env.namespace else params.namespace,
|
||||
};
|
||||
|
||||
local name = params.name;
|
||||
local namespace = updatedParams.namespace;
|
||||
local withRbac = params.withRbac;
|
||||
local withApife = params.withApife;
|
||||
|
||||
// APIFE
|
||||
local apifeImage = params.apifeImage;
|
||||
local apifeServiceType = params.apifeServiceType;
|
||||
|
||||
// Cluster Manager (The CRD Operator)
|
||||
local operatorImage = params.operatorImage;
|
||||
local operatorSpringOptsParam = params.operatorSpringOpts;
|
||||
local operatorSpringOpts = if operatorSpringOptsParam != "null" then operatorSpringOptsParam else "";
|
||||
local operatorJavaOptsParam = params.operatorJavaOpts;
|
||||
local operatorJavaOpts = if operatorJavaOptsParam != "null" then operatorJavaOptsParam else "";
|
||||
|
||||
// Engine
|
||||
local engineImage = params.engineImage;
|
||||
|
||||
// APIFE
|
||||
local apife = [
|
||||
core.parts(namespace).apife(apifeImage, withRbac),
|
||||
core.parts(namespace).apifeService(apifeServiceType),
|
||||
];
|
||||
|
||||
local rbac = [
|
||||
core.parts(namespace).rbacServiceAccount(),
|
||||
core.parts(namespace).rbacClusterRoleBinding(),
|
||||
];
|
||||
|
||||
// Core
|
||||
local coreComponents = [
|
||||
core.parts(namespace).deploymentOperator(engineImage, operatorImage, operatorSpringOpts, operatorJavaOpts, withRbac),
|
||||
core.parts(namespace).redisDeployment(),
|
||||
core.parts(namespace).redisService(),
|
||||
core.parts(namespace).crd(),
|
||||
];
|
||||
|
||||
if withRbac == "true" && withApife == "true" then
|
||||
k.core.v1.list.new(apife + rbac + coreComponents)
|
||||
else if withRbac == "true" && withApife == "false" then
|
||||
k.core.v1.list.new(rbac + coreComponents)
|
||||
else if withRbac == "false" && withApife == "true" then
|
||||
k.core.v1.list.new(apife + coreComponents)
|
||||
else if withRbac == "false" && withApife == "false" then
|
||||
k.core.v1.list.new(coreComponents)
|
||||
|
|
@ -1,85 +0,0 @@
|
|||
{
|
||||
all(params, env):: [
|
||||
$.parts(params, env).service,
|
||||
$.parts(params, env).deployment,
|
||||
],
|
||||
|
||||
parts(params, env):: {
|
||||
// Define some defaults.
|
||||
local updatedParams = {
|
||||
serviceType: "ClusterIP",
|
||||
image: "gcr.io/kubeflow-images-public/issue-summarization-ui:latest",
|
||||
modelUrl: "http://issue-summarization.kubeflow.svc.cluster.local:8000/api/v0.1/predictions",
|
||||
} + params,
|
||||
|
||||
service:: {
|
||||
apiVersion: "v1",
|
||||
kind: "Service",
|
||||
metadata: {
|
||||
name: "issue-summarization-ui",
|
||||
namespace: env.namespace,
|
||||
annotations: {
|
||||
"getambassador.io/config": "---\napiVersion: ambassador/v0\nkind: Mapping\nname: issue_summarization_ui\nprefix: /issue-summarization/\nrewrite: /\nservice: issue-summarization-ui:80\n",
|
||||
},
|
||||
},
|
||||
spec: {
|
||||
ports: [
|
||||
{
|
||||
port: 80,
|
||||
targetPort: 80,
|
||||
},
|
||||
],
|
||||
selector: {
|
||||
app: "issue-summarization-ui",
|
||||
},
|
||||
type: updatedParams.serviceType,
|
||||
},
|
||||
},
|
||||
|
||||
deployment:: {
|
||||
apiVersion: "apps/v1beta1",
|
||||
kind: "Deployment",
|
||||
metadata: {
|
||||
name: "issue-summarization-ui",
|
||||
namespace: env.namespace,
|
||||
},
|
||||
spec: {
|
||||
replicas: 1,
|
||||
template: {
|
||||
metadata: {
|
||||
labels: {
|
||||
app: "issue-summarization-ui",
|
||||
},
|
||||
},
|
||||
spec: {
|
||||
containers: [
|
||||
{
|
||||
args: [
|
||||
"app.py",
|
||||
"--model_url",
|
||||
updatedParams.modelUrl,
|
||||
],
|
||||
command: [
|
||||
"python",
|
||||
],
|
||||
image: updatedParams.image,
|
||||
env: [
|
||||
{
|
||||
name: "GITHUB_TOKEN",
|
||||
value: updatedParams.githubToken,
|
||||
}
|
||||
],
|
||||
name: "issue-summarization-ui",
|
||||
ports: [
|
||||
{
|
||||
containerPort: 80,
|
||||
},
|
||||
],
|
||||
},
|
||||
],
|
||||
},
|
||||
},
|
||||
},
|
||||
}, // deployment
|
||||
}, // parts
|
||||
}
|
||||
|
|
@ -1,7 +0,0 @@
|
|||
local base = import "base.libsonnet";
|
||||
local k = import "k.libsonnet";
|
||||
|
||||
base + {
|
||||
// Insert user-specified overrides here. For example if a component is named "nginx-deployment", you might have something like:
|
||||
// "nginx-deployment"+: k.deployment.mixin.metadata.labels({foo: "bar"})
|
||||
}
|
||||
|
|
@ -1,17 +0,0 @@
|
|||
local params = import '../../components/params.libsonnet';
|
||||
|
||||
params + {
|
||||
components+: {
|
||||
// Insert component parameter overrides here. Ex:
|
||||
// guestbook +: {
|
||||
// name: "guestbook-dev",
|
||||
// replicas: params.global.replicas,
|
||||
// },
|
||||
"kubeflow-core"+: {
|
||||
cloud: 'gke',
|
||||
},
|
||||
ui+: {
|
||||
github_token: 'null',
|
||||
},
|
||||
},
|
||||
}
|
||||
|
|
@ -1,7 +0,0 @@
|
|||
local base = import "base.libsonnet";
|
||||
local k = import "k.libsonnet";
|
||||
|
||||
base + {
|
||||
// Insert user-specified overrides here. For example if a component is named "nginx-deployment", you might have something like:
|
||||
// "nginx-deployment"+: k.deployment.mixin.metadata.labels({foo: "bar"})
|
||||
}
|
||||
|
|
@ -1,5 +0,0 @@
|
|||
local params = import '../../components/params.libsonnet';
|
||||
|
||||
params + {
|
||||
components+: {},
|
||||
}
|
||||
|
|
@ -1,7 +0,0 @@
|
|||
local base = import "base.libsonnet";
|
||||
local k = import "k.libsonnet";
|
||||
|
||||
base + {
|
||||
// Insert user-specified overrides here. For example if a component is named "nginx-deployment", you might have something like:
|
||||
// "nginx-deployment"+: k.deployment.mixin.metadata.labels({foo: "bar"})
|
||||
}
|
||||
|
|
@ -1,5 +0,0 @@
|
|||
local params = import '../../components/params.libsonnet';
|
||||
|
||||
params + {
|
||||
components+: {},
|
||||
}
|
||||
|
|
@ -1,80 +0,0 @@
|
|||
local k8s = import "k8s.libsonnet";
|
||||
|
||||
local apps = k8s.apps;
|
||||
local core = k8s.core;
|
||||
local extensions = k8s.extensions;
|
||||
|
||||
local hidden = {
|
||||
mapContainers(f):: {
|
||||
local podContainers = super.spec.template.spec.containers,
|
||||
spec+: {
|
||||
template+: {
|
||||
spec+: {
|
||||
// IMPORTANT: This overwrites the 'containers' field
|
||||
// for this deployment.
|
||||
containers: std.map(f, podContainers),
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
|
||||
mapContainersWithName(names, f) ::
|
||||
local nameSet =
|
||||
if std.type(names) == "array"
|
||||
then std.set(names)
|
||||
else std.set([names]);
|
||||
local inNameSet(name) = std.length(std.setInter(nameSet, std.set([name]))) > 0;
|
||||
self.mapContainers(
|
||||
function(c)
|
||||
if std.objectHas(c, "name") && inNameSet(c.name)
|
||||
then f(c)
|
||||
else c
|
||||
),
|
||||
};
|
||||
|
||||
k8s + {
|
||||
apps:: apps + {
|
||||
v1beta1:: apps.v1beta1 + {
|
||||
local v1beta1 = apps.v1beta1,
|
||||
|
||||
daemonSet:: v1beta1.daemonSet + {
|
||||
mapContainers(f):: hidden.mapContainers(f),
|
||||
mapContainersWithName(names, f):: hidden.mapContainersWithName(names, f),
|
||||
},
|
||||
|
||||
deployment:: v1beta1.deployment + {
|
||||
mapContainers(f):: hidden.mapContainers(f),
|
||||
mapContainersWithName(names, f):: hidden.mapContainersWithName(names, f),
|
||||
},
|
||||
},
|
||||
},
|
||||
|
||||
core:: core + {
|
||||
v1:: core.v1 + {
|
||||
list:: {
|
||||
new(items)::
|
||||
{apiVersion: "v1"} +
|
||||
{kind: "List"} +
|
||||
self.items(items),
|
||||
|
||||
items(items):: if std.type(items) == "array" then {items+: items} else {items+: [items]},
|
||||
},
|
||||
},
|
||||
},
|
||||
|
||||
extensions:: extensions + {
|
||||
v1beta1:: extensions.v1beta1 + {
|
||||
local v1beta1 = extensions.v1beta1,
|
||||
|
||||
daemonSet:: v1beta1.daemonSet + {
|
||||
mapContainers(f):: hidden.mapContainers(f),
|
||||
mapContainersWithName(names, f):: hidden.mapContainersWithName(names, f),
|
||||
},
|
||||
|
||||
deployment:: v1beta1.deployment + {
|
||||
mapContainers(f):: hidden.mapContainers(f),
|
||||
mapContainersWithName(names, f):: hidden.mapContainersWithName(names, f),
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
File diff suppressed because it is too large
File diff suppressed because it is too large
|
|
@ -1,83 +0,0 @@
|
|||
local env = std.extVar("__ksonnet/environments");
|
||||
local params = std.extVar("__ksonnet/params").components["tensor2tensor-v1alpha2"];
|
||||
|
||||
local k = import "k.libsonnet";
|
||||
|
||||
local name = params.name;
|
||||
local namespace = env.namespace;
|
||||
|
||||
local tfjob = {
|
||||
apiVersion: "kubeflow.org/v1alpha2",
|
||||
kind: "TFJob",
|
||||
metadata: {
|
||||
name: name,
|
||||
namespace: namespace,
|
||||
},
|
||||
spec: {
|
||||
tfReplicaSpecs: {
|
||||
Master: {
|
||||
replicas: updatedParams.workers,
|
||||
template: {
|
||||
spec: {
|
||||
containers: [
|
||||
{
|
||||
image: if updatedParams.workerGpu > 0 then updatedParams.gpuImage else updatedParams.cpuImage,
|
||||
name: "tensorflow",
|
||||
command: masterCommand,
|
||||
env: containerEnv,
|
||||
[if updatedParams.workerGpu > 0 then "resources"]: {
|
||||
limits: {
|
||||
"nvidia.com/gpu": updatedParams.workerGpu,
|
||||
},
|
||||
},
|
||||
},
|
||||
],
|
||||
restartPolicy: "OnFailure",
|
||||
},
|
||||
},
|
||||
}, // Master
|
||||
|
||||
Worker: {
|
||||
replicas: updatedParams.workers,
|
||||
template: {
|
||||
spec: {
|
||||
containers: [
|
||||
{
|
||||
image: if updatedParams.workerGpu > 0 then updatedParams.gpuImage else updatedParams.cpuImage,
|
||||
name: "tensorflow",
|
||||
command: workerCommand,
|
||||
env: containerEnv,
|
||||
[if updatedParams.workerGpu > 0 then "resources"]: {
|
||||
limits: {
|
||||
"nvidia.com/gpu": updatedParams.workerGpu,
|
||||
},
|
||||
},
|
||||
},
|
||||
],
|
||||
restartPolicy: "OnFailure",
|
||||
},
|
||||
},
|
||||
}, // Worker
|
||||
Ps: {
|
||||
replicas: updatedParams.ps,
|
||||
template: {
|
||||
spec: {
|
||||
containers: [
|
||||
{
|
||||
image: updatedParams.cpuImage,
|
||||
name: "tensorflow",
|
||||
command: psCommand,
|
||||
env: containerEnv,
|
||||
},
|
||||
],
|
||||
restartPolicy: "OnFailure",
|
||||
},
|
||||
},
|
||||
}, // Ps
|
||||
}, // tfReplicaSpecs
|
||||
}, // Spec
|
||||
}; // tfJob
|
||||
|
||||
k.core.v1.list.new([
|
||||
tfjob,
|
||||
])
|
||||
|
|
@ -1,19 +0,0 @@
|
|||
{
|
||||
parts(params):: {
|
||||
local ambassador = import "kubeflow/core/ambassador.libsonnet",
|
||||
local jupyterhub = import "kubeflow/core/jupyterhub.libsonnet",
|
||||
local nfs = import "kubeflow/core/nfs.libsonnet",
|
||||
local tfjob = import "kubeflow/core/tf-job-operator.libsonnet",
|
||||
local spartakus = import "kubeflow/core/spartakus.libsonnet",
|
||||
local centraldashboard = import "kubeflow/core/centraldashboard.libsonnet",
|
||||
local version = import "kubeflow/core/version.libsonnet",
|
||||
|
||||
all:: jupyterhub.all(params)
|
||||
+ tfjob.all(params)
|
||||
+ ambassador.all(params)
|
||||
+ nfs.all(params)
|
||||
+ spartakus.all(params)
|
||||
+ centraldashboard.all(params)
|
||||
+ version.all(params),
|
||||
},
|
||||
}
|
||||
|
|
@ -1,266 +0,0 @@
|
|||
{
|
||||
all(params):: [
|
||||
$.parts(params.namespace, params.AmbassadorImage).service(params.AmbassadorServiceType),
|
||||
$.parts(params.namespace, params.AmbassadorImage).adminService,
|
||||
$.parts(params.namespace, params.AmbassadorImage).role,
|
||||
$.parts(params.namespace, params.AmbassadorImage).serviceAccount,
|
||||
$.parts(params.namespace, params.AmbassadorImage).roleBinding,
|
||||
$.parts(params.namespace, params.AmbassadorImage).deploy(params.StatsdImage),
|
||||
$.parts(params.namespace, params.AmbassadorImage).k8sDashboard(params.cloud),
|
||||
],
|
||||
|
||||
parts(namespace, ambassadorImage):: {
|
||||
service(serviceType):: {
|
||||
apiVersion: "v1",
|
||||
kind: "Service",
|
||||
metadata: {
|
||||
labels: {
|
||||
service: "ambassador",
|
||||
},
|
||||
name: "ambassador",
|
||||
namespace: namespace,
|
||||
},
|
||||
spec: {
|
||||
ports: [
|
||||
{
|
||||
name: "ambassador",
|
||||
port: 80,
|
||||
targetPort: 80,
|
||||
},
|
||||
],
|
||||
selector: {
|
||||
service: "ambassador",
|
||||
},
|
||||
type: serviceType,
|
||||
},
|
||||
}, // service
|
||||
|
||||
adminService:: {
|
||||
apiVersion: "v1",
|
||||
kind: "Service",
|
||||
metadata: {
|
||||
labels: {
|
||||
service: "ambassador-admin",
|
||||
},
|
||||
name: "ambassador-admin",
|
||||
namespace: namespace,
|
||||
},
|
||||
spec: {
|
||||
ports: [
|
||||
{
|
||||
name: "ambassador-admin",
|
||||
port: 8877,
|
||||
targetPort: 8877,
|
||||
},
|
||||
],
|
||||
selector: {
|
||||
service: "ambassador",
|
||||
},
|
||||
type: "ClusterIP",
|
||||
},
|
||||
}, // adminService
|
||||
|
||||
role:: {
|
||||
apiVersion: "rbac.authorization.k8s.io/v1beta1",
|
||||
kind: "Role",
|
||||
metadata: {
|
||||
name: "ambassador",
|
||||
namespace: namespace,
|
||||
},
|
||||
rules: [
|
||||
{
|
||||
apiGroups: [
|
||||
"",
|
||||
],
|
||||
resources: [
|
||||
"services",
|
||||
],
|
||||
verbs: [
|
||||
"get",
|
||||
"list",
|
||||
"watch",
|
||||
],
|
||||
},
|
||||
{
|
||||
apiGroups: [
|
||||
"",
|
||||
],
|
||||
resources: [
|
||||
"configmaps",
|
||||
],
|
||||
verbs: [
|
||||
"create",
|
||||
"update",
|
||||
"patch",
|
||||
"get",
|
||||
"list",
|
||||
"watch",
|
||||
],
|
||||
},
|
||||
{
|
||||
apiGroups: [
|
||||
"",
|
||||
],
|
||||
resources: [
|
||||
"secrets",
|
||||
],
|
||||
verbs: [
|
||||
"get",
|
||||
"list",
|
||||
"watch",
|
||||
],
|
||||
},
|
||||
],
|
||||
}, // role
|
||||
|
||||
serviceAccount:: {
|
||||
apiVersion: "v1",
|
||||
kind: "ServiceAccount",
|
||||
metadata: {
|
||||
name: "ambassador",
|
||||
namespace: namespace,
|
||||
},
|
||||
}, // serviceAccount
|
||||
|
||||
roleBinding:: {
|
||||
apiVersion: "rbac.authorization.k8s.io/v1beta1",
|
||||
kind: "RoleBinding",
|
||||
metadata: {
|
||||
name: "ambassador",
|
||||
namespace: namespace,
|
||||
},
|
||||
roleRef: {
|
||||
apiGroup: "rbac.authorization.k8s.io",
|
||||
kind: "Role",
|
||||
name: "ambassador",
|
||||
},
|
||||
subjects: [
|
||||
{
|
||||
kind: "ServiceAccount",
|
||||
name: "ambassador",
|
||||
namespace: namespace,
|
||||
},
|
||||
],
|
||||
}, // roleBinding
|
||||
|
||||
deploy(statsdImage):: {
|
||||
apiVersion: "extensions/v1beta1",
|
||||
kind: "Deployment",
|
||||
metadata: {
|
||||
name: "ambassador",
|
||||
namespace: namespace,
|
||||
},
|
||||
spec: {
|
||||
replicas: 3,
|
||||
template: {
|
||||
metadata: {
|
||||
labels: {
|
||||
service: "ambassador",
|
||||
},
|
||||
namespace: namespace,
|
||||
},
|
||||
spec: {
|
||||
containers: [
|
||||
{
|
||||
env: [
|
||||
{
|
||||
name: "AMBASSADOR_NAMESPACE",
|
||||
valueFrom: {
|
||||
fieldRef: {
|
||||
fieldPath: "metadata.namespace",
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "AMBASSADOR_SINGLE_NAMESPACE",
|
||||
value: "true",
|
||||
},
|
||||
],
|
||||
image: ambassadorImage,
|
||||
livenessProbe: {
|
||||
httpGet: {
|
||||
path: "/ambassador/v0/check_alive",
|
||||
port: 8877,
|
||||
},
|
||||
initialDelaySeconds: 30,
|
||||
periodSeconds: 30,
|
||||
},
|
||||
name: "ambassador",
|
||||
readinessProbe: {
|
||||
httpGet: {
|
||||
path: "/ambassador/v0/check_ready",
|
||||
port: 8877,
|
||||
},
|
||||
initialDelaySeconds: 30,
|
||||
periodSeconds: 30,
|
||||
},
|
||||
resources: {
|
||||
limits: {
|
||||
cpu: 1,
|
||||
memory: "400Mi",
|
||||
},
|
||||
requests: {
|
||||
cpu: "200m",
|
||||
memory: "100Mi",
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
image: statsdImage,
|
||||
name: "statsd",
|
||||
},
|
||||
],
|
||||
restartPolicy: "Always",
|
||||
serviceAccountName: "ambassador",
|
||||
},
|
||||
},
|
||||
},
|
||||
}, // deploy
|
||||
|
||||
isDashboardTls(cloud)::
|
||||
if cloud == "acsengine" || cloud == "aks" then
|
||||
"false"
|
||||
else
|
||||
"true",
|
||||
// This service adds a rule to our reverse proxy for accessing the K8s dashboard.
|
||||
k8sDashboard(cloud):: {
|
||||
apiVersion: "v1",
|
||||
kind: "Service",
|
||||
metadata: {
|
||||
name: "k8s-dashboard",
|
||||
namespace: namespace,
|
||||
|
||||
annotations: {
|
||||
"getambassador.io/config":
|
||||
std.join("\n", [
|
||||
"---",
|
||||
"apiVersion: ambassador/v0",
|
||||
"kind: Mapping",
|
||||
"name: k8s-dashboard-ui-mapping",
|
||||
"prefix: /k8s/ui/",
|
||||
"rewrite: /",
|
||||
"tls: " + $.parts(namespace, ambassadorImage).isDashboardTls(cloud),
|
||||
// We redirect to the K8s service created for the dashboard
|
||||
// in namespace kube-system. We don't use the k8s-dashboard service
|
||||
// because that isn't in the kube-system namespace and I don't think
|
||||
// it can select pods in a different namespace.
|
||||
"service: kubernetes-dashboard.kube-system",
|
||||
]),
|
||||
}, //annotations
|
||||
},
|
||||
spec: {
|
||||
ports: [
|
||||
{
|
||||
port: 443,
|
||||
targetPort: 8443,
|
||||
},
|
||||
],
|
||||
selector: {
|
||||
"k8s-app": "kubernetes-dashboard",
|
||||
},
|
||||
type: "ClusterIP",
|
||||
},
|
||||
}, // k8sDashboard
|
||||
|
||||
}, // parts
|
||||
}
|
||||
|
|
@ -1,159 +0,0 @@
|
|||
{
|
||||
// TODO(https://github.com/ksonnet/ksonnet/issues/222): Taking namespace as an argument is a work around for the fact that ksonnet
|
||||
// doesn't support automatically piping in the namespace from the environment to prototypes.
|
||||
|
||||
// TODO(https://github.com/kubeflow/kubeflow/issues/527):
|
||||
// We need to build and publish central UI docker image as part of our release process.
|
||||
|
||||
all(params):: [
|
||||
$.parts(params.namespace).deployUi(params.centralUiImage),
|
||||
$.parts(params.namespace).uiService,
|
||||
$.parts(params.namespace).uiServiceAccount,
|
||||
$.parts(params.namespace).uiRole,
|
||||
$.parts(params.namespace).uiRoleBinding,
|
||||
],
|
||||
|
||||
parts(namespace):: {
|
||||
|
||||
deployUi(centralUiImage):: {
|
||||
apiVersion: "extensions/v1beta1",
|
||||
kind: "Deployment",
|
||||
metadata: {
|
||||
labels: {
|
||||
app: "centraldashboard",
|
||||
},
|
||||
name: "centraldashboard",
|
||||
namespace: namespace,
|
||||
},
|
||||
spec: {
|
||||
template: {
|
||||
metadata: {
|
||||
labels: {
|
||||
app: "centraldashboard",
|
||||
},
|
||||
},
|
||||
spec: {
|
||||
containers: [
|
||||
{
|
||||
image: centralUiImage,
|
||||
name: "centraldashboard",
|
||||
ports: [
|
||||
{
|
||||
containerPort: 8082,
|
||||
},
|
||||
],
|
||||
},
|
||||
],
|
||||
serviceAccountName: "centraldashboard",
|
||||
},
|
||||
},
|
||||
},
|
||||
}, // deployUi
|
||||
|
||||
uiService:: {
|
||||
apiVersion: "v1",
|
||||
kind: "Service",
|
||||
metadata: {
|
||||
labels: {
|
||||
app: "centraldashboard",
|
||||
},
|
||||
name: "centraldashboard",
|
||||
namespace: namespace,
|
||||
annotations: {
|
||||
"getambassador.io/config":
|
||||
std.join("\n", [
|
||||
"---",
|
||||
"apiVersion: ambassador/v0",
|
||||
"kind: Mapping",
|
||||
"name: centralui-mapping",
|
||||
"prefix: /",
|
||||
"rewrite: /",
|
||||
"service: centraldashboard." + namespace,
|
||||
]),
|
||||
}, //annotations
|
||||
},
|
||||
spec: {
|
||||
ports: [
|
||||
{
|
||||
port: 80,
|
||||
targetPort: 8082,
|
||||
},
|
||||
],
|
||||
selector: {
|
||||
app: "centraldashboard",
|
||||
},
|
||||
sessionAffinity: "None",
|
||||
type: "ClusterIP",
|
||||
},
|
||||
}, //service
|
||||
|
||||
uiServiceAccount:: {
|
||||
apiVersion: "v1",
|
||||
kind: "ServiceAccount",
|
||||
metadata: {
|
||||
name: "centraldashboard",
|
||||
namespace: namespace,
|
||||
},
|
||||
}, // service account
|
||||
|
||||
uiRole:: {
|
||||
apiVersion: "rbac.authorization.k8s.io/v1beta1",
|
||||
kind: "ClusterRole",
|
||||
metadata: {
|
||||
labels: {
|
||||
app: "centraldashboard",
|
||||
},
|
||||
name: "centraldashboard",
|
||||
namespace: namespace,
|
||||
},
|
||||
rules: [
|
||||
{
|
||||
apiGroups: [""],
|
||||
resources: [
|
||||
"pods",
|
||||
"pods/exec",
|
||||
"pods/log",
|
||||
],
|
||||
verbs: [
|
||||
"get",
|
||||
"list",
|
||||
"watch",
|
||||
],
|
||||
},
|
||||
{
|
||||
apiGroups: [""],
|
||||
resources: [
|
||||
"secrets",
|
||||
],
|
||||
verbs: [
|
||||
"get",
|
||||
],
|
||||
},
|
||||
],
|
||||
}, // operator-role
|
||||
|
||||
uiRoleBinding:: {
|
||||
apiVersion: "rbac.authorization.k8s.io/v1beta1",
|
||||
kind: "ClusterRoleBinding",
|
||||
metadata: {
|
||||
labels: {
|
||||
app: "centraldashboard",
|
||||
},
|
||||
name: "centraldashboard",
|
||||
namespace: namespace,
|
||||
},
|
||||
roleRef: {
|
||||
apiGroup: "rbac.authorization.k8s.io",
|
||||
kind: "ClusterRole",
|
||||
name: "centraldashboard",
|
||||
},
|
||||
subjects: [
|
||||
{
|
||||
kind: "ServiceAccount",
|
||||
name: "centraldashboard",
|
||||
namespace: namespace,
|
||||
},
|
||||
],
|
||||
}, // role binding
|
||||
}, // parts
|
||||
}
|
||||
|
|
@ -1,182 +0,0 @@
|
|||
{
|
||||
parts(namespace):: {
|
||||
local k = import "k.libsonnet",
|
||||
local certManagerImage = "quay.io/jetstack/cert-manager-controller:v0.2.4",
|
||||
local certManagerIngressShimImage = "quay.io/jetstack/cert-manager-ingress-shim:v0.2.4",
|
||||
|
||||
// Note, not using std.prune to preserve required empty http01 map in the Issuer spec.
|
||||
certManagerParts(acmeEmail, acmeUrl):: k.core.v1.list.new([
|
||||
$.parts(namespace).certificateCRD,
|
||||
$.parts(namespace).clusterIssuerCRD,
|
||||
$.parts(namespace).issuerCRD,
|
||||
$.parts(namespace).serviceAccount,
|
||||
$.parts(namespace).clusterRole,
|
||||
$.parts(namespace).clusterRoleBinding,
|
||||
$.parts(namespace).deploy,
|
||||
$.parts(namespace).issuerLEProd(acmeEmail, acmeUrl),
|
||||
]),
|
||||
|
||||
certificateCRD:: {
|
||||
apiVersion: "apiextensions.k8s.io/v1beta1",
|
||||
kind: "CustomResourceDefinition",
|
||||
metadata: {
|
||||
name: "certificates.certmanager.k8s.io",
|
||||
},
|
||||
spec: {
|
||||
group: "certmanager.k8s.io",
|
||||
version: "v1alpha1",
|
||||
names: {
|
||||
kind: "Certificate",
|
||||
plural: "certificates",
|
||||
},
|
||||
scope: "Namespaced",
|
||||
},
|
||||
},
|
||||
|
||||
clusterIssuerCRD:: {
|
||||
apiVersion: "apiextensions.k8s.io/v1beta1",
|
||||
kind: "CustomResourceDefinition",
|
||||
metadata: {
|
||||
name: "clusterissuers.certmanager.k8s.io",
|
||||
},
|
||||
|
||||
spec: {
|
||||
group: "certmanager.k8s.io",
|
||||
version: "v1alpha1",
|
||||
names: {
|
||||
kind: "ClusterIssuer",
|
||||
plural: "clusterissuers",
|
||||
},
|
||||
scope: "Cluster",
|
||||
},
|
||||
},
|
||||
|
||||
issuerCRD:: {
|
||||
apiVersion: "apiextensions.k8s.io/v1beta1",
|
||||
kind: "CustomResourceDefinition",
|
||||
metadata: {
|
||||
name: "issuers.certmanager.k8s.io",
|
||||
},
|
||||
spec: {
|
||||
group: "certmanager.k8s.io",
|
||||
version: "v1alpha1",
|
||||
names: {
|
||||
kind: "Issuer",
|
||||
plural: "issuers",
|
||||
},
|
||||
scope: "Namespaced",
|
||||
},
|
||||
},
|
||||
|
||||
serviceAccount:: {
|
||||
apiVersion: "v1",
|
||||
kind: "ServiceAccount",
|
||||
metadata: {
|
||||
name: "cert-manager",
|
||||
namespace: namespace,
|
||||
},
|
||||
},
|
||||
|
||||
clusterRole:: {
|
||||
apiVersion: "rbac.authorization.k8s.io/v1beta1",
|
||||
kind: "ClusterRole",
|
||||
metadata: {
|
||||
name: "cert-manager",
|
||||
},
|
||||
rules: [
|
||||
{
|
||||
apiGroups: ["certmanager.k8s.io"],
|
||||
resources: ["certificates", "issuers", "clusterissuers"],
|
||||
verbs: ["*"],
|
||||
},
|
||||
{
|
||||
apiGroups: [""],
|
||||
resources: ["secrets", "events", "endpoints", "services", "pods"],
|
||||
verbs: ["*"],
|
||||
},
|
||||
{
|
||||
apiGroups: ["extensions"],
|
||||
resources: ["ingresses"],
|
||||
verbs: ["*"],
|
||||
},
|
||||
],
|
||||
},
|
||||
|
||||
clusterRoleBinding:: {
|
||||
apiVersion: "rbac.authorization.k8s.io/v1beta1",
|
||||
kind: "ClusterRoleBinding",
|
||||
metadata: {
|
||||
name: "cert-manager",
|
||||
},
|
||||
roleRef: {
|
||||
apiGroup: "rbac.authorization.k8s.io",
|
||||
kind: "ClusterRole",
|
||||
name: "cert-manager",
|
||||
},
|
||||
subjects: [
|
||||
{
|
||||
name: "cert-manager",
|
||||
namespace: namespace,
|
||||
kind: "ServiceAccount",
|
||||
},
|
||||
],
|
||||
},
|
||||
|
||||
deploy:: {
|
||||
apiVersion: "apps/v1beta1",
|
||||
kind: "Deployment",
|
||||
metadata: {
|
||||
name: "cert-manager",
|
||||
namespace: namespace,
|
||||
labels: {
|
||||
app: "cert-manager",
|
||||
},
|
||||
},
|
||||
spec: {
|
||||
replicas: 1,
|
||||
template: {
|
||||
metadata: {
|
||||
labels: {
|
||||
app: "cert-manager",
|
||||
},
|
||||
},
|
||||
spec: {
|
||||
serviceAccountName: "cert-manager",
|
||||
containers: [
|
||||
{
|
||||
name: "cert-manager",
|
||||
image: certManagerImage,
|
||||
imagePullPolicy: "IfNotPresent",
|
||||
},
|
||||
{
|
||||
name: "ingress-shim",
|
||||
image: certManagerIngressShimImage,
|
||||
imagePullPolicy: "IfNotPresent",
|
||||
},
|
||||
],
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
|
||||
issuerLEProd(acmeEmail, acmeUrl):: {
|
||||
apiVersion: "certmanager.k8s.io/v1alpha1",
|
||||
kind: "Issuer",
|
||||
metadata: {
|
||||
name: "letsencrypt-prod",
|
||||
namespace: namespace,
|
||||
},
|
||||
spec: {
|
||||
acme: {
|
||||
server: acmeUrl,
|
||||
email: acmeEmail,
|
||||
privateKeySecretRef: {
|
||||
name: "letsencrypt-prod-secret",
|
||||
},
|
||||
http01: {
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
|
@ -1,332 +0,0 @@
{
  parts(namespace):: {
    local k = import "k.libsonnet",
    local cloudEndpointsImage = "gcr.io/cloud-solutions-group/cloud-endpoints-controller:0.1.1",
    local metacontrollerImage = "gcr.io/enisoc-kubernetes/metacontroller@sha256:18561c63e1c5380ac5bbaabefa933e484bdb499f10b61071506f9a0070bc65f6",

    cloudEndpointsParts(secretName, secretKey):: k.core.v1.list.new([
      $.parts(namespace).metaServiceAccount,
      $.parts(namespace).metaClusterRole,
      $.parts(namespace).metaClusterRoleBinding,
      $.parts(namespace).metaInitializerCRD,
      $.parts(namespace).metaLambdaCRD,
      $.parts(namespace).metaDeployment,
      $.parts(namespace).endpointsCRD,
      $.parts(namespace).endpointsService,
      $.parts(namespace).endpointsServiceAccount,
      $.parts(namespace).endpointsClusterRole,
      $.parts(namespace).endpointsClusterRoleBinding,
      $.parts(namespace).endpointsDeploy(secretName, secretKey),
      $.parts(namespace).endpointsLambdaController,
    ]),

    metaServiceAccount:: {
      apiVersion: "v1",
      kind: "ServiceAccount",
      metadata: {
        name: "kube-metacontroller",
        namespace: namespace,
      },
    }, // metaServiceAccount

    metaClusterRole:: {
      apiVersion: "rbac.authorization.k8s.io/v1beta1",
      kind: "ClusterRole",
      metadata: {
        name: "kube-metacontroller",
      },
      rules: [
        {
          apiGroups: ["*"],
          resources: ["*"],
          verbs: ["*"],
        },
      ],
    }, // metaClusterRole

    metaClusterRoleBinding:: {
      apiVersion: "rbac.authorization.k8s.io/v1beta1",
      kind: "ClusterRoleBinding",
      metadata: {
        name: "kube-metacontroller",
      },
      subjects: [
        {
          kind: "ServiceAccount",
          name: "kube-metacontroller",
          namespace: namespace,
        },
      ],
      roleRef: {
        kind: "ClusterRole",
        name: "kube-metacontroller",
        apiGroup: "rbac.authorization.k8s.io",
      },
    }, // metaClusterRoleBinding

    metaInitializerCRD:: {
      apiVersion: "apiextensions.k8s.io/v1beta1",
      kind: "CustomResourceDefinition",
      metadata: {
        name: "initializercontrollers.metacontroller.k8s.io",
      },
      spec: {
        group: "metacontroller.k8s.io",
        version: "v1alpha1",
        scope: "Cluster",
        names: {
          plural: "initializercontrollers",
          singular: "initializercontroller",
          kind: "InitializerController",
          shortNames: [
            "ic",
            "ictl",
          ],
        },
      },
    }, // metaInitializerCRD

    metaLambdaCRD:: {
      apiVersion: "apiextensions.k8s.io/v1beta1",
      kind: "CustomResourceDefinition",
      metadata: {
        name: "lambdacontrollers.metacontroller.k8s.io",
      },
      spec: {
        group: "metacontroller.k8s.io",
        version: "v1alpha1",
        scope: "Cluster",
        names: {
          plural: "lambdacontrollers",
          singular: "lambdacontroller",
          kind: "LambdaController",
          shortNames: [
            "lc",
            "lctl",
          ],
        },
      },
    }, // metaLambdaCRD

    metaDeployment:: {
      apiVersion: "apps/v1beta1",
      kind: "Deployment",
      metadata: {
        name: "kube-metacontroller",
        namespace: namespace,
        labels: {
          app: "kube-metacontroller",
        },
      },
      spec: {
        replicas: 1,
        template: {
          metadata: {
            labels: {
              app: "kube-metacontroller",
            },
          },
          spec: {
            serviceAccountName: "kube-metacontroller",
            containers: [
              {
                name: "kube-metacontroller",
                image: metacontrollerImage,
                command: [
                  "/usr/bin/metacontroller",
                ],
                args: [
                  "--logtostderr",
                ],
                imagePullPolicy: "Always",
              },
            ],
          },
        },
      },
    }, // metaDeployment

    endpointsCRD:: {
      apiVersion: "apiextensions.k8s.io/v1beta1",
      kind: "CustomResourceDefinition",
      metadata: {
        name: "cloudendpoints.ctl.isla.solutions",
      },
      spec: {
        group: "ctl.isla.solutions",
        version: "v1",
        scope: "Namespaced",
        names: {
          plural: "cloudendpoints",
          singular: "cloudendpoint",
          kind: "CloudEndpoint",
          shortNames: [
            "cloudep",
            "ce",
          ],
        },
      },
    }, // endpointsCRD

    endpointsService:: {
      apiVersion: "v1",
      kind: "Service",
      metadata: {
        name: "cloud-endpoints-controller",
        namespace: namespace,
      },
      spec: {
        type: "ClusterIP",
        ports: [
          {
            name: "http",
            port: 80,
          },
        ],
        selector: {
          app: "cloud-endpoints-controller",
        },
      },
    }, // endpointsService

    endpointsLambdaController:: {
      apiVersion: "metacontroller.k8s.io/v1alpha1",
      kind: "LambdaController",
      metadata: {
        name: "cloud-endpoints-controller",
      },
      spec: {
        parentResource: {
          apiVersion: "ctl.isla.solutions/v1",
          resource: "cloudendpoints",
        },
        childResources: [],
        clientConfig: {
          service: {
            name: "cloud-endpoints-controller",
            namespace: namespace,
            caBundle: "...",
          },
        },
        hooks: {
          sync: {
            path: "/sync",
          },
        },
        generateSelector: true,
      },
    }, // endpointsLambdaController

    endpointsServiceAccount:: {
      apiVersion: "v1",
      kind: "ServiceAccount",
      metadata: {
        name: "cloud-endpoints-controller",
        namespace: namespace,
      },
    }, // endpointsServiceAccount

    endpointsClusterRole:: {
      kind: "ClusterRole",
      apiVersion: "rbac.authorization.k8s.io/v1beta1",
      metadata: {
        name: "cloud-endpoints-controller",
        namespace: namespace,
      },
      rules: [
        {
          apiGroups: [""],
          resources: ["services"],
          verbs: ["get", "list"],
        },
        {
          apiGroups: ["extensions"],
          resources: ["ingresses"],
          verbs: ["get", "list"],
        },
      ],
    }, // endpointsClusterRole

    endpointsClusterRoleBinding:: {
      kind: "ClusterRoleBinding",
      apiVersion: "rbac.authorization.k8s.io/v1beta1",
      metadata: {
        name: "cloud-endpoints-controller",
      },
      subjects: [
        {
          kind: "ServiceAccount",
          name: "cloud-endpoints-controller",
          namespace: namespace,
        },
      ],
      roleRef: {
        kind: "ClusterRole",
        name: "cloud-endpoints-controller",
        apiGroup: "rbac.authorization.k8s.io",
      },
    }, // endpointsClusterRoleBinding

    endpointsDeploy(secretName, secretKey):: {
      apiVersion: "apps/v1beta1",
      kind: "Deployment",
      metadata: {
        name: "cloud-endpoints-controller",
        namespace: namespace,
      },
      spec: {
        replicas: 1,
        template: {
          metadata: {
            labels: {
              app: "cloud-endpoints-controller",
            },
          },
          spec: {
            serviceAccountName: "cloud-endpoints-controller",
            terminationGracePeriodSeconds: 5,
            containers: [
              {
                name: "cloud-endpoints-controller",
                image: cloudEndpointsImage,
                imagePullPolicy: "Always",
                env: [
                  {
                    name: "GOOGLE_APPLICATION_CREDENTIALS",
                    value: "/var/run/secrets/sa/" + secretKey,
                  },
                ],
                volumeMounts: [
                  {
                    name: "sa-key",
                    readOnly: true,
                    mountPath: "/var/run/secrets/sa",
                  },
                ],
                readinessProbe: {
                  httpGet: {
                    path: "/healthz",
                    port: 80,
                    scheme: "HTTP",
                  },
                  periodSeconds: 5,
                  timeoutSeconds: 5,
                  successThreshold: 1,
                  failureThreshold: 2,
                },
              },
            ],
            volumes: [
              {
                name: "sa-key",
                secret: {
                  secretName: secretName,
                },
              },
            ],
          },
        },
      },
    }, // endpointsDeploy
  }, // parts
}
@ -1,74 +0,0 @@
#!/bin/bash
#
# A script to modify the envoy config to perform JWT validation,
# given the information for the service.
# The script is executed by the iap container to configure IAP. When it finishes,
# the envoy config is created with the JWT audience filled in.

[ -z "${CLIENT_ID}" ] && echo "Error: CLIENT_ID must be set" && exit 1
[ -z "${CLIENT_SECRET}" ] && echo "Error: CLIENT_SECRET must be set" && exit 1
[ -z "${NAMESPACE}" ] && echo "Error: NAMESPACE must be set" && exit 1
[ -z "${SERVICE}" ] && echo "Error: SERVICE must be set" && exit 1

apk add --update jq
curl https://storage.googleapis.com/kubernetes-release/release/v1.9.4/bin/linux/amd64/kubectl > /usr/local/bin/kubectl && chmod +x /usr/local/bin/kubectl

PROJECT=$(curl -s -H "Metadata-Flavor: Google" http://metadata.google.internal/computeMetadata/v1/project/project-id)
if [ -z "${PROJECT}" ]; then
  echo "Error: unable to fetch PROJECT from compute metadata"
  exit 1
fi

PROJECT_NUM=$(curl -s -H "Metadata-Flavor: Google" http://metadata.google.internal/computeMetadata/v1/project/numeric-project-id)
if [ -z "${PROJECT_NUM}" ]; then
  echo "Error: unable to fetch PROJECT_NUM from compute metadata"
  exit 1
fi

# Activate the service account
gcloud auth activate-service-account --key-file=${GOOGLE_APPLICATION_CREDENTIALS}
# Print out the config for debugging
gcloud config list

NODE_PORT=$(kubectl --namespace=${NAMESPACE} get svc ${SERVICE} -o jsonpath='{.spec.ports[0].nodePort}')
while [[ -z ${BACKEND_ID} ]]; do
  BACKEND_ID=$(gcloud compute --project=${PROJECT} backend-services list --filter=name~k8s-be-${NODE_PORT}- --format='value(id)')
  echo "Waiting for backend id PROJECT=${PROJECT} NAMESPACE=${NAMESPACE} SERVICE=${SERVICE} filter=name~k8s-be-${NODE_PORT}-..."
  sleep 2
done
echo BACKEND_ID=${BACKEND_ID}

NODE_PORT=$(kubectl --namespace=${NAMESPACE} get svc ${SERVICE} -o jsonpath='{.spec.ports[0].nodePort}')
BACKEND_SERVICE=$(gcloud --project=${PROJECT} compute backend-services list --filter=name~k8s-be-${NODE_PORT}- --uri)

JWT_AUDIENCE="/projects/${PROJECT_NUM}/global/backendServices/${BACKEND_ID}"

# For healthcheck compare.
echo "JWT_AUDIENCE=${JWT_AUDIENCE}" > /var/shared/healthz.env
echo "NODE_PORT=${NODE_PORT}" >> /var/shared/healthz.env
echo "BACKEND_ID=${BACKEND_ID}" >> /var/shared/healthz.env

kubectl get configmap -n ${NAMESPACE} envoy-config -o jsonpath='{.data.envoy-config\.json}' | \
  sed -e "s|{{JWT_AUDIENCE}}|${JWT_AUDIENCE}|g" > /var/shared/envoy-config.json

echo "Restarting envoy"
curl -s ${ENVOY_ADMIN}/quitquitquit

function checkIAP() {
  # created by the init container.
  . /var/shared/healthz.env

  # If the node port or the backend id changes, so does the JWT audience.
  CURR_NODE_PORT=$(kubectl --namespace=${NAMESPACE} get svc ${SERVICE} -o jsonpath='{.spec.ports[0].nodePort}')
  CURR_BACKEND_ID=$(gcloud compute --project=${PROJECT} backend-services list --filter=name~k8s-be-${CURR_NODE_PORT}- --format='value(id)')
  [ "$BACKEND_ID" == "$CURR_BACKEND_ID" ]
}

# Verify IAP every 10 seconds.
while true; do
  if ! checkIAP; then
    echo "$(date) WARN: IAP check failed, restarting container."
    exit 1
  fi
  sleep 10
done
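The audience string the script assembles follows a fixed shape; a small sketch of the concatenation, with a hypothetical project number and backend id:

```jsonnet
// Sketch of the JWT audience the script writes to healthz.env
// (projectNum and backendId below are hypothetical placeholders).
local projectNum = "123456789012";
local backendId = "6076239560083992221";
"/projects/" + projectNum + "/global/backendServices/" + backendId
```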
@ -1,817 +0,0 @@
{
  parts(namespace):: {
    local k = import "k.libsonnet",

    // Test if the given hostname is in the form of: "NAME.endpoints.PROJECT.cloud.goog"
    local isCloudEndpoint = function(str) {
      local toks = std.split(str, "."),
      result::
        (std.length(toks) == 5 && toks[1] == "endpoints" && toks[3] == "cloud" && toks[4] == "goog"),
    }.result,

    // Creates a map of parameters from a given hostname in the form of: "NAME.endpoints.PROJECT.cloud.goog"
    local makeEndpointParams = function(str) {
      local toks = std.split(str, "."),
      result:: {
        name: toks[0],
        project: toks[2],
      },
    }.result,
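A minimal sketch of how the two helpers above evaluate, assuming a hypothetical hostname:

```jsonnet
// Sketch only: "kubeflow.endpoints.my-project.cloud.goog" is a made-up hostname.
local toks = std.split("kubeflow.endpoints.my-project.cloud.goog", ".");
{
  // Same test isCloudEndpoint applies -- true for this hostname.
  isEndpoint: std.length(toks) == 5 && toks[1] == "endpoints" && toks[3] == "cloud" && toks[4] == "goog",
  // Same map makeEndpointParams builds: { name: "kubeflow", project: "my-project" }.
  endpointParams: { name: toks[0], project: toks[2] },
}
```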
    ingressParts(secretName, ipName, hostname, issuer, envoyImage, disableJwt, oauthSecretName):: std.prune(k.core.v1.list.new([
      $.parts(namespace).service,
      $.parts(namespace).ingress(secretName, ipName, hostname),
      $.parts(namespace).certificate(secretName, hostname, issuer),
      $.parts(namespace).initServiceAccount,
      $.parts(namespace).initClusterRoleBinding,
      $.parts(namespace).initClusterRole,
      $.parts(namespace).deploy(envoyImage, oauthSecretName),
      $.parts(namespace).iapEnabler(oauthSecretName),
      $.parts(namespace).configMap(disableJwt),
      $.parts(namespace).whoamiService,
      $.parts(namespace).whoamiApp,
      (if isCloudEndpoint(hostname) then $.parts(namespace).cloudEndpoint(makeEndpointParams(hostname))),
    ])),

    service:: {
      apiVersion: "v1",
      kind: "Service",
      metadata: {
        labels: {
          service: "envoy",
        },
        name: "envoy",
        namespace: namespace,
      },
      spec: {
        ports: [
          {
            name: "envoy",
            port: envoyPort,
            targetPort: envoyPort,
          },
        ],
        selector: {
          service: "envoy",
        },
        // NodePort because this will be the backend for our ingress.
        type: "NodePort",
      },
    }, // service

    initServiceAccount:: {
      apiVersion: "v1",
      kind: "ServiceAccount",
      metadata: {
        name: "envoy",
        namespace: namespace,
      },
    }, // initServiceAccount

    initClusterRoleBinding:: {
      kind: "ClusterRoleBinding",
      apiVersion: "rbac.authorization.k8s.io/v1beta1",
      metadata: {
        name: "envoy",
      },
      subjects: [
        {
          kind: "ServiceAccount",
          name: "envoy",
          namespace: namespace,
        },
      ],
      roleRef: {
        kind: "ClusterRole",
        name: "envoy",
        apiGroup: "rbac.authorization.k8s.io",
      },
    }, // initClusterRoleBinding

    initClusterRole:: {
      kind: "ClusterRole",
      apiVersion: "rbac.authorization.k8s.io/v1beta1",
      metadata: {
        name: "envoy",
        namespace: namespace,
      },
      rules: [
        {
          apiGroups: [""],
          resources: ["services", "configmaps"],
          verbs: ["get", "list", "patch", "update"],
        },
      ],
    }, // initClusterRole

    envoyContainer(params):: {
      image: params.image,
      command: [
        "/usr/local/bin/envoy",
        "-c",
        params.configPath,
        "--log-level",
        "info",
        // Since we are running multiple instances of envoy on the same host we need to set a unique baseId.
        "--base-id",
        params.baseId,
      ],
      imagePullPolicy: "Always",
      name: params.name,
      livenessProbe: {
        httpGet: {
          path: params.healthPath,
          port: params.healthPort,
        },
        initialDelaySeconds: 30,
        periodSeconds: 30,
      },
      readinessProbe: {
        httpGet: {
          path: params.healthPath,
          port: params.healthPort,
        },
        initialDelaySeconds: 30,
        periodSeconds: 30,
      },
      ports: std.map(function(p)
        {
          containerPort: p,
        }
      , params.ports),
      resources: {
        limits: {
          cpu: 1,
          memory: "400Mi",
        },
        requests: {
          cpu: "200m",
          memory: "100Mi",
        },
      },
      volumeMounts: [
        {
          mountPath: "/etc/envoy",
          name: "shared",
        },
      ],
    }, // envoyContainer

    deploy(image, oauthSecretName):: {
      apiVersion: "extensions/v1beta1",
      kind: "Deployment",
      metadata: {
        name: "envoy",
        namespace: namespace,
      },
      spec: {
        replicas: 3,
        template: {
          metadata: {
            labels: {
              service: "envoy",
            },
          },
          spec: {
            serviceAccountName: "envoy",
            containers: [
              $.parts(namespace).envoyContainer({
                image: image,
                name: "envoy",
                // We use the admin port for the health/readiness checks because the main port requires a valid JWT.
                // healthPath: "/server_info",
                healthPath: "/healthz",
                healthPort: envoyPort,
                configPath: "/etc/envoy/envoy-config.json",
                baseId: "27000",
                ports: [envoyPort, envoyAdminPort, envoyStatsPort],
              }),
              {
                name: "iap",
                image: "google/cloud-sdk:alpine",
                command: [
                  "sh",
                  "/var/envoy-config/configure_envoy_for_iap.sh",
                ],
                env: [
                  {
                    name: "NAMESPACE",
                    value: namespace,
                  },
                  {
                    name: "CLIENT_ID",
                    valueFrom: {
                      secretKeyRef: {
                        name: oauthSecretName,
                        key: "CLIENT_ID",
                      },
                    },
                  },
                  {
                    name: "CLIENT_SECRET",
                    valueFrom: {
                      secretKeyRef: {
                        name: oauthSecretName,
                        key: "CLIENT_SECRET",
                      },
                    },
                  },
                  {
                    name: "SERVICE",
                    value: "envoy",
                  },
                  {
                    name: "ENVOY_ADMIN",
                    value: "http://localhost:" + envoyAdminPort,
                  },
                  {
                    name: "GOOGLE_APPLICATION_CREDENTIALS",
                    value: "/var/run/secrets/sa/admin-gcp-sa.json",
                  },
                ],
                volumeMounts: [
                  {
                    mountPath: "/var/envoy-config/",
                    name: "config-volume",
                  },
                  {
                    mountPath: "/var/shared/",
                    name: "shared",
                  },
                  {
                    name: "sa-key",
                    readOnly: true,
                    mountPath: "/var/run/secrets/sa",
                  },
                ],
              },
            ],
            restartPolicy: "Always",
            volumes: [
              {
                configMap: {
                  name: "envoy-config",
                },
                name: "config-volume",
              },
              {
                emptyDir: {
                  medium: "Memory",
                },
                name: "shared",
              },
              {
                name: "sa-key",
                secret: {
                  secretName: "admin-gcp-sa",
                },
              },
            ],
          },
        },
      },
    }, // deploy

    // Run the process to enable IAP.
    iapEnabler(oauthSecretName):: {
      apiVersion: "extensions/v1beta1",
      kind: "Deployment",
      metadata: {
        name: "iap-enabler",
        namespace: namespace,
      },
      spec: {
        replicas: 1,
        template: {
          metadata: {
            labels: {
              service: "iap-enabler",
            },
          },
          spec: {
            serviceAccountName: "envoy",
            containers: [
              {
                name: "iap",
                image: "google/cloud-sdk:alpine",
                command: [
                  "sh",
                  "/var/envoy-config/setup_iap.sh",
                ],
                env: [
                  {
                    name: "NAMESPACE",
                    value: namespace,
                  },
                  {
                    name: "CLIENT_ID",
                    valueFrom: {
                      secretKeyRef: {
                        name: oauthSecretName,
                        key: "CLIENT_ID",
                      },
                    },
                  },
                  {
                    name: "CLIENT_SECRET",
                    valueFrom: {
                      secretKeyRef: {
                        name: oauthSecretName,
                        key: "CLIENT_SECRET",
                      },
                    },
                  },
                  {
                    name: "SERVICE",
                    value: "envoy",
                  },
                  {
                    name: "ENVOY_ADMIN",
                    value: "http://localhost:" + envoyAdminPort,
                  },
                  {
                    name: "GOOGLE_APPLICATION_CREDENTIALS",
                    value: "/var/run/secrets/sa/admin-gcp-sa.json",
                  },
                ],
                volumeMounts: [
                  {
                    mountPath: "/var/envoy-config/",
                    name: "config-volume",
                  },
                  {
                    name: "sa-key",
                    readOnly: true,
                    mountPath: "/var/run/secrets/sa",
                  },
                ],
              },
            ],
            restartPolicy: "Always",
            volumes: [
              {
                configMap: {
                  name: "envoy-config",
                },
                name: "config-volume",
              },
              {
                name: "sa-key",
                secret: {
                  secretName: "admin-gcp-sa",
                },
              },
            ],
          },
        },
      },
    }, // iapEnabler

    configMap(disableJwt):: {
      apiVersion: "v1",
      kind: "ConfigMap",
      metadata: {
        name: "envoy-config",
        namespace: namespace,
      },
      data: {
        "envoy-config.json": std.manifestJson($.parts(namespace).envoyConfig(disableJwt)),
        "setup_iap.sh": importstr "setup_iap.sh",
        "configure_envoy_for_iap.sh": importstr "configure_envoy_for_iap.sh",
      },
    },

    local envoyPort = 8080,
    local envoyAdminPort = 8001,
    local envoyStatsPort = 8025,

    // This is the config for the secondary envoy proxy, which does JWT verification
    // and actually routes requests to the appropriate backend.
    envoyConfig(disableJwt):: {
      listeners: [
        {
          address: "tcp://0.0.0.0:" + envoyPort,
          filters: [
            {
              type: "read",
              name: "http_connection_manager",
              config: {
                codec_type: "auto",
                stat_prefix: "ingress_http",
                access_log: [
                  {
                    format: 'ACCESS [%START_TIME%] "%REQ(:METHOD)% %REQ(X-ENVOY-ORIGINAL-PATH?:PATH)% %PROTOCOL%" %RESPONSE_CODE% %RESPONSE_FLAGS% %BYTES_RECEIVED% %BYTES_SENT% %DURATION% %RESP(X-ENVOY-UPSTREAM-SERVICE-TIME)% "%REQ(X-FORWARDED-FOR)%" "%REQ(USER-AGENT)%" "%REQ(X-REQUEST-ID)%" "%REQ(:AUTHORITY)%" "%UPSTREAM_HOST%"\n',
                    path: "/dev/fd/1",
                  },
                ],
                route_config: {
                  virtual_hosts: [
                    {
                      name: "backend",
                      domains: ["*"],
                      routes: [
                        // The first route that matches is picked.
                        {
                          timeout_ms: 10000,
                          path: "/healthz",
                          prefix_rewrite: "/server_info",
                          weighted_clusters: {
                            clusters: [
                              { name: "cluster_healthz", weight: 100.0 },
                            ],
                          },
                        },
                        // Provide access to the whoami app, skipping JWT verification;
                        // this is useful for debugging.
                        {
                          timeout_ms: 10000,
                          prefix: "/noiap/whoami",
                          prefix_rewrite: "/",
                          weighted_clusters: {
                            clusters: [
                              {
                                name: "cluster_iap_app",
                                weight: 100.0,
                              },
                            ],
                          },
                        },
                        {
                          timeout_ms: 10000,
                          prefix: "/whoami",
                          prefix_rewrite: "/",
                          weighted_clusters: {
                            clusters: [
                              {
                                name: "cluster_iap_app",
                                weight: 100.0,
                              },
                            ],
                          },
                        },
                        // Jupyter uses the prefixes /hub & /user
                        {
                          // JupyterHub requires the prefix /hub.
                          // Use a 10 minute timeout because downloading
                          // images for the jupyter notebook can take a while.
                          timeout_ms: 600000,
                          prefix: "/hub",
                          prefix_rewrite: "/hub",
                          use_websocket: true,
                          weighted_clusters: {
                            clusters: [
                              {
                                name: "cluster_jupyterhub",
                                weight: 100.0,
                              },
                            ],
                          },
                        },
                        {
                          // JupyterHub requires the prefix /user.
                          // Use a 10 minute timeout because downloading
                          // images for the jupyter notebook can take a while.
                          timeout_ms: 600000,
                          prefix: "/user",
                          prefix_rewrite: "/user",
                          use_websocket: true,
                          weighted_clusters: {
                            clusters: [
                              {
                                name: "cluster_jupyterhub",
                                weight: 100.0,
                              },
                            ],
                          },
                        },
                        // TFJob uses the prefix /tfjobs/
                        {
                          timeout_ms: 10000,
                          prefix: "/tfjobs",
                          prefix_rewrite: "/tfjobs",
                          weighted_clusters: {
                            clusters: [
                              {
                                name: "cluster_tfjobs",
                                weight: 100.0,
                              },
                            ],
                          },
                        },
                        {
                          // Route the remaining traffic to Ambassador, which supports dynamically adding
                          // routes based on service annotations.
                          timeout_ms: 10000,
                          prefix: "/",
                          prefix_rewrite: "/",
                          use_websocket: true,
                          weighted_clusters: {
                            clusters: [
                              {
                                name: "cluster_ambassador",
                                weight: 100.0,
                              },
                            ],
                          },
                        },
                      ],
                    },
                  ],
                },
                local authFilter = if disableJwt then
                  []
                else [{
                  type: "decoder",
                  name: "jwt-auth",
                  config: {
                    jwts: [
                      {
                        issuer: "https://cloud.google.com/iap",
                        audiences: "{{JWT_AUDIENCE}}",
                        jwks_uri: "https://www.gstatic.com/iap/verify/public_key-jwk",
                        jwks_uri_envoy_cluster: "iap_issuer",
                        jwt_headers: ["x-goog-iap-jwt-assertion"],
                      },
                    ],
                    bypass_jwt: [
                      {
                        http_method: "GET",
                        path_exact: "/healthz",
                      },
                      {
                        http_method: "GET",
                        path_exact: "/noiap/whoami",
                      },
                    ],
                  },
                }],
                filters:
                  authFilter +
                  [
                    {
                      type: "decoder",
                      name: "router",
                      config: {},
                    },
                  ],
              },
            },
          ],
        },
      ],
      admin: {
        // We use 0.0.0.0 and not 127.0.0.1 because we want the admin server to be available on all devices
        // so that it can be used for health checking.
        address: "tcp://0.0.0.0:" + envoyAdminPort,
        access_log_path: "/tmp/admin_access_log",
      },
      cluster_manager: {
        clusters: [
          {
            name: "cluster_healthz",
            connect_timeout_ms: 3000,
            type: "strict_dns",
            lb_type: "round_robin",
            hosts: [
              {
                // We just use the admin server for the health check.
                url: "tcp://127.0.0.1:" + envoyAdminPort,
              },
            ],
          },
          {
            name: "iap_issuer",
            connect_timeout_ms: 5000,
            type: "strict_dns",
            circuit_breakers: {
              default: {
                max_pending_requests: 10000,
                max_requests: 10000,
              },
            },
            lb_type: "round_robin",
            hosts: [
              {
                url: "tcp://www.gstatic.com:80",
              },
            ],
          },
          {
            name: "cluster_iap_app",
            connect_timeout_ms: 3000,
            type: "strict_dns",
            lb_type: "round_robin",
            hosts: [
              {
                url: "tcp://whoami-app." + namespace + ":80",
              },
            ],
          },
          {
            name: "cluster_jupyterhub",
            connect_timeout_ms: 3000,
            type: "strict_dns",
            lb_type: "round_robin",
            hosts: [
              {
                url: "tcp://tf-hub-lb." + namespace + ":80",
              },
            ],
          },
          {
            name: "cluster_tfjobs",
            connect_timeout_ms: 3000,
            type: "strict_dns",
            lb_type: "round_robin",
            hosts: [
              {
                url: "tcp://tf-job-dashboard." + namespace + ":80",
              },
            ],
          },
          {
            name: "cluster_ambassador",
            connect_timeout_ms: 3000,
            type: "strict_dns",
            lb_type: "round_robin",
            hosts: [
              {
                url: "tcp://ambassador." + namespace + ":80",
              },
            ],
          },
        ],
      },
      statsd_udp_ip_address: "127.0.0.1:" + envoyStatsPort,
      stats_flush_interval_ms: 1000,
    }, // envoyConfig
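A minimal sketch of the `authFilter` pattern used inside `envoyConfig` above: the JWT filter is prepended to the filter chain only when JWT checking is enabled. The filter bodies here are stubbed out; only the conditional-prepend shape is the point.

```jsonnet
// Sketch only: stubbed filter names stand in for the full jwt-auth and router configs.
local filtersFor(disableJwt) =
  (if disableJwt then [] else [{ name: "jwt-auth" }]) + [{ name: "router" }];
{
  withJwt: filtersFor(false),    // [{ name: "jwt-auth" }, { name: "router" }]
  withoutJwt: filtersFor(true),  // [{ name: "router" }]
}
```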
    whoamiService:: {
      apiVersion: "v1",
      kind: "Service",
      metadata: {
        labels: {
          app: "whoami",
        },
        name: "whoami-app",
        namespace: namespace,
      },
      spec: {
        ports: [
          {
            port: 80,
            targetPort: 8081,
          },
        ],
        selector: {
          app: "whoami",
        },
        type: "ClusterIP",
      },
    }, // whoamiService

    whoamiApp:: {
      apiVersion: "extensions/v1beta1",
      kind: "Deployment",
      metadata: {
        name: "whoami-app",
        namespace: namespace,
      },
      spec: {
        replicas: 1,
        template: {
          metadata: {
            labels: {
              app: "whoami",
            },
          },
          spec: {
            containers: [
              {
                env: [
                  {
                    name: "PORT",
                    value: "8081",
                  },
                ],
                image: "gcr.io/cloud-solutions-group/esp-sample-app:1.0.0",
                name: "app",
                ports: [
                  {
                    containerPort: 8081,
                  },
                ],
                readinessProbe: {
                  failureThreshold: 2,
                  httpGet: {
                    path: "/healthz",
                    port: 8081,
                    scheme: "HTTP",
                  },
                  periodSeconds: 10,
                  successThreshold: 1,
                  timeoutSeconds: 5,
                },
              },
            ],
          },
        },
      },
    },

    ingress(secretName, ipName, hostname):: {
      apiVersion: "extensions/v1beta1",
      kind: "Ingress",
      metadata: {
        name: "envoy-ingress",
        namespace: namespace,
        annotations: {
          "kubernetes.io/tls-acme": "true",
          "ingress.kubernetes.io/ssl-redirect": "true",
          "kubernetes.io/ingress.global-static-ip-name": ipName,
        },
      },
      spec: {
        rules: [
          {
            [if hostname != "null" then "host"]: hostname,
            http: {
              paths: [
                {
                  backend: {
                    // Due to https://github.com/kubernetes/contrib/blob/master/ingress/controllers/gce/examples/health_checks/README.md#limitations
                    // keep servicePort the same as the port we are targeting on the backend, so that
                    // servicePort matches targetPort for the purpose of health checking.
                    serviceName: "envoy",
                    servicePort: envoyPort,
                  },
                  path: "/*",
                },
              ],
            },
          },
        ],
        tls: [
          {
            secretName: secretName,
          },
        ],
      },
    }, // ingress

    certificate(secretName, hostname, issuer):: {
      apiVersion: "certmanager.k8s.io/v1alpha1",
      kind: "Certificate",
      metadata: {
        name: secretName,
        namespace: namespace,
      },

      spec: {
        secretName: secretName,
        issuerRef: {
          name: issuer,
        },
        commonName: hostname,
        dnsNames: [
          hostname,
        ],
        acme: {
          config: [
            {
              http01: {
                ingress: "envoy-ingress",
              },
              domains: [
                hostname,
              ],
            },
          ],
        },
      },
    }, // certificate

    cloudEndpoint(params):: {
      apiVersion: "ctl.isla.solutions/v1",
      kind: "CloudEndpoint",
      metadata: {
        name: params.name,
        namespace: namespace,
      },
      spec: {
        project: params.project,
        targetIngress: {
          name: "envoy-ingress",
          namespace: namespace,
        },
      },
    }, // cloudEndpoint

  }, // parts
}
@ -1,315 +0,0 @@
{
  all(params):: [
    $.parts(params.namespace).jupyterHubConfigMap(params.jupyterHubAuthenticator, params.disks),
    $.parts(params.namespace).jupyterHubService,
    $.parts(params.namespace).jupyterHubLoadBalancer(params.jupyterHubServiceType),
    $.parts(params.namespace).jupyterHub(params.jupyterHubImage, params.jupyterNotebookPVCMount, params.cloud, params.jupyterNotebookRegistry, params.jupyterNotebookRepoName),
    $.parts(params.namespace).jupyterHubRole,
    $.parts(params.namespace).jupyterHubServiceAccount,
    $.parts(params.namespace).jupyterHubRoleBinding,
  ],

  parts(namespace):: {
    jupyterHubConfigMap(jupyterHubAuthenticator, disks): {
      local util = import "kubeflow/core/util.libsonnet",
      local diskNames = util.toArray(disks),
      local kubeSpawner = $.parts(namespace).kubeSpawner(jupyterHubAuthenticator, diskNames),
      result:: $.parts(namespace).jupyterHubConfigMapWithSpawner(kubeSpawner),
    }.result,

    kubeSpawner(authenticator, volumeClaims=[]): {
      // TODO(jlewi): We should make whether we use PVC configurable.
      local baseKubeConfigSpawner = importstr "kubeform_spawner.py",

      authenticatorOptions:: {

        //## Authenticator Options
        local kubeConfigDummyAuthenticator = "c.JupyterHub.authenticator_class = 'dummyauthenticator.DummyAuthenticator'",

        // This configuration allows us to use the id provided by IAP.
        local kubeConfigIAPAuthenticator = @"c.JupyterHub.authenticator_class ='jhub_remote_user_authenticator.remote_user_auth.RemoteUserAuthenticator'
c.RemoteUserAuthenticator.header_name = 'x-goog-authenticated-user-email'",

        options:: std.join("\n", std.prune([
          "######## Authenticator ######",
          if authenticator == "iap" then
            kubeConfigIAPAuthenticator else
            kubeConfigDummyAuthenticator,
        ])),
      }.options, // authenticatorOptions

      volumeOptions:: {
        local volumes = std.map(function(v)
          {
            name: v,
            persistentVolumeClaim: {
              claimName: v,
            },
          }, volumeClaims),

        local volumeMounts = std.map(function(v)
          {
            mountPath: "/mnt/" + v,
            name: v,
          }, volumeClaims),

        options::
          if std.length(volumeClaims) > 0 then
            // we need to merge the PVC from the spawner config
            // with any added by a provisioner
            std.join("\n",
              [
                "###### Volumes #######",
                "c.KubeSpawner.volumes.extend(" + std.manifestPython(volumes) + ")",
                "c.KubeSpawner.volume_mounts.extend(" + std.manifestPython(volumeMounts) + ")",
              ])
          else "",

      }.options, // volumeOptions
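A small sketch of the configuration line `volumeOptions` above emits for a single claim, assuming a hypothetical claim name:

```jsonnet
// Sketch only: "disk1" is a made-up claim name; std.manifestPython renders the
// object as a Python literal, which is then spliced into jupyterhub_config.py.
local volumes = [{ name: "disk1", persistentVolumeClaim: { claimName: "disk1" } }];
"c.KubeSpawner.volumes.extend(" + std.manifestPython(volumes) + ")"
```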
      spawner:: std.join("\n", std.prune([baseKubeConfigSpawner, self.authenticatorOptions, self.volumeOptions])),
    }.spawner, // kubeSpawner

    local baseJupyterHubConfigMap = {
      apiVersion: "v1",
      kind: "ConfigMap",
      metadata: {
        name: "jupyterhub-config",
        namespace: namespace,
      },
    },

    jupyterHubConfigMapWithSpawner(spawner): baseJupyterHubConfigMap {
      data: {
        "jupyterhub_config.py": spawner,
      },
    },

    jupyterHubService: {
      apiVersion: "v1",
      kind: "Service",
      metadata: {
        labels: {
          app: "tf-hub",
        },
        name: "tf-hub-0",
        namespace: namespace,
      },
      spec: {
        // We want a headless service so we set the ClusterIP to be None.
        // This headless server is used by individual Jupyter pods to connect back to the Hub.
        clusterIP: "None",
        ports: [
          {
            name: "hub",
            port: 8000,
          },
        ],
        selector: {
          app: "tf-hub",
        },
      },
    },

    jupyterHubLoadBalancer(serviceType): {
      apiVersion: "v1",
      kind: "Service",
      metadata: {
        labels: {
          app: "tf-hub-lb",
        },
        name: "tf-hub-lb",
        namespace: namespace,
        annotations: {
          "getambassador.io/config":
            std.join("\n", [
              "---",
              "apiVersion: ambassador/v0",
              "kind: Mapping",
              "name: tf-hub-lb-hub-mapping",
              "prefix: /hub/",
              "rewrite: /hub/",
              "timeout_ms: 300000",
              "service: tf-hub-lb." + namespace,
              "---",
              "apiVersion: ambassador/v0",
              "kind: Mapping",
              "name: tf-hub-lb-user-mapping",
              "prefix: /user/",
              "rewrite: /user/",
              "timeout_ms: 300000",
              "service: tf-hub-lb." + namespace,
            ]),
        }, // annotations
      },
      spec: {
        ports: [
          {
            name: "hub",
            port: 80,
            targetPort: 8000,
          },
        ],
        selector: {
          app: "tf-hub",
        },
        type: serviceType,
      },
    },

    // image: Image for JupyterHub
    jupyterHub(image, notebookPVCMount, cloud, registry, repoName): {
      apiVersion: "apps/v1beta1",
      kind: "StatefulSet",
      metadata: {
        name: "tf-hub",
        namespace: namespace,
      },
      spec: {
        replicas: 1,
        serviceName: "",
        template: {
          metadata: {
            labels: {
              app: "tf-hub",
            },
          },
          spec: {
            containers: [
              {
                command: [
                  "jupyterhub",
                  "-f",
                  "/etc/config/jupyterhub_config.py",
                ],
                image: image,
                name: "tf-hub",
                volumeMounts: [
                  {
                    mountPath: "/etc/config",
                    name: "config-volume",
                  },
                ],
                ports: [
                  // Port 8000 is used by the hub to accept incoming requests.
                  {
                    containerPort: 8000,
                  },
                  // Port 8081 accepts callbacks from the individual Jupyter pods.
                  {
                    containerPort: 8081,
                  },
                ],
                env: [
                  {
                    name: "NOTEBOOK_PVC_MOUNT",
                    value: notebookPVCMount,
                  },
                  {
                    name: "CLOUD_NAME",
                    value: cloud,
                  },
                  {
                    name: "REGISTRY",
                    value: registry,
                  },
                  {
                    name: "REPO_NAME",
                    value: repoName,
                  },
                ],
              }, // jupyterHub container
            ],
            serviceAccountName: "jupyter-hub",
            volumes: [
              {
                configMap: {
                  name: "jupyterhub-config",
                },
                name: "config-volume",
              },
            ],
          },
        },
        updateStrategy: {
          type: "RollingUpdate",
        },
      },
    },

    // Contents based on https://github.com/jupyterhub/zero-to-jupyterhub-k8s/blob/master/jupyterhub/templates/hub/rbac.yaml
    jupyterHubRole: {
      apiVersion: "rbac.authorization.k8s.io/v1beta1",
      kind: "Role",
      metadata: {
        name: "jupyter-role",
        namespace: namespace,
      },
      rules: [
        {
          apiGroups: [
            "",
          ],
          resources: [
            "pods",
            "persistentvolumeclaims",
          ],
          verbs: [
            "get",
            "watch",
            "list",
            "create",
            "delete",
          ],
        },
        {
          apiGroups: [
            "",
          ],
          resources: [
            "events",
          ],
          verbs: [
            "get",
            "watch",
            "list",
          ],
        },
      ],
    },

    jupyterHubServiceAccount: {
      apiVersion: "v1",
      kind: "ServiceAccount",
      metadata: {
        labels: {
          app: "jupyter-hub",
        },
        name: "jupyter-hub",
        namespace: namespace,
      },
    },

    jupyterHubRoleBinding: {
      apiVersion: "rbac.authorization.k8s.io/v1beta1",
      kind: "RoleBinding",
      metadata: {
        name: "jupyter-role",
        namespace: namespace,
      },
      roleRef: {
        apiGroup: "rbac.authorization.k8s.io",
        kind: "Role",
        name: "jupyter-role",
      },
      subjects: [
        {
          kind: "ServiceAccount",
          name: "jupyter-hub",
          namespace: namespace,
        },
      ],
    },
  }, // parts
}
@ -1,165 +0,0 @@
import json
import os
from kubespawner.spawner import KubeSpawner
from jhub_remote_user_authenticator.remote_user_auth import RemoteUserAuthenticator
from oauthenticator.github import GitHubOAuthenticator


class KubeFormSpawner(KubeSpawner):

    # relies on HTML5 for the image datalist
    def _options_form_default(self):
        global registry, repoName
        return '''
        <label for='image'>Image</label>
        <input list="image" name="image" placeholder='repo/image:tag'>
        <datalist id="image">
          <option value="{0}/{1}/tensorflow-1.4.1-notebook-cpu:v0.2.0">
          <option value="{0}/{1}/tensorflow-1.4.1-notebook-gpu:v0.2.0">
          <option value="{0}/{1}/tensorflow-1.5.1-notebook-cpu:v0.2.0">
          <option value="{0}/{1}/tensorflow-1.5.1-notebook-gpu:v0.2.0">
          <option value="{0}/{1}/tensorflow-1.6.0-notebook-cpu:v0.2.0">
          <option value="{0}/{1}/tensorflow-1.6.0-notebook-gpu:v0.2.0">
          <option value="{0}/{1}/tensorflow-1.7.0-notebook-cpu:v0.2.0">
          <option value="{0}/{1}/tensorflow-1.7.0-notebook-gpu:v0.2.0">
          <option value="{0}/{1}/tensorflow-1.8.0-notebook-cpu:v0.2.0">
          <option value="{0}/{1}/tensorflow-1.8.0-notebook-gpu:v0.2.0">
        </datalist>
        <br/><br/>

        <label for='cpu_guarantee'>CPU</label>
        <input name='cpu_guarantee' placeholder='200m, 1.0, 2.5, etc'></input>
        <br/><br/>

        <label for='mem_guarantee'>Memory</label>
        <input name='mem_guarantee' placeholder='100Mi, 1.5Gi'></input>
        <br/><br/>

        <label for='extra_resource_limits'>Extra Resource Limits</label>
        <input name='extra_resource_limits' placeholder='{{"nvidia.com/gpu": 3}}'></input>
        <br/><br/>
        '''.format(registry, repoName)

    def options_from_form(self, formdata):
        options = {}
        options['image'] = formdata.get('image', [''])[0].strip()
        options['cpu_guarantee'] = formdata.get(
            'cpu_guarantee', [''])[0].strip()
        options['mem_guarantee'] = formdata.get(
            'mem_guarantee', [''])[0].strip()
        options['extra_resource_limits'] = formdata.get(
            'extra_resource_limits', [''])[0].strip()
        return options

    @property
    def singleuser_image_spec(self):
        global cloud
        if cloud == 'ack':
            image = 'registry.aliyuncs.com/kubeflow-images-public/tensorflow-notebook-cpu'
        else:
            image = 'gcr.io/kubeflow-images-public/tensorflow-1.8.0-notebook-cpu:v0.2.0'
        if self.user_options.get('image'):
            image = self.user_options['image']
        return image

    @property
    def cpu_guarantee(self):
        cpu = '500m'
        if self.user_options.get('cpu_guarantee'):
            cpu = self.user_options['cpu_guarantee']
        return cpu

    @property
    def mem_guarantee(self):
        mem = '1Gi'
        if self.user_options.get('mem_guarantee'):
            mem = self.user_options['mem_guarantee']
        return mem

    @property
    def extra_resource_limits(self):
        extra = ''
        if self.user_options.get('extra_resource_limits'):
            extra = json.loads(self.user_options['extra_resource_limits'])
        return extra


###################################################
# JupyterHub Options
###################################################
c.JupyterHub.ip = '0.0.0.0'
c.JupyterHub.hub_ip = '0.0.0.0'
# Don't try to clean up servers on exit, since in general for k8s we want
# the hub to be able to restart without losing user containers.
c.JupyterHub.cleanup_servers = False
###################################################

###################################################
# Spawner Options
###################################################
cloud = os.environ.get('CLOUD_NAME')
registry = os.environ.get('REGISTRY')
repoName = os.environ.get('REPO_NAME')
c.JupyterHub.spawner_class = KubeFormSpawner
c.KubeSpawner.singleuser_image_spec = '{0}/{1}/tensorflow-notebook'.format(registry, repoName)

c.KubeSpawner.cmd = 'start-singleuser.sh'
c.KubeSpawner.args = ['--allow-root']
# GPU images are very large (~15GB), so we need a large timeout.
c.KubeSpawner.start_timeout = 60 * 30
# Increase the timeout to 5 minutes to avoid HTTP 500 errors on JupyterHub.
c.KubeSpawner.http_timeout = 60 * 5

# Volume setup
c.KubeSpawner.singleuser_uid = 1000
c.KubeSpawner.singleuser_fs_gid = 100
c.KubeSpawner.singleuser_working_dir = '/home/jovyan'
volumes = []
volume_mounts = []
###################################################
# Persistent volume options
###################################################
# Using persistent storage requires a default storage class.
# TODO(jlewi): Verify this works on minikube.
# See https://github.com/kubeflow/kubeflow/pull/22#issuecomment-350500944
pvc_mount = os.environ.get('NOTEBOOK_PVC_MOUNT')
if pvc_mount and pvc_mount != 'null':
    c.KubeSpawner.user_storage_pvc_ensure = True
    # How much disk space do we want?
    c.KubeSpawner.user_storage_capacity = '10Gi'
    c.KubeSpawner.pvc_name_template = 'claim-{username}{servername}'
    volumes.append(
        {
            'name': 'volume-{username}{servername}',
            'persistentVolumeClaim': {
                'claimName': 'claim-{username}{servername}'
            }
        }
    )
    volume_mounts.append(
        {
            'mountPath': pvc_mount,
            'name': 'volume-{username}{servername}'
        }
    )

###################################################
# Extra volumes for NVIDIA drivers (Azure)
###################################################
# Temporary fix:
# AKS / acs-engine doesn't yet use the device plugin, so we have to mount the drivers to use GPUs.
# TODO(wbuchwalter): Remove once the device plugin is merged.
if cloud == 'aks' or cloud == 'acsengine':
    volumes.append({
        'name': 'nvidia',
        'hostPath': {
            'path': '/usr/local/nvidia'
        }
    })
    volume_mounts.append({
        'name': 'nvidia',
        'mountPath': '/usr/local/nvidia'
    })

c.KubeSpawner.volumes = volumes
c.KubeSpawner.volume_mounts = volume_mounts
@ -1,302 +0,0 @@
// A ksonnet prototype/component for using NFS.

{
  // TODO(https://github.com/ksonnet/ksonnet/issues/222): Taking namespace as an argument is a workaround for the fact that ksonnet
  // doesn't support automatically piping in the namespace from the environment to prototypes.
  //
  // Return a list of components needed if you want to mount some disks using NFS.
  // diskNames should be a list of PDs.
  all(params):: {
    local namespace = params.namespace,
    local name = params.name,
    local disks = params.disks,

    // Create a list of the resources needed for a particular disk;
    // see the sketch after this block for how the fan-out composes.
    local diskToList = function(diskName) [
      $.parts(namespace, name).diskResources(diskName).storageClass,
      $.parts(namespace, name).diskResources(diskName).volumeClaim,
      $.parts(namespace, name).diskResources(diskName).service,
      $.parts(namespace, name).diskResources(diskName).provisioner,
    ],
    local util = import "kubeflow/core/util.libsonnet",
    local allDisks = std.flattenArrays(std.map(diskToList, util.toArray(disks))),

    items::
      if std.length(allDisks) > 0 then
        [
          $.parts(namespace, name).serviceAccount,
          $.parts(namespace, name).role,
          $.parts(namespace, name).roleBinding,
          $.parts(namespace, name).clusterRoleBinding,
        ] + allDisks
      else
        [],

  }.items,
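A minimal sketch of the fan-out in `diskToList` above: each disk maps to four resources, and the per-disk lists are flattened into one (the resource names here are illustrative stubs, not the real manifests):

```jsonnet
// Sketch only: strings stand in for the storageClass/volumeClaim/service/provisioner objects.
local diskToList = function(disk) [
  disk + "-storage-class",
  disk + "-pvc",
  disk + "-service",
  disk + "-provisioner",
];
std.flattenArrays(std.map(diskToList, ["disk1", "disk2"]))
// -> ["disk1-storage-class", ..., "disk2-provisioner"], eight items in one flat list
```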
  // Create a provisioner with the specified name.
  // disks should be a list of GCP persistent disk names; these disks should be in the
  // same zone as your cluster.
  // TODO(jlewi):
  parts(namespace, name):: {

    local serviceAccountName = name,
    local serviceAccountRoleName = name,

    // Create the resources for a specific disk.
    // Each NFS provisioner can only manage 1 PD, so we need to create one for each disk.
    diskResources(diskName): {

      local storageClassName = diskName + "-nfs",
      local provisionerName = diskName + "-provisioner",
      local storageClassProvisioner = diskName + "/nfs",
      local serviceName = diskName + "-service",

      volumeClaim: {
        apiVersion: "v1",
        kind: "PersistentVolumeClaim",
        metadata: {
          annotations: {
            "volume.beta.kubernetes.io/storage-class": storageClassName,
          },
          name: diskName,
          namespace: namespace,
        },
        spec: {
          accessModes: [
            "ReadWriteMany",
          ],
          resources: {
            requests: {
              storage: "1Mi",
            },
          },
        },
      },

      // TODO(jlewi): Is storageClass actually namespace scoped? It seems to show up in the default namespace as well.
      // TODO(jlewi): Could we just use the default cluster storage class?
      storageClass: {
        apiVersion: "storage.k8s.io/v1beta1",
        kind: "StorageClass",
        metadata: {
          name: storageClassName,
          namespace: namespace,
        },
        // This value must be the same as the --provisioner argument passed to the provisioner.
        provisioner: storageClassProvisioner,
      },

      service: {
        apiVersion: "v1",
        kind: "Service",
        metadata: {
          labels: {
            app: provisionerName,
          },
          name: serviceName,
          namespace: namespace,
        },
        spec: {
          ports: [
            {
              name: "nfs",
              port: 2049,
            },
            {
              name: "mountd",
              port: 20048,
            },
            {
              name: "rpcbind",
              port: 111,
            },
            {
              name: "rpcbind-udp",
              port: 111,
              protocol: "UDP",
            },
          ],
          selector: {
            app: provisionerName,
          },
        },
      },

      provisioner: {
        apiVersion: "extensions/v1beta1",
        kind: "Deployment",
        metadata: {
          name: provisionerName,
          namespace: namespace,
        },
        spec: {
          replicas: 1,
          strategy: {
            type: "Recreate",
          },
          template: {
            metadata: {
              labels: {
                app: provisionerName,
              },
            },
            spec: {
              containers: [
                {
                  args: [
                    "-provisioner=" + storageClassProvisioner,
                  ],
                  env: [
                    {
                      name: "POD_IP",
                      valueFrom: {
                        fieldRef: {
                          fieldPath: "status.podIP",
                        },
                      },
                    },
                    {
                      name: "SERVICE_NAME",
                      value: serviceName,
                    },
                    {
                      name: "POD_NAMESPACE",
                      valueFrom: {
                        fieldRef: {
                          fieldPath: "metadata.namespace",
                        },
                      },
                    },
                  ],
                  image: "quay.io/kubernetes_incubator/nfs-provisioner:v1.0.8",
                  imagePullPolicy: "IfNotPresent",
                  name: "nfs-provisioner",
                  ports: [
                    {
                      containerPort: 2049,
                      name: "nfs",
                    },
                    {
                      containerPort: 20048,
                      name: "mountd",
                    },
                    {
                      containerPort: 111,
                      name: "rpcbind",
                    },
                    {
                      containerPort: 111,
                      name: "rpcbind-udp",
                      protocol: "UDP",
                    },
                  ],
                  securityContext: {
                    capabilities: {
                      add: [
                        "DAC_READ_SEARCH",
                      ],
                    },
                  },
                  volumeMounts: [{
                    // Needs to be mounted under /export because /export is what is exported for NFS.
                    // https://github.com/kubernetes-incubator/external-storage/tree/master/nfs#quickstart
                    mountPath: "/export",
                    name: diskName,
                  }],
                },
              ],
              volumes: [{
                name: diskName,
                gcePersistentDisk: {
                  pdName: diskName,
                },
              }],
              serviceAccountName: serviceAccountName,
            },
          },
        },
      }, // provisioner
    },

    serviceAccount: {
      apiVersion: "v1",
      kind: "ServiceAccount",
      metadata: {
        labels: {
          app: name + "nfs-provisioner",
        },
        name: serviceAccountName,
        namespace: namespace,
      },
    },

    role: {
      apiVersion: "rbac.authorization.k8s.io/v1beta1",
      kind: "Role",
      metadata: {
        name: serviceAccountRoleName,
        namespace: namespace,
      },
      rules: [
        {
          apiGroups: [
            "*",
          ],
          // TODO(jlewi): This is very permissive so we may want to lock this down.
          resources: [
            "*",
          ],
          verbs: [
            "*",
          ],
        },
      ],
    },

    roleBinding: {
      apiVersion: "rbac.authorization.k8s.io/v1beta1",
      kind: "RoleBinding",
      metadata: {
        name: name + "-nfs-role",
        namespace: namespace,
      },
      roleRef: {
        apiGroup: "rbac.authorization.k8s.io",
        kind: "Role",
        name: serviceAccountName,
      },
      subjects: [
        {
          kind: "ServiceAccount",
          name: serviceAccountRoleName,
          namespace: namespace,
        },
      ],
    },

    // See https://github.com/kubernetes-incubator/external-storage/tree/master/docs#authorizing-provisioners-for-rbac-or-openshift
    clusterRoleBinding: {
      apiVersion: "rbac.authorization.k8s.io/v1beta1",
      kind: "ClusterRoleBinding",
      metadata: {
        name: name + "-nfs-role",
        namespace: namespace,
      },
      roleRef: {
        apiGroup: "rbac.authorization.k8s.io",
        kind: "ClusterRole",
        name: "system:persistent-volume-provisioner",
      },
      subjects: [
        {
          kind: "ServiceAccount",
          name: serviceAccountRoleName,
          namespace: namespace,
        },
      ],
    },

  }, // parts
}
@ -1,35 +0,0 @@
// @apiVersion 0.1
// @name io.ksonnet.pkg.kubeflow-core
// @description Kubeflow core components
// @shortDescription Kubeflow core components. This currently includes JupyterHub and the TfJob controller.
// @param name string Name to give to each of the components
// @optionalParam namespace string null Namespace to use for the components. It is automatically inherited from the environment if not set.
// @optionalParam disks string null Comma-separated list of Google persistent disks to attach to jupyter environments.
// @optionalParam cloud string null String identifying the cloud to customize the deployment for.
// @optionalParam AmbassadorServiceType string ClusterIP The service type for the API Gateway.
// @optionalParam AmbassadorImage string quay.io/datawire/ambassador:0.30.1 The image for the API Gateway.
// @optionalParam StatsdImage string quay.io/datawire/statsd:0.30.1 The image for Stats and Monitoring.
// @optionalParam tfJobImage string gcr.io/kubeflow-images-public/tf_operator:v0.2.0 The image for the TfJob controller.
// @optionalParam tfDefaultImage string null The default image to use for TensorFlow.
// @optionalParam tfJobUiServiceType string ClusterIP The service type for the UI.
// @optionalParam jupyterHubServiceType string ClusterIP The service type for JupyterHub.
// @optionalParam jupyterHubImage string gcr.io/kubeflow/jupyterhub-k8s:v20180531-3bb991b1 The image to use for JupyterHub.
// @optionalParam jupyterHubAuthenticator string null The authenticator to use.
// @optionalParam jupyterNotebookPVCMount string null Mount path for the PVC. Set empty to disable the PVC.
// @optionalParam jupyterNotebookRegistry string gcr.io The docker image registry for the Jupyter notebook.
// @optionalParam jupyterNotebookRepoName string kubeflow-images-public The repository name for the Jupyter notebook.
// @optionalParam reportUsage string false Whether or not to report Kubeflow usage to kubeflow.org.
// @optionalParam usageId string unknown_cluster Optional id to use when reporting usage to kubeflow.org.
// @optionalParam tfJobVersion string v1alpha2 Which version of the TFJob operator to use.
// @optionalParam centralUiImage string gcr.io/kubeflow-images-public/centraldashboard:v20180618-v0.2.0-rc.0-5-g715aafc8-e3b0c4 Image to use for the central UI.

local k = import "k.libsonnet";
local all = import "kubeflow/core/all.libsonnet";

// updatedParams uses the environment namespace if
// the namespace parameter is not explicitly set.
local updatedParams = params {
  namespace: if params.namespace == "null" then env.namespace else params.namespace,
};

std.prune(k.core.v1.list.new(all.parts(updatedParams).all))
@@ -1,19 +0,0 @@
// @apiVersion 0.1
// @name io.ksonnet.pkg.cert-manager
// @description Provides cert-manager prototypes for generating SSL certificates.
// @shortDescription Certificate generation on GKE.
// @param name string Name for the component
// @optionalParam namespace string null Namespace to use for the components. It is automatically inherited from the environment if not set.
// @param acmeEmail string The Let's Encrypt account email address
// @optionalParam acmeUrl string https://acme-v01.api.letsencrypt.org/directory The ACME server URL; set to https://acme-staging.api.letsencrypt.org/directory for the staging API.

local k = import "k.libsonnet";
local certManager = import "kubeflow/core/cert-manager.libsonnet";

// updatedParams uses the environment namespace if
// the namespace parameter is not explicitly set.
local updatedParams = params {
  namespace: if params.namespace == "null" then env.namespace else params.namespace,
};

certManager.parts(updatedParams.namespace).certManagerParts(params.acmeEmail, params.acmeUrl)
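Unlike the optional parameters, `acmeEmail` is declared with `@param`, so it has to be supplied when the component is generated; a hedged example with a placeholder address:

```
# acmeEmail is required at generation time; the address is a placeholder.
ks generate cert-manager cert-manager --acmeEmail=admin@example.com
```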
@@ -1,19 +0,0 @@
// @apiVersion 0.1
// @name io.ksonnet.pkg.cloud-endpoints
// @description Provides cloud-endpoints prototypes for creating Cloud Endpoints services and DNS records.
// @shortDescription Cloud Endpoint domain creation.
// @param name string Name for the component
// @optionalParam secretName string admin-gcp-sa Name of the secret containing the JSON service account key.
// @optionalParam secretKey string admin-gcp-sa.json Name of the key in the secret containing the JSON service account key.
// @optionalParam namespace string null Namespace to use for the components. It is automatically inherited from the environment if not set.

local k = import "k.libsonnet";
local cloudEndpoints = import "kubeflow/core/cloud-endpoints.libsonnet";

// updatedParams uses the environment namespace if
// the namespace parameter is not explicitly set.
local updatedParams = params {
  namespace: if params.namespace == "null" then env.namespace else params.namespace,
};

cloudEndpoints.parts(updatedParams.namespace).cloudEndpointsParts(params.secretName, params.secretKey)
@@ -1,28 +0,0 @@
// @apiVersion 0.1
// @name io.ksonnet.pkg.iap-ingress
// @description Provides ingress prototypes for setting up IAP on GKE.
// @shortDescription Ingress for IAP on GKE.
// @param name string Name for the component
// @param ipName string The name of the global IP address to use.
// @optionalParam namespace string null Namespace to use for the components. It is automatically inherited from the environment if not set.
// @optionalParam secretName string envoy-ingress-tls The name of the secret containing the SSL certificates.
// @optionalParam hostname string null The hostname associated with this ingress, e.g. mykubeflow.example.com.
// @optionalParam issuer string letsencrypt-prod The cert-manager issuer name.
// @optionalParam envoyImage string gcr.io/kubeflow-images-public/envoy:v20180309-0fb4886b463698702b6a08955045731903a18738 The image for envoy.
// @optionalParam disableJwtChecking string false Disable JWT checking.
// @optionalParam oauthSecretName string kubeflow-oauth The name of the secret containing the OAuth CLIENT_ID and CLIENT_SECRET.

local k = import "k.libsonnet";
local iap = import "kubeflow/core/iap.libsonnet";
local util = import "kubeflow/core/util.libsonnet";

// updatedParams uses the environment namespace if
// the namespace parameter is not explicitly set.
local updatedParams = params {
  namespace: if params.namespace == "null" then env.namespace else params.namespace,
};

local namespace = updatedParams.namespace;
local disableJwtChecking = util.toBool(params.disableJwtChecking);

iap.parts(namespace).ingressParts(params.secretName, params.ipName, params.hostname, params.issuer, params.envoyImage, disableJwtChecking, params.oauthSecretName)
@@ -1,26 +0,0 @@
// @apiVersion 0.1
// @name io.ksonnet.pkg.tensorboard
// @description TensorBoard components
// @shortDescription ksonnet components for TensorBoard
// @param name string Name to give to each of the components

local k = import "k.libsonnet";
local tensorboard = import "kubeflow/core/tensorboard.libsonnet";

local name = import "param://name";

// updatedParams includes the namespace from env by default.
// We can override namespace in params if needed.
local updatedParams = env + params;

local logDir = updatedParams.logDir;

local tb = tensorboard {
  params+: updatedParams {
    name: name,
  },
};


//std.assertEqual(true, std.length(logDir) > 0)
std.prune(k.core.v1.list.new(tb.components))
@@ -1,27 +0,0 @@
// @apiVersion 0.1
// @name io.ksonnet.pkg.tf-job-operator
// @description A TensorFlow job operator CRD
// @shortDescription A TensorFlow job operator.
// @param name string Name to give to each of the components
// @optionalParam namespace string null Namespace to use for the components. It is automatically inherited from the environment if not set.
// @optionalParam cloud string null String identifying the cloud to customize the deployment for.
// @optionalParam tfAmbassadorServiceType string ClusterIP The service type for the API Gateway.
// @optionalParam tfJobImage string gcr.io/kubeflow-images-public/tf_operator:v0.2.0 The image for the TfJob controller.
// @optionalParam tfDefaultImage string null The default image to use for TensorFlow.
// @optionalParam tfJobUiServiceType string ClusterIP The service type for the UI.
// @optionalParam tfJobVersion string v1alpha2 Which version of the TFJob operator to use.

// TODO(https://github.com/ksonnet/ksonnet/issues/235): ks param set args won't work if the arg starts with "--".

local env = std.extVar("__ksonnet/environments");
local params = std.extVar("__ksonnet/params").components["tf-job-operator"];
local k = import "k.libsonnet";
local tfjob = import "kubeflow/core/tf-job-operator.libsonnet";

// updatedParams uses the environment namespace if
// the namespace parameter is not explicitly set.
local updatedParams = params {
  namespace: if params.namespace == "null" then env.namespace else params.namespace,
};

std.prune(k.core.v1.list.new(tfjob.all(updatedParams)))
@@ -1,125 +0,0 @@
#!/bin/bash
#
# A simple shell script to enable IAP and configure timeouts by using gcloud.
[ -z "${CLIENT_ID}" ] && echo "Error: CLIENT_ID must be set" && exit 1
[ -z "${CLIENT_SECRET}" ] && echo "Error: CLIENT_SECRET must be set" && exit 1
[ -z "${NAMESPACE}" ] && echo "Error: NAMESPACE must be set" && exit 1
[ -z "${SERVICE}" ] && echo "Error: SERVICE must be set" && exit 1

apk add --update jq
curl https://storage.googleapis.com/kubernetes-release/release/v1.9.4/bin/linux/amd64/kubectl > /usr/local/bin/kubectl && chmod +x /usr/local/bin/kubectl

# Stagger init of replicas when acquiring lock
sleep $(( $RANDOM % 5 + 1 ))

# We acquire a lock because we want to ensure there is a single process
# trying to modify the backend at a time.
kubectl get svc ${SERVICE} -o json > service.json
LOCK=$(jq -r ".metadata.annotations.iaplock" service.json)

NOW=$(date -u +'%s')
if [[ -z "${LOCK}" || "${LOCK}" == "null" ]]; then
  LOCK_T=$NOW
else
  LOCK_T=$(echo "${LOCK}" | cut -d' ' -f2)
fi
LOCK_AGE=$(( $NOW - $LOCK_T ))
LOCK_TTL=120
if [[ -z "${LOCK}" || "${LOCK}" == "null" || "${LOCK_AGE}" -gt "${LOCK_TTL}" ]]; then
  jq -r ".metadata.annotations.iaplock=\"$(hostname -s) ${NOW}\"" service.json > service_lock.json
  kubectl apply -f service_lock.json 2>/dev/null
  if [[ $? -eq 0 ]]; then
    echo "Acquired lock on service annotation to update IAP."
  else
    echo "WARN: Failed to acquire lock on service annotation."
    exit 1
  fi
else
  echo "WARN: Lock on service annotation already acquired by: $LOCK, age: $LOCK_AGE, TTL: $LOCK_TTL"
  sleep 20
  exit 1
fi

PROJECT=$(curl -s -H "Metadata-Flavor: Google" http://metadata.google.internal/computeMetadata/v1/project/project-id)
if [ -z "${PROJECT}" ]; then
  echo "Error: unable to fetch PROJECT from compute metadata"
  exit 1
fi

PROJECT_NUM=$(curl -s -H "Metadata-Flavor: Google" http://metadata.google.internal/computeMetadata/v1/project/numeric-project-id)
if [ -z "${PROJECT_NUM}" ]; then
  echo "Error: unable to fetch PROJECT_NUM from compute metadata"
  exit 1
fi

# Activate the service account
gcloud auth activate-service-account --key-file=${GOOGLE_APPLICATION_CREDENTIALS}
# Print out the config for debugging
gcloud config list

NODE_PORT=$(kubectl --namespace=${NAMESPACE} get svc ${SERVICE} -o jsonpath='{.spec.ports[0].nodePort}')
while [[ -z ${BACKEND_ID} ]]; do
  BACKEND_ID=$(gcloud compute --project=${PROJECT} backend-services list --filter=name~k8s-be-${NODE_PORT}- --format='value(id)')
  echo "Waiting for backend id PROJECT=${PROJECT} NAMESPACE=${NAMESPACE} SERVICE=${SERVICE} filter=name~k8s-be-${NODE_PORT}- ..."
  sleep 2
done
echo BACKEND_ID=${BACKEND_ID}

NODE_PORT=$(kubectl --namespace=${NAMESPACE} get svc ${SERVICE} -o jsonpath='{.spec.ports[0].nodePort}')
BACKEND_SERVICE=$(gcloud --project=${PROJECT} compute backend-services list --filter=name~k8s-be-${NODE_PORT}- --uri)
# Enable IAP on the backend service:
gcloud --project=${PROJECT} compute backend-services update ${BACKEND_SERVICE} \
  --global \
  --iap=enabled,oauth2-client-id=${CLIENT_ID},oauth2-client-secret=${CLIENT_SECRET}

while [[ -z ${HEALTH_CHECK_URI} ]]; do
  HEALTH_CHECK_URI=$(gcloud compute --project=${PROJECT} health-checks list --filter=name~k8s-be-${NODE_PORT}- --uri)
  echo "Waiting for the healthcheck resource PROJECT=${PROJECT} NODEPORT=${NODE_PORT} SERVICE=${SERVICE}..."
  sleep 2
done

# Since we create the envoy-ingress ingress object before creating the envoy
# deployment object, the healthcheck will not be configured correctly in the GCP
# load balancer. It will default the healthcheck request path to a value of
# / instead of the intended /healthz.
# Manually update the healthcheck request path to /healthz.
gcloud --project=${PROJECT} compute health-checks update http ${HEALTH_CHECK_URI} --request-path=/healthz

# Since JupyterHub uses websockets we want to increase the backend timeout.
echo "Increasing backend timeout for JupyterHub"
gcloud --project=${PROJECT} compute backend-services update --global ${BACKEND_SERVICE} --timeout=3600

JWT_AUDIENCE="/projects/${PROJECT_NUM}/global/backendServices/${BACKEND_ID}"

# For healthcheck compare.
mkdir -p /var/shared
echo "JWT_AUDIENCE=${JWT_AUDIENCE}" > /var/shared/healthz.env
echo "NODE_PORT=${NODE_PORT}" >> /var/shared/healthz.env
echo "BACKEND_ID=${BACKEND_ID}" >> /var/shared/healthz.env

# TODO(https://github.com/kubeflow/kubeflow/issues/942): We should publish the modified envoy
# config as a config map and use that in the envoy sidecars.
kubectl get configmap -n ${NAMESPACE} envoy-config -o jsonpath='{.data.envoy-config\.json}' | \
  sed -e "s|{{JWT_AUDIENCE}}|${JWT_AUDIENCE}|g" > /var/shared/envoy-config.json

echo "Clearing lock on service annotation"
kubectl patch svc "${SERVICE}" -p "{\"metadata\": { \"annotations\": {\"iaplock\": \"\" }}}"

function checkIAP() {
  # created by init container.
  . /var/shared/healthz.env

  # If the node port or backend id change, so does the JWT audience.
  CURR_NODE_PORT=$(kubectl --namespace=${NAMESPACE} get svc ${SERVICE} -o jsonpath='{.spec.ports[0].nodePort}')
  CURR_BACKEND_ID=$(gcloud compute --project=${PROJECT} backend-services list --filter=name~k8s-be-${CURR_NODE_PORT}- --format='value(id)')
  [ "$BACKEND_ID" == "$CURR_BACKEND_ID" ]
}

# Verify IAP every 10 seconds.
while true; do
  if ! checkIAP; then
    echo "$(date) WARN: IAP check failed, restarting container."
    exit 1
  fi
  sleep 10
done
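The script above is driven entirely by its environment. A sketch of the contract it expects, with placeholder values; in the actual deployment these are injected into the pod rather than exported by hand:

```
# Placeholder values: CLIENT_ID/CLIENT_SECRET come from the OAuth secret,
# and GOOGLE_APPLICATION_CREDENTIALS points at a mounted service account key.
export CLIENT_ID=my-oauth-client-id
export CLIENT_SECRET=my-oauth-client-secret
export NAMESPACE=kubeflow
export SERVICE=envoy
export GOOGLE_APPLICATION_CREDENTIALS=/var/run/secrets/sa/admin-gcp-sa.json
```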
@@ -1,113 +0,0 @@
{
  local util = import "kubeflow/core/util.libsonnet",

  all(params):: {
    local reportUsageBool = util.toBool(params.reportUsage),
    result:: if reportUsageBool then
      [
        $.parts(params.namespace).role,
        $.parts(params.namespace).roleBinding,
        $.parts(params.namespace).serviceAccount,
        $.parts(params.namespace).deployment(params.usageId),
      ]
    else [],
  }.result,

  parts(namespace):: {

    // Spartakus needs to be able to get information about the cluster in order to create a report.
    role: {
      apiVersion: "rbac.authorization.k8s.io/v1beta1",
      kind: "ClusterRole",
      metadata: {
        labels: {
          app: "spartakus",
        },
        name: "spartakus",
      },
      rules: [
        {
          apiGroups: [
            "",
          ],
          resources: [
            "nodes",
          ],
          verbs: [
            "get",
            "list",
          ],
        },
      ],
    },  // role

    roleBinding:: {
      apiVersion: "rbac.authorization.k8s.io/v1beta1",
      kind: "ClusterRoleBinding",
      metadata: {
        labels: {
          app: "spartakus",
        },
        name: "spartakus",
      },
      roleRef: {
        apiGroup: "rbac.authorization.k8s.io",
        kind: "ClusterRole",
        name: "spartakus",
      },
      subjects: [
        {
          kind: "ServiceAccount",
          name: "spartakus",
          namespace: namespace,
        },
      ],
    },  // roleBinding


    serviceAccount: {
      apiVersion: "v1",
      kind: "ServiceAccount",
      metadata: {
        labels: {
          app: "spartakus",
        },
        name: "spartakus",
        namespace: namespace,
      },
    },

    deployment(usageId):: {
      apiVersion: "extensions/v1beta1",
      kind: "Deployment",
      metadata: {
        name: "spartakus-volunteer",
        namespace: namespace,
      },
      spec: {
        replicas: 1,
        template: {
          metadata: {
            labels: {
              app: "spartakus-volunteer",
            },
          },
          spec: {
            containers: [
              {
                image: "gcr.io/google_containers/spartakus-amd64:v1.0.0",
                name: "volunteer",
                args: [
                  "volunteer",
                  "--cluster-id=" + usageId,
                  "--database=https://stats-collector.kubeflow.org",
                ],
              },
            ],
            serviceAccountName: "spartakus",
          },  // spec
        },
      },
    },  // deployment
  },
}
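Because `all(params)` returns an empty list unless `reportUsage` coerces to true, Spartakus is opt-in. A hedged sketch of enabling it through the kubeflow-core component parameters shown earlier (the id is a placeholder):

```
# Usage reporting is off by default; opt in by setting the parameters
# declared on the kubeflow-core prototype.
ks param set kubeflow-core reportUsage true
ks param set kubeflow-core usageId my-cluster-id   # placeholder id
ks apply default -c kubeflow-core
```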
@@ -1,247 +0,0 @@
{
  // Parameters are intended to be late bound.
  params:: {
    name: null,
    labels: {
      app: $.params.name,
    },

    serviceType: "ClusterIP",

    logDir: "",

    defaultTbImage: "gcr.io/tensorflow/tensorflow:latest",


    // Whether or not to enable s3 parameters
    s3Enable:: false,

    // Which cloud to use
    cloud:: null,
  },

  // Parameters specific to GCP.
  gcpParams:: {
    gcpCredentialSecretName: "",
  } + $.params,

  // Parameters that control S3 access.
  // params overrides s3params because params can be overwritten by the user to override the defaults.
  s3params:: {
    // Name of the k8s secret containing the S3 credentials.
    s3SecretName: "",
    // Name of the key in the k8s secret containing AWS_ACCESS_KEY_ID.
    s3SecretAccesskeyidKeyName: "",

    // Name of the key in the k8s secret containing AWS_SECRET_ACCESS_KEY.
    s3SecretSecretaccesskeyKeyName: "",

    // S3 region
    s3AwsRegion: "us-west-1",

    // TODO(jlewi): We should use util.toBool to automatically convert to actual boolean values.
    // The use of strings is left over from when they were prototype parameters, which only support the string type.

    // Whether or not to use https for S3 connections
    s3UseHttps: "true",

    // Whether or not to verify https certificates for S3 connections
    s3VerifySsl: "true",

    // URL for your s3-compatible endpoint.
    s3Endpoint: "http://s3.us-west-1.amazonaws.com",
  } + $.params,


  components:: {

    all::
      // TODO(jlewi): It would be better to structure s3 as a mixin.
      // As an example it would be great to allow S3 and GCS parameters
      // to be enabled simultaneously. This should be doable because
      // each entails adding a set of environment variables and volumes
      // to the containers. These volumes/environment variables shouldn't
      // overlap so there's no reason we shouldn't be able to just add
      // both modifications to the base container.
      // I think we want to restructure things as mixins so they can just
      // be added.
      if $.params.s3Enable then
        [
          $.s3parts.tb,
          $.s3parts.tfDeployment,
        ]
      else if $.params.cloud == "gcp" then
        [
          $.gcpParts.tb,
          $.gcpParts.tfDeployment,
        ]
      else
        [
          $.parts.tb,
          $.parts.tfDeployment,
        ],
  }.all,

  parts:: {
    // We define the containers one level beneath parts because combined with jsonnet late binding
    // this makes it easy for users to override specific bits of the container.
    tbContainer:: {
      name: $.params.name,
      image: $.params.defaultTbImage,
      imagePullPolicy: "IfNotPresent",
      args: [
        $.params.logDir,
        "--port=9000",
      ],
      command: [
        "/usr/local/bin/tensorboard",
      ],
      ports: [
        {
          containerPort: 9000,
        },
      ],

      resources: {
        requests: {
          memory: "1Gi",
          cpu: "1",
        },
        limits: {
          memory: "4Gi",
          cpu: "4",
        },
      },
    },  // tbContainer

    tfDeployment: {
      apiVersion: "extensions/v1beta1",
      kind: "Deployment",
      metadata: {
        name: $.params.name,
        namespace: $.params.namespace,
        labels: $.params.labels,
      },
      spec: {
        template: {
          metadata: {
            labels: $.params.labels,
          },
          spec: {
            containers: [
              $.parts.tbContainer,
            ],

          },
        },
      },
    },  // tfDeployment

    tb: {
      apiVersion: "v1",
      kind: "Service",
      metadata: {
        labels: $.params.labels,
        name: $.params.name,
        namespace: $.params.namespace,
        annotations: {
          "getambassador.io/config":
            std.join("\n", [
              "---",
              "apiVersion: ambassador/v0",
              "kind: Mapping",
              "name: tb-mapping-" + $.params.name + "-get",
              "prefix: /tensorboard/" + $.params.name + "/",
              "rewrite: /",
              "method: GET",
              "service: " + $.params.name + "." + $.params.namespace + ":9000",
            ]),
        },  // annotations
      },
      spec: {
        ports: [
          {
            name: "tb",
            port: 9000,
            targetPort: 9000,
          },
        ],
        selector: $.params.labels,
        type: $.params.serviceType,
      },
    },  // tb

  },  // parts

  // Parts specific to S3
  s3parts:: $.parts {
    s3Env:: [
      { name: "AWS_ACCESS_KEY_ID", valueFrom: { secretKeyRef: { name: $.s3params.s3SecretName, key: $.s3params.s3SecretAccesskeyidKeyName } } },
      { name: "AWS_SECRET_ACCESS_KEY", valueFrom: { secretKeyRef: { name: $.s3params.s3SecretName, key: $.s3params.s3SecretSecretaccesskeyKeyName } } },
      { name: "AWS_REGION", value: $.s3params.s3AwsRegion },
      { name: "S3_REGION", value: $.s3params.s3AwsRegion },
      { name: "S3_USE_HTTPS", value: $.s3params.s3UseHttps },
      { name: "S3_VERIFY_SSL", value: $.s3params.s3VerifySsl },
      { name: "S3_ENDPOINT", value: $.s3params.s3Endpoint },
    ],

    tbContainer: $.parts.tbContainer {
      env+: $.s3parts.s3Env,
    },

    tfDeployment: $.parts.tfDeployment {
      spec+: {
        template+: {

          spec+: {
            containers: [
              $.s3parts.tbContainer,
            ],
          },
        },
      },
    },  // tfDeployment
  },  // s3parts

  // Parts specific to GCP
  gcpParts:: $.parts {
    gcpEnv:: [
      if $.gcpParams.gcpCredentialSecretName != "" then
        { name: "GOOGLE_APPLICATION_CREDENTIALS", value: "/secret/gcp-credentials/key.json" },
    ],

    tbContainer: $.parts.tbContainer {
      env+: $.gcpParts.gcpEnv,
      volumeMounts+: [
        if $.gcpParams.gcpCredentialSecretName != "" then
          {
            name: "gcp-credentials",
            mountPath: "/secret/gcp-credentials",
          },
      ],
    },

    tfDeployment: $.parts.tfDeployment {
      spec+: {
        template+: {

          spec+: {
            containers: [
              $.gcpParts.tbContainer,
            ],

            volumes: [
              if $.gcpParams.gcpCredentialSecretName != "" then
                {
                  name: "gcp-credentials",
                  secret: {
                    secretName: $.gcpParams.gcpCredentialSecretName,
                  },
                },
            ],
          },
        },
      },
    },  // tfDeployment
  },  // gcpParts
}
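A hedged sketch of exercising this library through the tensorboard prototype shown earlier; the component name and log location are illustrative, and `logDir` is read from the component parameters even though it is not a declared `@param`:

```
# Illustrative only: point the component at a log directory and deploy it.
ks generate tensorboard mnist-tb
ks param set mnist-tb logDir gs://my-bucket/mnist/logs
ks apply default -c mnist-tb
```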
@@ -1,259 +0,0 @@
local ambassador = import "../ambassador.libsonnet";
local params = {
  namespace:: "test-kf-001",
  tfAmbassadorServiceType:: "ClusterIP",
  tfAmbassadorImage:: "quay.io/datawire/ambassador:0.34.0",
  tfStatsdImage:: "quay.io/datawire/statsd:0.34.0",
};

std.assertEqual(
  ambassador.parts(params.namespace, params.tfAmbassadorImage).service(params.tfAmbassadorServiceType),
  {
    apiVersion: "v1",
    kind: "Service",
    metadata: {
      labels: {
        service: "ambassador",
      },
      name: "ambassador",
      namespace: "test-kf-001",
    },
    spec: {
      ports: [
        {
          name: "ambassador",
          port: 80,
          targetPort: 80,
        },
      ],
      selector: {
        service: "ambassador",
      },
      type: "ClusterIP",
    },
  }
) &&

std.assertEqual(
  ambassador.parts(params.namespace, params.tfAmbassadorImage).adminService,
  {
    apiVersion: "v1",
    kind: "Service",
    metadata: {
      labels: {
        service: "ambassador-admin",
      },
      name: "ambassador-admin",
      namespace: "test-kf-001",
    },
    spec: {
      ports: [
        {
          name: "ambassador-admin",
          port: 8877,
          targetPort: 8877,
        },
      ],
      selector: {
        service: "ambassador",
      },
      type: "ClusterIP",
    },
  }
) &&

std.assertEqual(
  ambassador.parts(params.namespace, params.tfAmbassadorImage).role,
  {
    apiVersion: "rbac.authorization.k8s.io/v1beta1",
    kind: "Role",
    metadata: {
      name: "ambassador",
      namespace: "test-kf-001",
    },
    rules: [
      {
        apiGroups: [
          "",
        ],
        resources: [
          "services",
        ],
        verbs: [
          "get",
          "list",
          "watch",
        ],
      },
      {
        apiGroups: [
          "",
        ],
        resources: [
          "configmaps",
        ],
        verbs: [
          "create",
          "update",
          "patch",
          "get",
          "list",
          "watch",
        ],
      },
      {
        apiGroups: [
          "",
        ],
        resources: [
          "secrets",
        ],
        verbs: [
          "get",
          "list",
          "watch",
        ],
      },
    ],
  }
) &&

std.assertEqual(
  ambassador.parts(params.namespace, params.tfAmbassadorImage).serviceAccount,
  {
    apiVersion: "v1",
    kind: "ServiceAccount",
    metadata: {
      name: "ambassador",
      namespace: "test-kf-001",
    },
  }
) &&

std.assertEqual(
  ambassador.parts(params.namespace, params.tfAmbassadorImage).roleBinding,
  {
    apiVersion: "rbac.authorization.k8s.io/v1beta1",
    kind: "RoleBinding",
    metadata: {
      name: "ambassador",
      namespace: "test-kf-001",
    },
    roleRef: {
      apiGroup: "rbac.authorization.k8s.io",
      kind: "Role",
      name: "ambassador",
    },
    subjects: [
      {
        kind: "ServiceAccount",
        name: "ambassador",
        namespace: "test-kf-001",
      },
    ],
  }
) &&

std.assertEqual(
  ambassador.parts(params.namespace, params.tfAmbassadorImage).deploy(params.tfStatsdImage),
  {
    apiVersion: "extensions/v1beta1",
    kind: "Deployment",
    metadata: {
      name: "ambassador",
      namespace: "test-kf-001",
    },
    spec: {
      replicas: 3,
      template: {
        metadata: {
          labels: {
            service: "ambassador",
          },
          namespace: "test-kf-001",
        },
        spec: {
          containers: [
            {
              env: [
                {
                  name: "AMBASSADOR_NAMESPACE",
                  valueFrom: {
                    fieldRef: {
                      fieldPath: "metadata.namespace",
                    },
                  },
                },
                {
                  name: "AMBASSADOR_SINGLE_NAMESPACE",
                  value: "true",
                },
              ],
              image: "quay.io/datawire/ambassador:0.34.0",
              livenessProbe: {
                httpGet: {
                  path: "/ambassador/v0/check_alive",
                  port: 8877,
                },
                initialDelaySeconds: 30,
                periodSeconds: 30,
              },
              name: "ambassador",
              readinessProbe: {
                httpGet: {
                  path: "/ambassador/v0/check_ready",
                  port: 8877,
                },
                initialDelaySeconds: 30,
                periodSeconds: 30,
              },
              resources: {
                limits: {
                  cpu: 1,
                  memory: "400Mi",
                },
                requests: {
                  cpu: "200m",
                  memory: "100Mi",
                },
              },
            },
            {
              image: "quay.io/datawire/statsd:0.34.0",
              name: "statsd",
            },
          ],
          restartPolicy: "Always",
          serviceAccountName: "ambassador",
        },
      },
    },
  }
) &&

std.assertEqual(
  ambassador.parts(params.namespace, params.tfAmbassadorImage).k8sDashboard("cloud"),
  {
    apiVersion: "v1",
    kind: "Service",
    metadata: {
      annotations: {
        "getambassador.io/config": "---\napiVersion: ambassador/v0\nkind: Mapping\nname: k8s-dashboard-ui-mapping\nprefix: /k8s/ui/\nrewrite: /\ntls: true\nservice: kubernetes-dashboard.kube-system",
      },
      name: "k8s-dashboard",
      namespace: "test-kf-001",
    },
    spec: {
      ports: [
        {
          port: 443,
          targetPort: 8443,
        },
      ],
      selector: {
        "k8s-app": "kubernetes-dashboard",
      },
      type: "ClusterIP",
    },
  }
)
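Each of these test files evaluates to `true` only if every `std.assertEqual` in the chain passes, so they can be run directly with the jsonnet CLI. A sketch, assuming the kubeflow/core layout (paths are illustrative):

```
# The relative imports ("../ambassador.libsonnet") resolve against the
# test file's location, so running from the tests directory works.
cd kubeflow/core/tests
jsonnet ambassador_test.jsonnet   # prints "true" on success
```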
@@ -1,161 +0,0 @@
local centraldashboard = import "../centraldashboard.libsonnet";
local params = {
  namespace:: "kubeflow",
  cloud:: "gke",
};

std.assertEqual(
  centraldashboard.parts(params.namespace).deployUi("dashboard/image:latest"),
  {
    apiVersion: "extensions/v1beta1",
    kind: "Deployment",
    metadata: {
      labels: {
        app: "centraldashboard",
      },
      name: "centraldashboard",
      namespace: "kubeflow",
    },
    spec: {
      template: {
        metadata: {
          labels: {
            app: "centraldashboard",
          },
        },
        spec: {
          containers: [
            {
              image: "dashboard/image:latest",
              name: "centraldashboard",
              ports: [
                {
                  containerPort: 8082,
                },
              ],
            },
          ],
          serviceAccountName: "centraldashboard",
        },
      },
    },
  }
) &&

std.assertEqual(
  centraldashboard.parts(params.namespace).uiService,
  {
    apiVersion: "v1",
    kind: "Service",
    metadata: {
      labels: {
        app: "centraldashboard",
      },
      name: "centraldashboard",
      namespace: "kubeflow",
      annotations: {
        "getambassador.io/config":
          std.join("\n", [
            "---",
            "apiVersion: ambassador/v0",
            "kind: Mapping",
            "name: centralui-mapping",
            "prefix: /",
            "rewrite: /",
            "service: centraldashboard." + "kubeflow",
          ]),
      },
    },
    spec: {
      ports: [
        {
          port: 80,
          targetPort: 8082,
        },
      ],
      selector: {
        app: "centraldashboard",
      },
      sessionAffinity: "None",
      type: "ClusterIP",
    },
  },
) &&

std.assertEqual(
  centraldashboard.parts(params.namespace).uiServiceAccount,
  {
    apiVersion: "v1",
    kind: "ServiceAccount",
    metadata: {
      name: "centraldashboard",
      namespace: "kubeflow",
    },
  },
) &&

std.assertEqual(
  centraldashboard.parts(params.namespace).uiRole,
  {
    apiVersion: "rbac.authorization.k8s.io/v1beta1",
    kind: "ClusterRole",
    metadata: {
      labels: {
        app: "centraldashboard",
      },
      name: "centraldashboard",
      namespace: "kubeflow",
    },
    rules: [
      {
        apiGroups: [""],
        resources: [
          "pods",
          "pods/exec",
          "pods/log",
        ],
        verbs: [
          "get",
          "list",
          "watch",
        ],
      },
      {
        apiGroups: [""],
        resources: [
          "secrets",
        ],
        verbs: [
          "get",
        ],
      },
    ],
  },
) &&

std.assertEqual(
  centraldashboard.parts(params.namespace).uiRoleBinding,
  {
    apiVersion: "rbac.authorization.k8s.io/v1beta1",
    kind: "ClusterRoleBinding",
    metadata: {
      labels: {
        app: "centraldashboard",
      },
      name: "centraldashboard",
      namespace: "kubeflow",
    },
    roleRef: {
      apiGroup: "rbac.authorization.k8s.io",
      kind: "ClusterRole",
      name: "centraldashboard",
    },
    subjects: [
      {
        kind: "ServiceAccount",
        name: "centraldashboard",
        namespace: "kubeflow",
      },
    ],
  }
)
@@ -1,203 +0,0 @@
local iap = import "../iap.libsonnet";

std.assertEqual(iap.parts("namespace").service, {
  apiVersion: "v1",
  kind: "Service",
  metadata: {
    labels: {
      service: "envoy",
    },
    name: "envoy",
    namespace: "namespace",
  },
  spec: {
    ports: [
      {
        name: "envoy",
        port: 8080,
        targetPort: 8080,
      },
    ],
    selector: {
      service: "envoy",
    },
    type: "NodePort",
  },
}) &&

std.assertEqual(iap.parts("namespace").ingress("secretName", "ipName", "hostname"), {
  apiVersion: "extensions/v1beta1",
  kind: "Ingress",
  metadata: {
    name: "envoy-ingress",
    namespace: "namespace",
    annotations: {
      "kubernetes.io/tls-acme": "true",
      "ingress.kubernetes.io/ssl-redirect": "true",
      "kubernetes.io/ingress.global-static-ip-name": "ipName",
    },
  },
  spec: {
    rules: [
      {
        host: "hostname",
        http: {
          paths: [
            {
              backend: {
                serviceName: "envoy",
                servicePort: 8080,
              },
              path: "/*",
            },
          ],
        },
      },
    ],
    tls: [
      {
        secretName: "secretName",
      },
    ],
  },
}) &&

std.assertEqual(iap.parts("namespace").ingress("secretName", "ipName", "null"), {
  apiVersion: "extensions/v1beta1",
  kind: "Ingress",
  metadata: {
    name: "envoy-ingress",
    namespace: "namespace",
    annotations: {
      "kubernetes.io/tls-acme": "true",
      "ingress.kubernetes.io/ssl-redirect": "true",
      "kubernetes.io/ingress.global-static-ip-name": "ipName",
    },
  },
  spec: {
    rules: [
      {
        http: {
          paths: [
            {
              backend: {
                serviceName: "envoy",
                servicePort: 8080,
              },
              path: "/*",
            },
          ],
        },
      },
    ],
    tls: [
      {
        secretName: "secretName",
      },
    ],
  },
}) &&

std.assertEqual(iap.parts("namespace").certificate("secretName", "hostname", "issuer"), {
  apiVersion: "certmanager.k8s.io/v1alpha1",
  kind: "Certificate",
  metadata: {
    name: "secretName",
    namespace: "namespace",
  },
  spec: {
    secretName: "secretName",
    issuerRef: {
      name: "issuer",
    },
    commonName: "hostname",
    dnsNames: [
      "hostname",
    ],
    acme: {
      config: [
        {
          http01: {
            ingress: "envoy-ingress",
          },
          domains: [
            "hostname",
          ],
        },
      ],
    },
  },
}) &&

std.assertEqual(iap.parts("namespace").whoamiApp, {
  apiVersion: "extensions/v1beta1",
  kind: "Deployment",
  metadata: {
    name: "whoami-app",
    namespace: "namespace",
  },
  spec: {
    replicas: 1,
    template: {
      metadata: {
        labels: {
          app: "whoami",
        },
      },
      spec: {
        containers: [
          {
            env: [
              {
                name: "PORT",
                value: "8081",
              },
            ],
            image: "gcr.io/cloud-solutions-group/esp-sample-app:1.0.0",
            name: "app",
            ports: [
              {
                containerPort: 8081,
              },
            ],
            readinessProbe: {
              failureThreshold: 2,
              httpGet: {
                path: "/healthz",
                port: 8081,
                scheme: "HTTP",
              },
              periodSeconds: 10,
              successThreshold: 1,
              timeoutSeconds: 5,
            },
          },
        ],
      },
    },
  },
}) &&

std.assertEqual(iap.parts("namespace").whoamiService, {
  apiVersion: "v1",
  kind: "Service",
  metadata: {
    labels: {
      app: "whoami",
    },
    name: "whoami-app",
    namespace: "namespace",
  },
  spec: {
    ports: [
      {
        port: 80,
        targetPort: 8081,
      },
    ],
    selector: {
      app: "whoami",
    },
    type: "ClusterIP",
  },
})
@@ -1,285 +0,0 @@
local jupyterhub = import "../jupyterhub.libsonnet";
local params = {
  namespace:: "test-kf-001",
  disks:: "disk01,disk02",
  jupyterHubAuthenticator:: null,
  jupyterHubServiceType:: "ClusterIP",
  jupyterHubImage: "gcr.io/kubeflow/jupyterhub-k8s:1.0.1",
  jupyterNotebookPVCMount: "/home/jovyan",
  jupyterNotebookRegistry: "gcr.io",
  jupyterNotebookRepoName: "kubeflow-images-public",
  cloud: null,
};

local baseSpawner = importstr "../kubeform_spawner.py";

// TODO(jlewi): We should be able to use std.startsWith in later versions of jsonnet.
//
local config = jupyterhub.parts(params.namespace).jupyterHubConfigMap(params.jupyterHubAuthenticator, params.disks).data["jupyterhub_config.py"];
local configPrefix = std.substr(config, 0, std.length(baseSpawner));
local configSuffix = std.substr(config, std.length(baseSpawner), std.length(config) - std.length(baseSpawner));
local configSuffixLines = std.split(configSuffix, "\n");

// This assertion verifies the config map is the same after zeroing the actual data.
// The data will be compared in subsequent steps.
std.assertEqual(jupyterhub.parts(params.namespace).jupyterHubConfigMap(params.jupyterHubAuthenticator, params.disks) + {
  data: {
    "jupyterhub_config.py": "",
  },
}, {
  apiVersion: "v1",
  data: {
    "jupyterhub_config.py": "",
  },
  kind: "ConfigMap",
  metadata: {
    name: "jupyterhub-config",
    namespace: "test-kf-001",
  },
}) &&

// This step verifies that the start of the spawner config is the raw file.
std.assertEqual(configPrefix, baseSpawner)

&&

// These steps verify the suffix.
// Verifying each line makes it much easier to debug test failures because if you just compare to a big blob
// of text it's much harder to know where they differ.
std.assertEqual(configSuffixLines[1], "######## Authenticator ######")
&&
std.assertEqual(configSuffixLines[2], "c.JupyterHub.authenticator_class = 'dummyauthenticator.DummyAuthenticator'")
&&
std.assertEqual(configSuffixLines[3], "###### Volumes #######")
&&
std.assertEqual(configSuffixLines[4], 'c.KubeSpawner.volumes.extend([{"name": "disk01", "persistentVolumeClaim": {"claimName": "disk01"}}, {"name": "disk02", "persistentVolumeClaim": {"claimName": "disk02"}}])')
&&
std.assertEqual(configSuffixLines[5], 'c.KubeSpawner.volume_mounts.extend([{"mountPath": "/mnt/disk01", "name": "disk01"}, {"mountPath": "/mnt/disk02", "name": "disk02"}])')
&&

std.assertEqual(jupyterhub.parts(params.namespace).jupyterHubService,
  {
    apiVersion: "v1",
    kind: "Service",
    metadata: {
      labels: {
        app: "tf-hub",
      },
      name: "tf-hub-0",
      namespace: "test-kf-001",
    },
    spec: {
      clusterIP: "None",
      ports: [
        {
          name: "hub",
          port: 8000,
        },
      ],
      selector: {
        app: "tf-hub",
      },
    },
  }) &&

std.assertEqual(jupyterhub.parts(params.namespace).jupyterHubLoadBalancer(params.jupyterHubServiceType),
  {
    apiVersion: "v1",
    kind: "Service",
    metadata: {
      labels: {
        app: "tf-hub-lb",
      },
      name: "tf-hub-lb",
      namespace: "test-kf-001",
      annotations: {
        "getambassador.io/config":
          std.join("\n", [
            "---",
            "apiVersion: ambassador/v0",
            "kind: Mapping",
            "name: tf-hub-lb-hub-mapping",
            "prefix: /hub/",
            "rewrite: /hub/",
            "timeout_ms: 300000",
            "service: tf-hub-lb." + params.namespace,
            "---",
            "apiVersion: ambassador/v0",
            "kind: Mapping",
            "name: tf-hub-lb-user-mapping",
            "prefix: /user/",
            "rewrite: /user/",
            "timeout_ms: 300000",
            "service: tf-hub-lb." + params.namespace,
          ]),
      },  // annotations
    },
    spec: {
      ports: [
        {
          name: "hub",
          port: 80,
          targetPort: 8000,
        },
      ],
      selector: {
        app: "tf-hub",
      },
      type: "ClusterIP",
    },
  }) &&

std.assertEqual(jupyterhub.parts(params.namespace).jupyterHub(params.jupyterHubImage, params.jupyterNotebookPVCMount, params.cloud, params.jupyterNotebookRegistry, params.jupyterNotebookRepoName),
  {
    apiVersion: "apps/v1beta1",
    kind: "StatefulSet",
    metadata: {
      name: "tf-hub",
      namespace: "test-kf-001",
    },
    spec: {
      replicas: 1,
      serviceName: "",
      template: {
        metadata: {
          labels: {
            app: "tf-hub",
          },
        },
        spec: {
          containers: [
            {
              command: [
                "jupyterhub",
                "-f",
                "/etc/config/jupyterhub_config.py",
              ],
              env: [
                {
                  name: "NOTEBOOK_PVC_MOUNT",
                  value: params.jupyterNotebookPVCMount,
                },
                {
                  name: "CLOUD_NAME",
                  value: null,
                },
                {
                  name: "REGISTRY",
                  value: params.jupyterNotebookRegistry,
                },
                {
                  name: "REPO_NAME",
                  value: params.jupyterNotebookRepoName,
                },
              ],
              image: "gcr.io/kubeflow/jupyterhub-k8s:1.0.1",
              name: "tf-hub",
              ports: [
                {
                  containerPort: 8000,
                },
                {
                  containerPort: 8081,
                },
              ],
              volumeMounts: [
                {
                  mountPath: "/etc/config",
                  name: "config-volume",
                },
              ],
            },
          ],
          serviceAccountName: "jupyter-hub",
          volumes: [
            {
              configMap: {
                name: "jupyterhub-config",
              },
              name: "config-volume",
            },
          ],
        },
      },
      updateStrategy: {
        type: "RollingUpdate",
      },
    },
  }) &&

std.assertEqual(jupyterhub.parts(params.namespace).jupyterHubRole,
  {
    apiVersion: "rbac.authorization.k8s.io/v1beta1",
    kind: "Role",
    metadata: {
      name: "jupyter-role",
      namespace: "test-kf-001",
    },
    rules: [
      {
        apiGroups: [
          "",
        ],
        resources: [
          "pods",
          "persistentvolumeclaims",
        ],
        verbs: [
          "get",
          "watch",
          "list",
          "create",
          "delete",
        ],
      },
      {
        apiGroups: [
          "",
        ],
        resources: [
          "events",
        ],
        verbs: [
          "get",
          "watch",
          "list",
        ],
      },
    ],
  }) &&

std.assertEqual(jupyterhub.parts(params.namespace).jupyterHubServiceAccount,
  {
    apiVersion: "v1",
    kind: "ServiceAccount",
    metadata: {
      labels: {
        app: "jupyter-hub",
      },
      name: "jupyter-hub",
      namespace: "test-kf-001",
    },
  }) &&

std.assertEqual(jupyterhub.parts(params.namespace).jupyterHubRoleBinding,
  {
    apiVersion: "rbac.authorization.k8s.io/v1beta1",
    kind: "RoleBinding",
    metadata: {
      name: "jupyter-role",
      namespace: "test-kf-001",
    },
    roleRef: {
      apiGroup: "rbac.authorization.k8s.io",
      kind: "Role",
      name: "jupyter-role",
    },
    subjects: [
      {
        kind: "ServiceAccount",
        name: "jupyter-hub",
        namespace: "test-kf-001",
      },
    ],
  })
@@ -1,93 +0,0 @@
local nfs = import "../nfs.libsonnet";
local params = {
  namespace:: "test-kf-001",
  name:: "nfs",
};

std.assertEqual(
  nfs.parts(params.namespace, params.name).serviceAccount,
  {
    apiVersion: "v1",
    kind: "ServiceAccount",
    metadata: {
      labels: {
        app: "nfsnfs-provisioner",
      },
      name: "nfs",
      namespace: "test-kf-001",
    },
  }
) &&

std.assertEqual(
  nfs.parts(params.namespace, params.name).role,
  {
    apiVersion: "rbac.authorization.k8s.io/v1beta1",
    kind: "Role",
    metadata: {
      name: "nfs",
      namespace: "test-kf-001",
    },
    rules: [
      {
        apiGroups: [
          "*",
        ],
        resources: [
          "*",
        ],
        verbs: [
          "*",
        ],
      },
    ],
  }
) &&

std.assertEqual(
  nfs.parts(params.namespace, params.name).roleBinding,
  {
    apiVersion: "rbac.authorization.k8s.io/v1beta1",
    kind: "RoleBinding",
    metadata: {
      name: "nfs-nfs-role",
      namespace: "test-kf-001",
    },
    roleRef: {
      apiGroup: "rbac.authorization.k8s.io",
      kind: "Role",
      name: "nfs",
    },
    subjects: [
      {
        kind: "ServiceAccount",
        name: "nfs",
        namespace: "test-kf-001",
      },
    ],
  }
) &&

std.assertEqual(
  nfs.parts(params.namespace, params.name).clusterRoleBinding,
  {
    apiVersion: "rbac.authorization.k8s.io/v1beta1",
    kind: "ClusterRoleBinding",
    metadata: {
      name: "nfs-nfs-role",
      namespace: "test-kf-001",
    },
    roleRef: {
      apiGroup: "rbac.authorization.k8s.io",
      kind: "ClusterRole",
      name: "system:persistent-volume-provisioner",
    },
    subjects: [
      {
        kind: "ServiceAccount",
        name: "nfs",
        namespace: "test-kf-001",
      },
    ],
  }
)
@@ -1,110 +0,0 @@
local spartakus = import "../spartakus.libsonnet";
local params = {
  namespace:: "test-kf-001",
  usageId:: "unknown_cluster",
};

std.assertEqual(
  spartakus.parts(params.namespace).role,
  {
    apiVersion: "rbac.authorization.k8s.io/v1beta1",
    kind: "ClusterRole",
    metadata: {
      labels: {
        app: "spartakus",
      },
      name: "spartakus",
    },
    rules: [
      {
        apiGroups: [
          "",
        ],
        resources: [
          "nodes",
        ],
        verbs: [
          "get",
          "list",
        ],
      },
    ],
  }
) &&

std.assertEqual(
  spartakus.parts(params.namespace).roleBinding,
  {
    apiVersion: "rbac.authorization.k8s.io/v1beta1",
    kind: "ClusterRoleBinding",
    metadata: {
      labels: {
        app: "spartakus",
      },
      name: "spartakus",
    },
    roleRef: {
      apiGroup: "rbac.authorization.k8s.io",
      kind: "ClusterRole",
      name: "spartakus",
    },
    subjects: [
      {
        kind: "ServiceAccount",
        name: "spartakus",
        namespace: "test-kf-001",
      },
    ],
  }
) &&

std.assertEqual(
  spartakus.parts(params.namespace).serviceAccount,
  {
    apiVersion: "v1",
    kind: "ServiceAccount",
    metadata: {
      labels: {
        app: "spartakus",
      },
      name: "spartakus",
      namespace: "test-kf-001",
    },
  }
) &&

std.assertEqual(
  spartakus.parts(params.namespace).deployment(params.usageId),
  {
    apiVersion: "extensions/v1beta1",
    kind: "Deployment",
    metadata: {
      name: "spartakus-volunteer",
      namespace: "test-kf-001",
    },
    spec: {
      replicas: 1,
      template: {
        metadata: {
          labels: {
            app: "spartakus-volunteer",
          },
        },
        spec: {
          containers: [
            {
              args: [
                "volunteer",
                "--cluster-id=unknown_cluster",
                "--database=https://stats-collector.kubeflow.org",
              ],
              image: "gcr.io/google_containers/spartakus-amd64:v1.0.0",
              name: "volunteer",
            },
          ],
          serviceAccountName: "spartakus",
        },
      },
    },
  }
)
@@ -1,241 +0,0 @@
local tfjob = import "../tf-job-operator.libsonnet";
local params = {
  namespace:: "test-kf-001",
  cloud:: "azure",
  tfJobImage:: "gcr.io/kubeflow-images-public/tf_operator:v20180226-403",
  tfDefaultImage:: "null",
};

std.assertEqual(
  tfjob.parts(params.namespace).tfJobDeploy(params.tfJobImage),
  {
    apiVersion: "extensions/v1beta1",
    kind: "Deployment",
    metadata: {
      name: "tf-job-operator",
      namespace: "test-kf-001",
    },
    spec: {
      replicas: 1,
      template: {
        metadata: {
          labels: {
            name: "tf-job-operator",
          },
        },
        spec: {
          containers: [
            {
              command: [
                "/opt/mlkube/tf-operator",
                "--controller-config-file=/etc/config/controller_config_file.yaml",
                "--alsologtostderr",
                "-v=1",
              ],
              env: [
                {
                  name: "MY_POD_NAMESPACE",
                  valueFrom: {
                    fieldRef: {
                      fieldPath: "metadata.namespace",
                    },
                  },
                },
                {
                  name: "MY_POD_NAME",
                  valueFrom: {
                    fieldRef: {
                      fieldPath: "metadata.name",
                    },
                  },
                },
              ],
              image: "gcr.io/kubeflow-images-public/tf_operator:v20180226-403",
              name: "tf-job-operator",
              volumeMounts: [
                {
                  mountPath: "/etc/config",
                  name: "config-volume",
                },
              ],
            },
          ],
          serviceAccountName: "tf-job-operator",
          volumes: [
            {
              configMap: {
                name: "tf-job-operator-config",
              },
              name: "config-volume",
            },
          ],
        },
      },
    },
  }
) &&

std.assertEqual(
  tfjob.parts(params.namespace).configMap(params.cloud, params.tfDefaultImage),
  {
    apiVersion: "v1",
    data: {
      "controller_config_file.yaml": '{\n "grpcServerFilePath": "/opt/mlkube/grpc_tensorflow_server/grpc_tensorflow_server.py"\n}',
    },
    kind: "ConfigMap",
    metadata: {
      name: "tf-job-operator-config",
      namespace: "test-kf-001",
    },
  }
) &&

std.assertEqual(
  tfjob.parts(params.namespace).serviceAccount,
  {
    apiVersion: "v1",
    kind: "ServiceAccount",
    metadata: {
      labels: {
        app: "tf-job-operator",
      },
      name: "tf-job-operator",
      namespace: "test-kf-001",
    },
  }
) &&

std.assertEqual(
  tfjob.parts(params.namespace).operatorRole,
  {
    apiVersion: "rbac.authorization.k8s.io/v1beta1",
    kind: "ClusterRole",
    metadata: {
      labels: {
        app: "tf-job-operator",
      },
      name: "tf-job-operator",
    },
    rules: [
      {
        apiGroups: [
          "tensorflow.org",
          "kubeflow.org",
        ],
        resources: [
          "tfjobs",
        ],
        verbs: [
          "*",
        ],
      },
      {
        apiGroups: [
          "apiextensions.k8s.io",
        ],
        resources: [
          "customresourcedefinitions",
        ],
        verbs: [
          "*",
        ],
      },
      {
        apiGroups: [
          "storage.k8s.io",
        ],
        resources: [
          "storageclasses",
        ],
        verbs: [
          "*",
        ],
      },
      {
        apiGroups: [
          "batch",
        ],
        resources: [
          "jobs",
        ],
        verbs: [
          "*",
        ],
      },
      {
        apiGroups: [
          "",
        ],
        resources: [
          "configmaps",
          "pods",
          "services",
          "endpoints",
          "persistentvolumeclaims",
          "events",
        ],
        verbs: [
          "*",
        ],
      },
      {
        apiGroups: [
          "apps",
          "extensions",
        ],
        resources: [
          "deployments",
        ],
        verbs: [
          "*",
        ],
      },
    ],
  }
) &&

std.assertEqual(
  tfjob.parts(params.namespace).operatorRoleBinding,
  {
    apiVersion: "rbac.authorization.k8s.io/v1beta1",
    kind: "ClusterRoleBinding",
    metadata: {
      labels: {
        app: "tf-job-operator",
      },
      name: "tf-job-operator",
    },
    roleRef: {
      apiGroup: "rbac.authorization.k8s.io",
      kind: "ClusterRole",
      name: "tf-job-operator",
    },
    subjects: [
      {
        kind: "ServiceAccount",
        name: "tf-job-operator",
        namespace: "test-kf-001",
      },
    ],
  }
) &&

std.assertEqual(
  tfjob.parts(params.namespace).crd,
  {
    apiVersion: "apiextensions.k8s.io/v1beta1",
    kind: "CustomResourceDefinition",
    metadata: {
      name: "tfjobs.kubeflow.org",
    },
    spec: {
      group: "kubeflow.org",
      names: {
        kind: "TFJob",
        plural: "tfjobs",
        singular: "tfjob",
      },
      version: "v1alpha1",
    },
  }
)