mirror of https://github.com/kubeflow/examples.git
Add demo scripts & improvements to instructions (#84)
* Add setup scripts & github token param * Clarify instructions Add pointers to resolution for common friction points of new cluster setup: GitHub rate limiting and RBAC permissions Setup persistent disk before Jupyterhub so that it is only setup once Clarify instructions about copying trained model files locally Add version number to frontend image build Add github_token ks parameter for frontend * Change port to 8080 Fix indentation of bullet points * Fix var name & link spacing * Update description of serving script * Use a single ksonnet environment Move ksonnet app out of notebooks subdirectory Rename ksonnet app to ks-kubeflow Update instructions & scripts Remove instructions to delete ksonnet app directory * Remove github access token
This commit is contained in:
parent
6cf382f597
commit
fb2fb26f71
|
@ -0,0 +1,18 @@
|
|||
apiVersion: 0.1.0
|
||||
gitVersion:
|
||||
commitSha: 20bdb46ee436b56fb9e3690d2a383b2afc8989f4
|
||||
refSpec: v0.1.0
|
||||
kind: ksonnet.io/registry
|
||||
libraries:
|
||||
argo:
|
||||
path: argo
|
||||
version: master
|
||||
core:
|
||||
path: core
|
||||
version: master
|
||||
tf-job:
|
||||
path: tf-job
|
||||
version: master
|
||||
tf-serving:
|
||||
path: tf-serving
|
||||
version: master
|
|
@ -0,0 +1,43 @@
|
|||
apiVersion: 0.1.0
|
||||
environments:
|
||||
cloud:
|
||||
destination:
|
||||
namespace: namespace
|
||||
server: https://35.188.73.10
|
||||
k8sVersion: v1.7.0
|
||||
path: cloud
|
||||
default:
|
||||
destination:
|
||||
namespace: default
|
||||
server: https://35.188.73.10
|
||||
k8sVersion: v1.7.0
|
||||
path: default
|
||||
kind: ksonnet.io/app
|
||||
libraries:
|
||||
core:
|
||||
gitVersion:
|
||||
commitSha: 20bdb46ee436b56fb9e3690d2a383b2afc8989f4
|
||||
refSpec: v0.1.0
|
||||
name: core
|
||||
registry: kubeflow
|
||||
seldon:
|
||||
gitVersion:
|
||||
commitSha: 5eb0d7260fedddbd02081711ff7e945f035427d8
|
||||
refSpec: master
|
||||
name: seldon
|
||||
registry: kubeflow
|
||||
name: ks-kubeflow
|
||||
registries:
|
||||
incubator:
|
||||
gitVersion:
|
||||
commitSha: 40285d8a14f1ac5787e405e1023cf0c07f6aa28c
|
||||
refSpec: master
|
||||
protocol: github
|
||||
uri: github.com/ksonnet/parts/tree/master/incubator
|
||||
kubeflow:
|
||||
gitVersion:
|
||||
commitSha: 20bdb46ee436b56fb9e3690d2a383b2afc8989f4
|
||||
refSpec: v0.1.0
|
||||
protocol: github
|
||||
uri: github.com/kubeflow/kubeflow/tree/v0.1.0/kubeflow
|
||||
version: 0.0.1
|
|
@ -0,0 +1,18 @@
|
|||
// ksonnet component that renders the issue-summarization model server using
// the serve-simple library from the kubeflow/seldon package.
local env = std.extVar("__ksonnet/environments");
local params = std.extVar("__ksonnet/params").components["issue-summarization-model-serving"];
local k = import "k.libsonnet";
local serve = import "kubeflow/seldon/serve-simple.libsonnet";

// The component parameter wins unless it is left at the "null" placeholder,
// in which case the namespace of the active ksonnet environment is used.
local targetNamespace =
  if params.namespace == "null" then env.namespace else params.namespace;

local servingParts = serve.parts(targetNamespace);

// Wrap whatever serve() produces in a v1 List so it is applied as one unit.
k.core.v1.list.new(
  servingParts.serve(params.name, params.image, params.replicas, params.endpoint)
)
|
|
@ -0,0 +1,12 @@
|
|||
// ksonnet component that renders every object of the kubeflow core package.
local env = std.extVar("__ksonnet/environments");
local params = std.extVar("__ksonnet/params").components["kubeflow-core"];
local k = import "k.libsonnet";
local all = import "kubeflow/core/all.libsonnet";

// Use the environment namespace when the namespace parameter is still the
// "null" placeholder; otherwise keep the explicitly configured value.
local resolvedNamespace =
  if params.namespace == "null" then env.namespace else params.namespace;

// Same parameters as supplied, with only the namespace field overridden.
local effectiveParams = params + { namespace: resolvedNamespace };

local coreObjects = all.parts(effectiveParams).all;

// std.prune strips null and empty members before the list is emitted.
std.prune(k.core.v1.list.new(coreObjects))
|
|
@ -0,0 +1,54 @@
|
|||
{
|
||||
global: {
|
||||
// User-defined global parameters; accessible to all components and environments, Ex:
|
||||
// replicas: 4,
|
||||
},
|
||||
components: {
|
||||
// Component-level parameters, defined initially from 'ks prototype use ...'
|
||||
// Each object below should correspond to a component in the components/ directory
|
||||
"kubeflow-core": {
|
||||
cloud: "null",
|
||||
disks: "null",
|
||||
jupyterHubAuthenticator: "null",
|
||||
jupyterHubImage: "gcr.io/kubeflow/jupyterhub-k8s:1.0.1",
|
||||
jupyterHubServiceType: "ClusterIP",
|
||||
jupyterNotebookPVCMount: "/home/jovyan/work",
|
||||
name: "kubeflow-core",
|
||||
namespace: "null",
|
||||
reportUsage: "true",
|
||||
tfAmbassadorServiceType: "ClusterIP",
|
||||
tfDefaultImage: "null",
|
||||
tfJobImage: "gcr.io/kubeflow-images-staging/tf_operator:v20180329-a7511ff",
|
||||
tfJobUiServiceType: "ClusterIP",
|
||||
usageId: "7cf1496f-7c67-4dc4-8ce5-1e28f3e03bcd",
|
||||
},
|
||||
seldon: {
|
||||
apifeImage: "seldonio/apife:0.1.5",
|
||||
apifeServiceType: "NodePort",
|
||||
engineImage: "seldonio/engine:0.1.5",
|
||||
name: "seldon",
|
||||
namespace: "null",
|
||||
operatorImage: "seldonio/cluster-manager:0.1.5",
|
||||
operatorJavaOpts: "null",
|
||||
operatorSpringOpts: "null",
|
||||
withApife: "false",
|
||||
withRbac: "true",
|
||||
},
|
||||
"issue-summarization-model-serving": {
|
||||
endpoint: "REST",
|
||||
image: "null",
|
||||
name: "issue-summarization",
|
||||
namespace: "null",
|
||||
replicas: 2,
|
||||
},
|
||||
tensor2tensor: {
|
||||
namespace: "null",
|
||||
},
|
||||
tfjob: {
|
||||
namespace: "null",
|
||||
},
|
||||
ui: {
|
||||
namespace: "null",
|
||||
},
|
||||
},
|
||||
}
|
|
@ -0,0 +1,57 @@
|
|||
// ksonnet component for Seldon Core. It always emits the core objects
// (operator deployment, redis deployment and service, CRD) and adds the RBAC
// objects and/or the API front end (APIFE) depending on the withRbac and
// withApife parameters.
local env = std.extVar("__ksonnet/environments");
local params = std.extVar("__ksonnet/params").components.seldon;
local k = import "k.libsonnet";
local core = import "kubeflow/seldon/core.libsonnet";

// updatedParams uses the environment namespace if
// the namespace parameter is not explicitly set
local updatedParams = params {
  namespace: if params.namespace == "null" then env.namespace else params.namespace,
};

local namespace = updatedParams.namespace;

// Both switches are compared against the strings "true" / "false" below,
// not against booleans.
local withRbac = params.withRbac;
local withApife = params.withApife;

// APIFE
local apifeImage = params.apifeImage;
local apifeServiceType = params.apifeServiceType;

// Cluster Manager (The CRD Operator)
local operatorImage = params.operatorImage;
// The "null" placeholder means "no extra options"; pass an empty string on.
local operatorSpringOptsParam = params.operatorSpringOpts;
local operatorSpringOpts = if operatorSpringOptsParam != "null" then operatorSpringOptsParam else "";
local operatorJavaOptsParam = params.operatorJavaOpts;
local operatorJavaOpts = if operatorJavaOptsParam != "null" then operatorJavaOptsParam else "";

// Engine
local engineImage = params.engineImage;

// APIFE objects
local apife = [
  core.parts(namespace).apife(apifeImage, withRbac),
  core.parts(namespace).apifeService(apifeServiceType),
];

// RBAC objects
local rbac = [
  core.parts(namespace).rbacServiceAccount(),
  core.parts(namespace).rbacClusterRoleBinding(),
];

// Core
local coreComponents = [
  core.parts(namespace).deploymentOperator(engineImage, operatorImage, operatorSpringOpts, operatorJavaOpts, withRbac),
  core.parts(namespace).redisDeployment(),
  core.parts(namespace).redisService(),
  core.parts(namespace).crd(),
];

if withRbac == "true" && withApife == "true" then
  k.core.v1.list.new(apife + rbac + coreComponents)
else if withRbac == "true" && withApife == "false" then
  k.core.v1.list.new(rbac + coreComponents)
else if withRbac == "false" && withApife == "true" then
  k.core.v1.list.new(apife + coreComponents)
else if withRbac == "false" && withApife == "false" then
  k.core.v1.list.new(coreComponents)
else
  // Without this branch the whole component would evaluate to null for any
  // other value (for example a boolean true), hiding the misconfiguration.
  error ("seldon: withRbac and withApife must each be the string \"true\" or \"false\"; got withRbac="
         + std.toString(withRbac) + ", withApife=" + std.toString(withApife))
|
|
@ -42,6 +42,12 @@
|
|||
containers: [
|
||||
{
|
||||
image: "gcr.io/kubeflow-images-staging/issue-summarization-ui:latest",
|
||||
env: [
|
||||
{
|
||||
name: "GITHUB_TOKEN",
|
||||
value: params.github_token,
|
||||
}
|
||||
],
|
||||
name: "issue-summarization-ui",
|
||||
ports: [
|
||||
{
|
|
@ -1,4 +1,4 @@
|
|||
local components = std.extVar("__ksonnet/components");
|
||||
components {
|
||||
components + {
|
||||
// Insert user-specified overrides here.
|
||||
}
|
|
@ -1,7 +1,7 @@
|
|||
local base = import "base.libsonnet";
|
||||
local k = import "k.libsonnet";
|
||||
|
||||
base {
|
||||
base + {
|
||||
// Insert user-specified overrides here. For example if a component is named "nginx-deployment", you might have something like:
|
||||
// "nginx-deployment"+: k.deployment.mixin.metadata.labels({foo: "bar"})
|
||||
}
|
|
@ -0,0 +1,16 @@
|
|||
local params = import "../../components/params.libsonnet";
|
||||
params + {
|
||||
components +: {
|
||||
// Insert component parameter overrides here. Ex:
|
||||
// guestbook +: {
|
||||
// name: "guestbook-dev",
|
||||
// replicas: params.global.replicas,
|
||||
// },
|
||||
"kubeflow-core" +: {
|
||||
cloud: "gke",
|
||||
},
|
||||
ui +: {
|
||||
github_token: "null",
|
||||
},
|
||||
},
|
||||
}
|
|
@ -0,0 +1,7 @@
|
|||
local base = import "base.libsonnet";
|
||||
local k = import "k.libsonnet";
|
||||
|
||||
base + {
|
||||
// Insert user-specified overrides here. For example if a component is named "nginx-deployment", you might have something like:
|
||||
// "nginx-deployment"+: k.deployment.mixin.metadata.labels({foo: "bar"})
|
||||
}
|
|
@ -1,6 +1,6 @@
|
|||
local params = import "../../components/params.libsonnet";
|
||||
params {
|
||||
components+: {
|
||||
params + {
|
||||
components +: {
|
||||
// Insert component parameter overrides here. Ex:
|
||||
// guestbook +: {
|
||||
// name: "guestbook-dev",
|
|
@ -18,7 +18,7 @@ local hidden = {
|
|||
},
|
||||
},
|
||||
|
||||
mapContainersWithName(names, f)::
|
||||
mapContainersWithName(names, f) ::
|
||||
local nameSet =
|
||||
if std.type(names) == "array"
|
||||
then std.set(names)
|
||||
|
@ -32,46 +32,46 @@ local hidden = {
|
|||
),
|
||||
};
|
||||
|
||||
k8s {
|
||||
apps:: apps {
|
||||
v1beta1:: apps.v1beta1 {
|
||||
k8s + {
|
||||
apps:: apps + {
|
||||
v1beta1:: apps.v1beta1 + {
|
||||
local v1beta1 = apps.v1beta1,
|
||||
|
||||
daemonSet:: v1beta1.daemonSet {
|
||||
daemonSet:: v1beta1.daemonSet + {
|
||||
mapContainers(f):: hidden.mapContainers(f),
|
||||
mapContainersWithName(names, f):: hidden.mapContainersWithName(names, f),
|
||||
},
|
||||
|
||||
deployment:: v1beta1.deployment {
|
||||
deployment:: v1beta1.deployment + {
|
||||
mapContainers(f):: hidden.mapContainers(f),
|
||||
mapContainersWithName(names, f):: hidden.mapContainersWithName(names, f),
|
||||
},
|
||||
},
|
||||
},
|
||||
|
||||
core:: core {
|
||||
v1:: core.v1 {
|
||||
core:: core + {
|
||||
v1:: core.v1 + {
|
||||
list:: {
|
||||
new(items)::
|
||||
{ apiVersion: "v1" } +
|
||||
{ kind: "List" } +
|
||||
{apiVersion: "v1"} +
|
||||
{kind: "List"} +
|
||||
self.items(items),
|
||||
|
||||
items(items):: if std.type(items) == "array" then { items+: items } else { items+: [items] },
|
||||
items(items):: if std.type(items) == "array" then {items+: items} else {items+: [items]},
|
||||
},
|
||||
},
|
||||
},
|
||||
|
||||
extensions:: extensions {
|
||||
v1beta1:: extensions.v1beta1 {
|
||||
extensions:: extensions + {
|
||||
v1beta1:: extensions.v1beta1 + {
|
||||
local v1beta1 = extensions.v1beta1,
|
||||
|
||||
daemonSet:: v1beta1.daemonSet {
|
||||
daemonSet:: v1beta1.daemonSet + {
|
||||
mapContainers(f):: hidden.mapContainers(f),
|
||||
mapContainersWithName(names, f):: hidden.mapContainersWithName(names, f),
|
||||
},
|
||||
|
||||
deployment:: v1beta1.deployment {
|
||||
deployment:: v1beta1.deployment + {
|
||||
mapContainers(f):: hidden.mapContainers(f),
|
||||
mapContainersWithName(names, f):: hidden.mapContainersWithName(names, f),
|
||||
},
|
File diff suppressed because it is too large
Load Diff
|
@ -1,18 +0,0 @@
|
|||
apiVersion: 0.1.0
|
||||
environments:
|
||||
default:
|
||||
destination:
|
||||
namespace: namespace
|
||||
server: https://1.2.3.4
|
||||
k8sVersion: v1.7.0
|
||||
path: default
|
||||
kind: ksonnet.io/app
|
||||
name: ks-app
|
||||
registries:
|
||||
incubator:
|
||||
gitVersion:
|
||||
commitSha: 40285d8a14f1ac5787e405e1023cf0c07f6aa28c
|
||||
refSpec: master
|
||||
protocol: github
|
||||
uri: github.com/ksonnet/parts/tree/master/incubator
|
||||
version: 0.0.1
|
|
@ -1,19 +0,0 @@
|
|||
{
|
||||
global: {
|
||||
// User-defined global parameters; accessible to all component and environments, Ex:
|
||||
// replicas: 4,
|
||||
},
|
||||
components: {
|
||||
// Component-level parameters, defined initially from 'ks prototype use ...'
|
||||
// Each object below should correspond to a component in the components/ directory
|
||||
tfjob: {
|
||||
|
||||
},
|
||||
tensor2tensor: {
|
||||
|
||||
},
|
||||
ui: {
|
||||
|
||||
},
|
||||
},
|
||||
}
|
|
@ -20,7 +20,7 @@ To build the frontend image, issue the following commands:
|
|||
|
||||
```
|
||||
cd docker
|
||||
docker build -t gcr.io/gcr-repository-name/issue-summarization-ui .
|
||||
docker build -t gcr.io/gcr-repository-name/issue-summarization-ui:0.1 .
|
||||
```
|
||||
|
||||
## Store the frontend image
|
||||
|
@ -34,19 +34,21 @@ gcloud docker -- push gcr.io/gcr-repository-name/issue-summarization-ui:0.1
|
|||
|
||||
## Deploy the frontend image to your kubernetes cluster
|
||||
|
||||
[notebooks](notebooks) contains a ksonnet app([ks-app](notebooks/ks-app)). The ui component in the ks-app contains the frontend image deployment.
|
||||
The folder [ks-kubeflow](ks-kubeflow) contains a ksonnet app. The ui component in the ks-kubeflow app contains the frontend image deployment.
|
||||
|
||||
Create an environment to deploy the ksonnet app
|
||||
To avoid rate-limiting by the GitHub API, you will need an [authentication token](https://github.com/ksonnet/ksonnet/blob/master/docs/troubleshooting.md) stored in the form of an environment variable `${GITHUB_TOKEN}`. The token does not require any permissions and is only used to prevent anonymous API calls.
|
||||
|
||||
To use this token, set it as a parameter in the ui component:
|
||||
|
||||
```commandline
|
||||
cd notebooks/ks-app
|
||||
ks env add frontendenv --namespace ${NAMESPACE}
|
||||
cd ks-kubeflow
|
||||
ks param set ui github_token ${GITHUB_TOKEN} --env ${KF_ENV}
|
||||
```
|
||||
|
||||
To serve the frontend interface, apply the ui component of the ksonnet app:
|
||||
|
||||
```
|
||||
ks apply frontendenv -c ui
|
||||
ks apply ${KF_ENV} -c ui
|
||||
```
|
||||
|
||||
## View results from the frontend
|
||||
|
|
|
@ -0,0 +1,41 @@
|
|||
#!/bin/bash

# Instantiates a fresh version of kubeflow on a cluster
#
# Required environment:
#   KF_DEV_NAMESPACE  namespace to create and deploy kubeflow-core into
#
# Uses the ks, kubectl and uuidgen commands.

KF_VERSION=v0.1.0
# Stop immediately when the namespace is unset or empty; otherwise the
# kubectl/ks commands below would be invoked with a missing argument.
NAMESPACE=${KF_DEV_NAMESPACE:?KF_DEV_NAMESPACE must be set}
KF_ENV=cloud

# Initialize an empty ksonnet app
ks version
ks init ks-kubeflow

# Install kubeflow core package
# Bail out if the app directory is missing so that no later ks command runs
# from the wrong working directory.
cd ks-kubeflow || exit 1
ks registry add kubeflow "github.com/kubeflow/kubeflow/tree/${KF_VERSION}/kubeflow"
ks pkg install "kubeflow/core@${KF_VERSION}"

# Generate core component
ks generate core kubeflow-core --name=kubeflow-core

# Enable anonymous usage metrics
ks param set kubeflow-core reportUsage true
ks param set kubeflow-core usageId "$(uuidgen)"

# Define an environment
ks env add "${KF_ENV}"

# Configure our cloud to use GCP features
ks param set kubeflow-core cloud gke --env="${KF_ENV}"

# Set the mount path of the Jupyter notebook PVC
ks param set kubeflow-core jupyterNotebookPVCMount /home/jovyan/work

# Create a namespace for my deployment
kubectl create namespace "${NAMESPACE}"
ks env set "${KF_ENV}" --namespace "${NAMESPACE}"

# Instantiate objects on the cluster
ks apply "${KF_ENV}" -c kubeflow-core
|
||||
|
||||
|
|
@ -0,0 +1,21 @@
|
|||
#!/bin/bash

# Set up a shared persistent disk
#
# Required environment:
#   KF_DEV_PROJECT    GCP project that owns the disk
#   KF_DEV_ZONE       compute zone in which to create the disk
#   KF_DEV_NAMESPACE  namespace of the kubeflow deployment; also used as the
#                     suffix of the disk name

# Fail fast on missing settings instead of calling gcloud/kubectl with empty
# arguments.
PROJECT=${KF_DEV_PROJECT:?KF_DEV_PROJECT must be set}
ZONE=${KF_DEV_ZONE:?KF_DEV_ZONE must be set}
NAMESPACE=${KF_DEV_NAMESPACE:?KF_DEV_NAMESPACE must be set}
KF_ENV=cloud
PD_DISK_NAME=github-issues-data-${NAMESPACE}

# Create the disk
gcloud --project="${PROJECT}" compute disks create --zone="${ZONE}" "${PD_DISK_NAME}" --description="PD for storing GitHub Issue data." --size=10GB

# Configure the environment to use the disk
# Abort if the ksonnet app directory is missing so ks never runs elsewhere.
cd ks-kubeflow || exit 1
ks param set --env="${KF_ENV}" kubeflow-core disks "${PD_DISK_NAME}"
ks apply "${KF_ENV}"

# Recreate the tf-hub pod so that it picks up the disk config
kubectl delete pod tf-hub-0 --namespace="${NAMESPACE}"
|
||||
|
|
@ -0,0 +1,11 @@
|
|||
#!/bin/bash

# Set up access to JupyterHub from outside the cluster, primarily for viewing
# via a browser
#
# Required environment:
#   KF_DEV_NAMESPACE  namespace in which the tf-hub pod runs

NAMESPACE=${KF_DEV_NAMESPACE:?KF_DEV_NAMESPACE must be set}

# Name of the first pod labelled app=tf-hub in the namespace.
PODNAME=$(kubectl get pods --namespace="${NAMESPACE}" --selector="app=tf-hub" --output=template --template="{{with index .items 0}}{{.metadata.name}}{{end}}")

# Without a pod name, port-forward would only print a confusing usage error.
if [ -z "${PODNAME}" ]; then
  echo "No pod with label app=tf-hub found in namespace ${NAMESPACE}" >&2
  exit 1
fi

# Forward local port 8000 to port 8000 of the pod; runs until interrupted.
kubectl port-forward --namespace="${NAMESPACE}" "${PODNAME}" 8000:8000
|
||||
|
||||
|
|
@ -0,0 +1,17 @@
|
|||
#!/bin/bash

# Add Seldon Core to an existing kubeflow cluster
#
# Required environment:
#   KF_DEV_NAMESPACE  namespace of the kubeflow deployment

# An empty namespace would yield the malformed argument
# --serviceaccount=:default below, so refuse to continue without it.
NAMESPACE=${KF_DEV_NAMESPACE:?KF_DEV_NAMESPACE must be set}
KF_ENV=cloud

# Abort if the ksonnet app directory is missing so ks never runs elsewhere.
cd ks-kubeflow || exit 1

# Gives cluster-admin role to the default service account in the ${NAMESPACE}
kubectl create clusterrolebinding seldon-admin --clusterrole=cluster-admin --serviceaccount="${NAMESPACE}:default"
# Install the kubeflow/seldon package
ks pkg install kubeflow/seldon
# Generate the seldon component and deploy it
ks generate seldon seldon --name=seldon --namespace="${NAMESPACE}"
ks apply "${KF_ENV}" -c seldon
|
||||
|
|
@ -0,0 +1,24 @@
|
|||
#!/bin/bash

# Deploys a serving component using an image generated by Seldon.
# Opens a port to the cluster for calling the service locally.
#
# Required environment:
#   KF_DEV_PROJECT    GCP project whose registry holds the model image
#   KF_DEV_NAMESPACE  namespace of the kubeflow deployment; also part of the
#                     image name

# Fail fast on missing settings; both values are embedded in the image name.
PROJECT=${KF_DEV_PROJECT:?KF_DEV_PROJECT must be set}
NAMESPACE=${KF_DEV_NAMESPACE:?KF_DEV_NAMESPACE must be set}
KF_ENV=cloud

# Abort if the ksonnet app directory is missing so ks never runs elsewhere.
cd ks-kubeflow || exit 1

# Generate component
ks generate seldon-serve-simple issue-summarization-model-serving \
  --name=issue-summarization \
  --image="gcr.io/${PROJECT}/issue-summarization-${NAMESPACE}:0.1" \
  --namespace="${NAMESPACE}" \
  --replicas=2

# Deploy it to cluster
ks apply "${KF_ENV}" -c issue-summarization-model-serving

# Access from local machine
# Look the pod up first so that a failed lookup gives a clear message instead
# of a port-forward usage error.
AMBASSADOR_POD=$(kubectl get pods -n "${NAMESPACE}" -l service=ambassador -o jsonpath='{.items[0].metadata.name}')
if [ -z "${AMBASSADOR_POD}" ]; then
  echo "No pod with label service=ambassador found in namespace ${NAMESPACE}" >&2
  exit 1
fi
kubectl port-forward "${AMBASSADOR_POD}" -n "${NAMESPACE}" 8001:80
|
||||
|
|
@ -0,0 +1,22 @@
|
|||
#!/bin/bash

# Build and deploy a UI for accessing the trained model
#
# Required environment:
#   KF_DEV_PROJECT    GCP project whose registry receives the UI image
#   KF_DEV_NAMESPACE  namespace of the kubeflow deployment; also part of the
#                     image name
#   GITHUB_TOKEN      token stored in the ui component's github_token parameter

# Validate everything up front so the script cannot build and push an image
# and only then fail on a malformed `ks param set` command.
PROJECT=${KF_DEV_PROJECT:?KF_DEV_PROJECT must be set}
NAMESPACE=${KF_DEV_NAMESPACE:?KF_DEV_NAMESPACE must be set}
: "${GITHUB_TOKEN:?GITHUB_TOKEN must be set}"
KF_ENV=cloud

# Create the image locally
# Abort if the directory is missing; otherwise docker would build whatever is
# in the current directory.
cd docker || exit 1
docker build -t "gcr.io/${PROJECT}/issue-summarization-ui-${NAMESPACE}:0.1" .

# Store in the container repo
gcloud docker -- push "gcr.io/${PROJECT}/issue-summarization-ui-${NAMESPACE}:0.1"

cd ../ks-kubeflow || exit 1
ks param set ui github_token "${GITHUB_TOKEN}" --env "${KF_ENV}"
ks apply "${KF_ENV}" -c ui

# Open access outside the cluster
# Look the pod up first so that a failed lookup gives a clear message instead
# of a port-forward usage error.
AMBASSADOR_POD=$(kubectl get pods -n "${NAMESPACE}" -l service=ambassador -o jsonpath='{.items[0].metadata.name}')
if [ -z "${AMBASSADOR_POD}" ]; then
  echo "No pod with label service=ambassador found in namespace ${NAMESPACE}" >&2
  exit 1
fi
kubectl port-forward "${AMBASSADOR_POD}" -n "${NAMESPACE}" 8080:80
|
||||
|
|
@ -46,6 +46,7 @@ Now that we have an image with our model server, we can deploy it to our kuberne
|
|||
Install the CRD and its controller using the seldon prototype
|
||||
|
||||
```bash
|
||||
cd ks-kubeflow
|
||||
# Gives cluster-admin role to the default service account in the ${NAMESPACE}
|
||||
kubectl create clusterrolebinding seldon-admin --clusterrole=cluster-admin --serviceaccount=${NAMESPACE}:default
|
||||
# Install the kubeflow/seldon package
|
||||
|
|
|
@ -8,7 +8,7 @@ In this part, you will setup kubeflow on an existing kubernetes cluster.
|
|||
* `kubectl` CLI pointing to the kubernetes cluster
|
||||
* Make sure that you can run `kubectl get nodes` from your terminal
|
||||
successfully
|
||||
* The ksonnet CLI: [ks](https://ksonnet.io/#get-started)
|
||||
* The ksonnet CLI, v0.9.2 or higher: [ks](https://ksonnet.io/#get-started)
|
||||
|
||||
## Kubeflow setup
|
||||
|
||||
|
@ -18,54 +18,54 @@ instructions on how to setup kubeflow on your kubernetes cluster. Specifically,
|
|||
complete the following sections:
|
||||
* [Deploy
|
||||
Kubeflow](https://github.com/kubeflow/kubeflow/blob/master/user_guide.md#deploy-kubeflow)
|
||||
* The `ks-kubeflow` directory can be used instead of creating a ksonnet
|
||||
app from scratch.
|
||||
* If you run into
|
||||
[API rate limiting errors](https://github.com/ksonnet/ksonnet/blob/master/docs/troubleshooting.md#github-rate-limiting-errors),
|
||||
ensure you have a `${GITHUB_TOKEN}` environment variable set.
|
||||
* If you run into
|
||||
[RBAC permissions issues](https://github.com/kubeflow/kubeflow/blob/master/user_guide.md#rbac-clusters)
|
||||
running `ks apply` commands, be sure you have created a `cluster-admin` ClusterRoleBinding for your username.
|
||||
* [Setup a persistent disk](https://github.com/kubeflow/kubeflow/blob/master/user_guide.md#advanced-customization)
|
||||
* We need a shared persistent disk to store our training data since
|
||||
containers' filesystems are ephemeral and don't have a lot of storage space.
|
||||
* For this example, provision a `10GB` cluster-wide shared NFS mount with the
|
||||
name `github-issues-data`.
|
||||
* After the NFS is ready, delete the `tf-hub-0` pod so that it gets recreated and
|
||||
picks up the NFS mount. You can delete it by running `kubectl delete pod
|
||||
tf-hub-0 -n=${NAMESPACE}`
|
||||
* [Bringing up a
|
||||
Notebook](https://github.com/kubeflow/kubeflow/blob/master/user_guide.md#bringing-up-a-jupyter-notebook)
|
||||
* When choosing an image for your cluster in the JupyterHub UI, use the
|
||||
image from this example:
|
||||
[`gcr.io/kubeflow-dev/issue-summarization-notebook-cpu:latest`](https://github.com/kubeflow/examples/blob/master/github_issue_summarization/workflow/Dockerfile).
|
||||
image from this example:
|
||||
[`gcr.io/kubeflow-dev/issue-summarization-notebook-cpu:latest`](https://github.com/kubeflow/examples/blob/master/github_issue_summarization/workflow/Dockerfile).
|
||||
|
||||
After completing that, you should have the following ready:
|
||||
|
||||
* A ksonnet app in a directory named `my-kubeflow`
|
||||
* A ksonnet app in a directory named `ks-kubeflow`
|
||||
* An output similar to this for `kubectl get pods`
|
||||
|
||||
```
|
||||
NAME READY STATUS RESTARTS AGE
|
||||
ambassador-7987df44b9-4pht8 2/2 Running 0 1m
|
||||
ambassador-7987df44b9-dh5h6 2/2 Running 0 1m
|
||||
ambassador-7987df44b9-qrgsm 2/2 Running 0 1m
|
||||
tf-hub-0 1/1 Running 0 1m
|
||||
tf-job-operator-78757955b-qkg7s 1/1 Running 0 1m
|
||||
NAME READY STATUS RESTARTS AGE
|
||||
ambassador-75bb54594-dnxsd 2/2 Running 0 3m
|
||||
ambassador-75bb54594-hjj6m 2/2 Running 0 3m
|
||||
ambassador-75bb54594-z948h 2/2 Running 0 3m
|
||||
jupyter-chasm 1/1 Running 0 49s
|
||||
spartakus-volunteer-565b99cd69-knjf2 1/1 Running 0 3m
|
||||
tf-hub-0 1/1 Running 0 3m
|
||||
tf-job-dashboard-6c757d8684-d299l 1/1 Running 0 3m
|
||||
tf-job-operator-77776c8446-lpprm 1/1 Running 0 3m
|
||||
```
|
||||
|
||||
* A Jupyter Notebook accessible at `http://127.0.0.1:8000`
|
||||
|
||||
## Provision storage for training data
|
||||
|
||||
We need a shared persistent disk to store our training data since containers'
|
||||
filesystems are ephemeral and don't have a lot of storage space.
|
||||
|
||||
The [Advanced
|
||||
Customization](https://github.com/kubeflow/kubeflow/blob/master/user_guide.md#advanced-customization)
|
||||
section of the [user
|
||||
guide](https://github.com/kubeflow/kubeflow/blob/master/user_guide.md) has
|
||||
instructions on how to provision a cluster-wide shared NFS.
|
||||
|
||||
For this example, provision a `10GB` NFS mount with the name
|
||||
`github-issues-data`.
|
||||
|
||||
After the NFS is ready, delete the `tf-hub-0` pod so that it gets recreated and
|
||||
picks up the NFS mount. You can delete it by running `kubectl delete pod
|
||||
tf-hub-0 -n=${NAMESPACE}`
|
||||
|
||||
At this point you should have a 10GB mount `/mnt/github-issues-data` in your
|
||||
Jupyter Notebook pod. Check this by running `!df` in your Jupyter Notebook.
|
||||
* A 10GB mount `/mnt/github-issues-data` in your Jupyter Notebook pod. Check this
|
||||
by running `!df` in your Jupyter Notebook.
|
||||
|
||||
## Summary
|
||||
|
||||
* We created a ksonnet app for our kubeflow deployment
|
||||
* We created a disk for storing our training data
|
||||
* We deployed the kubeflow-core component to our kubernetes cluster
|
||||
* We created a disk for storing our training data
|
||||
* We connected to JupyterHub and spawned a new Jupyter notebook
|
||||
|
||||
Next: [Training the model using our cluster](training_the_model.md)
|
||||
|
|
|
@ -13,8 +13,3 @@ gcloud --project=${PROJECT} compute disks delete --zone=${ZONE} ${PD_DISK_NAME}
|
|||
|
||||
```
|
||||
|
||||
Delete the kubeflow-app directory
|
||||
|
||||
```
|
||||
rm -rf my-kubeflow
|
||||
```
|
||||
|
|
|
@ -25,13 +25,14 @@ After training completes, download the resulting files to your local machine. Th
|
|||
* `body_pp.dpkl` - the serialized body preprocessor
|
||||
* `title_pp.dpkl` - the serialized title preprocessor
|
||||
|
||||
In a locally cloned copy of the same repo, issue the following commands to place these three files into the `examples/github_issue_summarization/notebooks` folder:
|
||||
If you haven't already, clone the [kubeflow/examples](https://github.com/kubeflow/examples) repo locally, then issue the following commands to place these three files into the `github_issue_summarization/notebooks` folder on your local machine:
|
||||
|
||||
```
|
||||
cd github_issue_summarization/notebooks
|
||||
PODNAME=`kubectl get pods --namespace=${NAMESPACE} --selector="app=jupyterhub" --output=template --template="{{with index .items 0}}{{.metadata.name}}{{end}}"`
|
||||
kubectl --namespace=${NAMESPACE} cp ${PODNAME}:/home/jovyan/examples/github_issue_summarization/notebooks/seq2seq_model_tutorial.h5 examples/github_issue_summarization/notebooks
|
||||
kubectl --namespace=${NAMESPACE} cp ${PODNAME}:/home/jovyan/examples/github_issue_summarization/notebooks/body_pp.dpkl examples/github_issue_summarization/notebooks
|
||||
kubectl --namespace=${NAMESPACE} cp ${PODNAME}:/home/jovyan/examples/github_issue_summarization/notebooks/title_pp.dpkl examples/github_issue_summarization/notebooks
|
||||
kubectl --namespace=${NAMESPACE} cp ${PODNAME}:/home/jovyan/examples/github_issue_summarization/notebooks/seq2seq_model_tutorial.h5 .
|
||||
kubectl --namespace=${NAMESPACE} cp ${PODNAME}:/home/jovyan/examples/github_issue_summarization/notebooks/body_pp.dpkl .
|
||||
kubectl --namespace=${NAMESPACE} cp ${PODNAME}:/home/jovyan/examples/github_issue_summarization/notebooks/title_pp.dpkl .
|
||||
```
|
||||
|
||||
|
||||
|
|
|
@ -41,37 +41,31 @@ kubectl --namespace=${NAMESPACE} create secret generic gcp-credentials --from-fi
|
|||
|
||||
## Run the TFJob using your image
|
||||
|
||||
[notebooks](notebooks) contains a ksonnet app([ks-app](notebooks/ks-app)) to deploy the TFJob.
|
||||
|
||||
Create an environment to deploy the ksonnet app
|
||||
|
||||
```commandline
|
||||
cd notebooks/ks-app
|
||||
ks env add tfjob --namespace ${NAMESPACE}
|
||||
```
|
||||
[ks-kubeflow](ks-kubeflow) contains a ksonnet app to deploy the TFJob.
|
||||
|
||||
Set the appropriate params for the tfjob component
|
||||
|
||||
```commandline
|
||||
ks param set tfjob namespace ${NAMESPACE} --env=tfjob
|
||||
cd ks-kubeflow
|
||||
ks param set tfjob namespace ${NAMESPACE} --env=${KF_ENV}
|
||||
|
||||
# The image pushed in the previous step
|
||||
ks param set tfjob image "gcr.io/agwl-kubeflow/tf-job-issue-summarization:latest" --env=tfjob
|
||||
ks param set tfjob image "gcr.io/agwl-kubeflow/tf-job-issue-summarization:latest" --env=${KF_ENV}
|
||||
|
||||
# Sample Size for training
|
||||
ks param set tfjob sample_size 100000 --env=tfjob
|
||||
ks param set tfjob sample_size 100000 --env=${KF_ENV}
|
||||
|
||||
# Set the input and output GCS Bucket locations
|
||||
ks param set tfjob input_data_gcs_bucket "kubeflow-examples" --env=tfjob
|
||||
ks param set tfjob input_data_gcs_path "github-issue-summarization-data/github-issues.zip" --env=tfjob
|
||||
ks param set tfjob output_model_gcs_bucket "kubeflow-examples" --env=tfjob
|
||||
ks param set tfjob output_model_gcs_path "github-issue-summarization-data/output_model.h5" --env=tfjob
|
||||
ks param set tfjob input_data_gcs_bucket "kubeflow-examples" --env=${KF_ENV}
|
||||
ks param set tfjob input_data_gcs_path "github-issue-summarization-data/github-issues.zip" --env=${KF_ENV}
|
||||
ks param set tfjob output_model_gcs_bucket "kubeflow-examples" --env=${KF_ENV}
|
||||
ks param set tfjob output_model_gcs_path "github-issue-summarization-data/output_model.h5" --env=${KF_ENV}
|
||||
```
|
||||
|
||||
Deploy the app:
|
||||
|
||||
```commandline
|
||||
ks apply tfjob -c tfjob
|
||||
ks apply ${KF_ENV} -c tfjob
|
||||
```
|
||||
|
||||
In a while you should see a new pod with the label `tf_job_name=tf-job-issue-summarization`
|
||||
|
|
Loading…
Reference in New Issue