Add demo scripts & improvements to instructions (#84)

* Add setup scripts & github token param

* Clarify instructions

Add pointers to resolution for common friction points of new cluster
setup: GitHub rate limiting and RBAC permissions
Set up persistent disk before JupyterHub so that it is only set up once
Clarify instructions about copying trained model files locally
Add version number to frontend image build
Add github_token ks parameter for frontend

* Change port to 8080

Fix indentation of bullet points

* Fix var name & link spacing

* Update description of serving script

* Use a single ksonnet environment

Move ksonnet app out of notebooks subdirectory
Rename ksonnet app to ks-kubeflow
Update instructions & scripts
Remove instructions to delete ksonnet app directory

* Remove github access token
Michelle Casbon 2018-04-23 16:23:59 -07:00 committed by k8s-ci-robot
parent 6cf382f597
commit fb2fb26f71
35 changed files with 6465 additions and 6142 deletions

View File

@@ -0,0 +1,18 @@
apiVersion: 0.1.0
gitVersion:
commitSha: 20bdb46ee436b56fb9e3690d2a383b2afc8989f4
refSpec: v0.1.0
kind: ksonnet.io/registry
libraries:
argo:
path: argo
version: master
core:
path: core
version: master
tf-job:
path: tf-job
version: master
tf-serving:
path: tf-serving
version: master

View File

@@ -0,0 +1,43 @@
apiVersion: 0.1.0
environments:
cloud:
destination:
namespace: namespace
server: https://35.188.73.10
k8sVersion: v1.7.0
path: cloud
default:
destination:
namespace: default
server: https://35.188.73.10
k8sVersion: v1.7.0
path: default
kind: ksonnet.io/app
libraries:
core:
gitVersion:
commitSha: 20bdb46ee436b56fb9e3690d2a383b2afc8989f4
refSpec: v0.1.0
name: core
registry: kubeflow
seldon:
gitVersion:
commitSha: 5eb0d7260fedddbd02081711ff7e945f035427d8
refSpec: master
name: seldon
registry: kubeflow
name: ks-kubeflow
registries:
incubator:
gitVersion:
commitSha: 40285d8a14f1ac5787e405e1023cf0c07f6aa28c
refSpec: master
protocol: github
uri: github.com/ksonnet/parts/tree/master/incubator
kubeflow:
gitVersion:
commitSha: 20bdb46ee436b56fb9e3690d2a383b2afc8989f4
refSpec: v0.1.0
protocol: github
uri: github.com/kubeflow/kubeflow/tree/v0.1.0/kubeflow
version: 0.0.1

View File

@@ -0,0 +1,18 @@
local env = std.extVar("__ksonnet/environments");
local params = std.extVar("__ksonnet/params").components["issue-summarization-model-serving"];
local k = import "k.libsonnet";
local serve = import "kubeflow/seldon/serve-simple.libsonnet";
// updatedParams uses the environment namespace if
// the namespace parameter is not explicitly set
local updatedParams = params {
namespace: if params.namespace == "null" then env.namespace else params.namespace,
};
local name = params.name;
local image = params.image;
local namespace = updatedParams.namespace;
local replicas = params.replicas;
local endpoint = params.endpoint;
k.core.v1.list.new(serve.parts(namespace).serve(name, image, replicas, endpoint))
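
The `namespace` fallback above means the serving component lands in the ksonnet environment's namespace unless the parameter is set explicitly. A minimal sketch of overriding it from the CLI, assuming the app directory and an environment named `${KF_ENV}`:

```bash
cd ks-kubeflow
# Pin the serving component to a specific namespace instead of
# inheriting the environment default
ks param set issue-summarization-model-serving namespace ${NAMESPACE} --env=${KF_ENV}
```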

View File

@@ -0,0 +1,12 @@
local env = std.extVar("__ksonnet/environments");
local params = std.extVar("__ksonnet/params").components["kubeflow-core"];
local k = import "k.libsonnet";
local all = import "kubeflow/core/all.libsonnet";
// updatedParams uses the environment namespace if
// the namespace parameter is not explicitly set
local updatedParams = params {
namespace: if params.namespace == "null" then env.namespace else params.namespace
};
std.prune(k.core.v1.list.new(all.parts(updatedParams).all))

View File

@@ -0,0 +1,54 @@
{
global: {
// User-defined global parameters; accessible to all components and environments, Ex:
// replicas: 4,
},
components: {
// Component-level parameters, defined initially from 'ks prototype use ...'
// Each object below should correspond to a component in the components/ directory
"kubeflow-core": {
cloud: "null",
disks: "null",
jupyterHubAuthenticator: "null",
jupyterHubImage: "gcr.io/kubeflow/jupyterhub-k8s:1.0.1",
jupyterHubServiceType: "ClusterIP",
jupyterNotebookPVCMount: "/home/jovyan/work",
name: "kubeflow-core",
namespace: "null",
reportUsage: "true",
tfAmbassadorServiceType: "ClusterIP",
tfDefaultImage: "null",
tfJobImage: "gcr.io/kubeflow-images-staging/tf_operator:v20180329-a7511ff",
tfJobUiServiceType: "ClusterIP",
usageId: "7cf1496f-7c67-4dc4-8ce5-1e28f3e03bcd",
},
seldon: {
apifeImage: "seldonio/apife:0.1.5",
apifeServiceType: "NodePort",
engineImage: "seldonio/engine:0.1.5",
name: "seldon",
namespace: "null",
operatorImage: "seldonio/cluster-manager:0.1.5",
operatorJavaOpts: "null",
operatorSpringOpts: "null",
withApife: "false",
withRbac: "true",
},
"issue-summarization-model-serving": {
endpoint: "REST",
image: "null",
name: "issue-summarization",
namespace: "null",
replicas: 2,
},
tensor2tensor: {
namespace: "null",
},
tfjob: {
namespace: "null",
},
ui: {
namespace: "null",
},
},
}
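
These values can be inspected and changed with the ksonnet CLI rather than by editing this file directly; a short sketch using the component names defined above:

```bash
cd ks-kubeflow
# Show the current parameters for a component
ks param list kubeflow-core
# Change one; ks rewrites components/params.libsonnet accordingly
ks param set kubeflow-core jupyterHubServiceType LoadBalancer
```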

View File

@@ -0,0 +1,57 @@
local env = std.extVar("__ksonnet/environments");
local params = std.extVar("__ksonnet/params").components.seldon;
local k = import "k.libsonnet";
local core = import "kubeflow/seldon/core.libsonnet";
// updatedParams uses the environment namespace if
// the namespace parameter is not explicitly set
local updatedParams = params {
namespace: if params.namespace == "null" then env.namespace else params.namespace,
};
local name = params.name;
local namespace = updatedParams.namespace;
local withRbac = params.withRbac;
local withApife = params.withApife;
// APIFE
local apifeImage = params.apifeImage;
local apifeServiceType = params.apifeServiceType;
// Cluster Manager (The CRD Operator)
local operatorImage = params.operatorImage;
local operatorSpringOptsParam = params.operatorSpringOpts;
local operatorSpringOpts = if operatorSpringOptsParam != "null" then operatorSpringOptsParam else "";
local operatorJavaOptsParam = params.operatorJavaOpts;
local operatorJavaOpts = if operatorJavaOptsParam != "null" then operatorJavaOptsParam else "";
// Engine
local engineImage = params.engineImage;
// APIFE
local apife = [
core.parts(namespace).apife(apifeImage, withRbac),
core.parts(namespace).apifeService(apifeServiceType),
];
local rbac = [
core.parts(namespace).rbacServiceAccount(),
core.parts(namespace).rbacClusterRoleBinding(),
];
// Core
local coreComponents = [
core.parts(namespace).deploymentOperator(engineImage, operatorImage, operatorSpringOpts, operatorJavaOpts, withRbac),
core.parts(namespace).redisDeployment(),
core.parts(namespace).redisService(),
core.parts(namespace).crd(),
];
if withRbac == "true" && withApife == "true" then
k.core.v1.list.new(apife + rbac + coreComponents)
else if withRbac == "true" && withApife == "false" then
k.core.v1.list.new(rbac + coreComponents)
else if withRbac == "false" && withApife == "true" then
k.core.v1.list.new(apife + coreComponents)
else if withRbac == "false" && withApife == "false" then
k.core.v1.list.new(coreComponents)
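
The `withRbac`/`withApife` branching above is driven entirely by string-valued parameters, so the deployed shape can be flipped without touching the jsonnet. A sketch, assuming an environment named `${KF_ENV}`:

```bash
cd ks-kubeflow
# Also deploy the API front end alongside the RBAC objects
ks param set seldon withApife true --env=${KF_ENV}
ks apply ${KF_ENV} -c seldon
```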

View File

@@ -42,6 +42,12 @@
containers: [
{
image: "gcr.io/kubeflow-images-staging/issue-summarization-ui:latest",
env: [
{
name: "GITHUB_TOKEN",
value: params.github_token,
}
],
name: "issue-summarization-ui",
ports: [
{
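
The `GITHUB_TOKEN` environment variable above is populated from the ksonnet `github_token` parameter, so the token is supplied at deploy time rather than baked into the image:

```bash
cd ks-kubeflow
# Wire the token through to the container's GITHUB_TOKEN env var
ks param set ui github_token ${GITHUB_TOKEN} --env ${KF_ENV}
ks apply ${KF_ENV} -c ui
```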

View File

@@ -1,4 +1,4 @@
local components = std.extVar("__ksonnet/components");
components {
components + {
// Insert user-specified overrides here.
}

View File

@@ -1,7 +1,7 @@
local base = import "base.libsonnet";
local k = import "k.libsonnet";
base {
base + {
// Insert user-specified overrides here. For example if a component is named "nginx-deployment", you might have something like:
// "nginx-deployment"+: k.deployment.mixin.metadata.labels({foo: "bar"})
}

View File

@@ -0,0 +1,16 @@
local params = import "../../components/params.libsonnet";
params + {
components +: {
// Insert component parameter overrides here. Ex:
// guestbook +: {
// name: "guestbook-dev",
// replicas: params.global.replicas,
// },
"kubeflow-core" +: {
cloud: "gke",
},
ui +: {
github_token: "null",
},
},
}

View File

@@ -0,0 +1,7 @@
local base = import "base.libsonnet";
local k = import "k.libsonnet";
base + {
// Insert user-specified overrides here. For example if a component is named "nginx-deployment", you might have something like:
// "nginx-deployment"+: k.deployment.mixin.metadata.labels({foo: "bar"})
}

View File

@@ -1,6 +1,6 @@
local params = import "../../components/params.libsonnet";
params {
components+: {
params + {
components +: {
// Insert component parameter overrides here. Ex:
// guestbook +: {
// name: "guestbook-dev",

View File

@@ -18,7 +18,7 @@ local hidden = {
},
},
mapContainersWithName(names, f)::
mapContainersWithName(names, f) ::
local nameSet =
if std.type(names) == "array"
then std.set(names)
@@ -32,46 +32,46 @@
),
};
k8s {
apps:: apps {
v1beta1:: apps.v1beta1 {
k8s + {
apps:: apps + {
v1beta1:: apps.v1beta1 + {
local v1beta1 = apps.v1beta1,
daemonSet:: v1beta1.daemonSet {
daemonSet:: v1beta1.daemonSet + {
mapContainers(f):: hidden.mapContainers(f),
mapContainersWithName(names, f):: hidden.mapContainersWithName(names, f),
},
deployment:: v1beta1.deployment {
deployment:: v1beta1.deployment + {
mapContainers(f):: hidden.mapContainers(f),
mapContainersWithName(names, f):: hidden.mapContainersWithName(names, f),
},
},
},
core:: core {
v1:: core.v1 {
core:: core + {
v1:: core.v1 + {
list:: {
new(items)::
{ apiVersion: "v1" } +
{ kind: "List" } +
{apiVersion: "v1"} +
{kind: "List"} +
self.items(items),
items(items):: if std.type(items) == "array" then { items+: items } else { items+: [items] },
items(items):: if std.type(items) == "array" then {items+: items} else {items+: [items]},
},
},
},
extensions:: extensions {
v1beta1:: extensions.v1beta1 {
extensions:: extensions + {
v1beta1:: extensions.v1beta1 + {
local v1beta1 = extensions.v1beta1,
daemonSet:: v1beta1.daemonSet {
daemonSet:: v1beta1.daemonSet + {
mapContainers(f):: hidden.mapContainers(f),
mapContainersWithName(names, f):: hidden.mapContainersWithName(names, f),
},
deployment:: v1beta1.deployment {
deployment:: v1beta1.deployment + {
mapContainers(f):: hidden.mapContainers(f),
mapContainersWithName(names, f):: hidden.mapContainersWithName(names, f),
},

View File

@@ -1,18 +0,0 @@
apiVersion: 0.1.0
environments:
default:
destination:
namespace: namespace
server: https://1.2.3.4
k8sVersion: v1.7.0
path: default
kind: ksonnet.io/app
name: ks-app
registries:
incubator:
gitVersion:
commitSha: 40285d8a14f1ac5787e405e1023cf0c07f6aa28c
refSpec: master
protocol: github
uri: github.com/ksonnet/parts/tree/master/incubator
version: 0.0.1

View File

@@ -1,19 +0,0 @@
{
global: {
// User-defined global parameters; accessible to all component and environments, Ex:
// replicas: 4,
},
components: {
// Component-level parameters, defined initially from 'ks prototype use ...'
// Each object below should correspond to a component in the components/ directory
tfjob: {
},
tensor2tensor: {
},
ui: {
},
},
}

View File

@@ -20,7 +20,7 @@ To build the frontend image, issue the following commands:
```
cd docker
docker build -t gcr.io/gcr-repository-name/issue-summarization-ui .
docker build -t gcr.io/gcr-repository-name/issue-summarization-ui:0.1 .
```
## Store the frontend image
@@ -34,19 +34,21 @@ gcloud docker -- push gcr.io/gcr-repository-name/issue-summarization-ui:0.1
## Deploy the frontend image to your kubernetes cluster
[notebooks](notebooks) contains a ksonnet app([ks-app](notebooks/ks-app)). The ui component in the ks-app contains the frontend image deployment.
The folder [ks-kubeflow](ks-kubeflow) contains a ksonnet app. The ui component in the ks-kubeflow app contains the frontend image deployment.
Create an environment to deploy the ksonnet app
To avoid rate-limiting by the GitHub API, you will need an [authentication token](https://github.com/ksonnet/ksonnet/blob/master/docs/troubleshooting.md) stored in the environment variable `${GITHUB_TOKEN}`. The token does not require any permissions; it is only used to avoid the stricter rate limits applied to anonymous API calls.
To use this token, set it as a parameter in the ui component:
```commandline
cd notebooks/ks-app
ks env add frontendenv --namespace ${NAMESPACE}
cd ks-kubeflow
ks param set ui github_token ${GITHUB_TOKEN} --env ${KF_ENV}
```
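
The variable itself has to exist in your shell first; a hypothetical sketch (any personal access token with no scopes works, since it is only used for rate limiting):

```bash
# Hypothetical: paste a token generated at https://github.com/settings/tokens
export GITHUB_TOKEN=<your-token>
```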
To serve the frontend interface, apply the ui component of the ksonnet app:
```
ks apply frontendenv -c ui
ks apply ${KF_ENV} -c ui
```
## View results from the frontend

View File

@@ -0,0 +1,41 @@
#!/bin/bash
# Instantiates a fresh version of kubeflow on a cluster
KF_VERSION=v0.1.0
NAMESPACE=${KF_DEV_NAMESPACE}
KF_ENV=cloud
# Initialize an empty ksonnet app
ks version
ks init ks-kubeflow
# Install kubeflow core package
cd ks-kubeflow
ks registry add kubeflow github.com/kubeflow/kubeflow/tree/${KF_VERSION}/kubeflow
ks pkg install kubeflow/core@${KF_VERSION}
# Generate core component
ks generate core kubeflow-core --name=kubeflow-core
# Enable anonymous usage metrics
ks param set kubeflow-core reportUsage true
ks param set kubeflow-core usageId $(uuidgen)
# Define an environment
ks env add ${KF_ENV}
# Configure our cloud to use GCP features
ks param set kubeflow-core cloud gke --env=${KF_ENV}
# Set the mount path for the Jupyter notebook PVC
ks param set kubeflow-core jupyterNotebookPVCMount /home/jovyan/work
# Create a namespace for my deployment
kubectl create namespace ${NAMESPACE}
ks env set ${KF_ENV} --namespace ${NAMESPACE}
# Instantiate objects on the cluster
ks apply ${KF_ENV} -c kubeflow-core
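
A quick sanity check after the apply, assuming the same `${NAMESPACE}`; the core pods should reach `Running` state (compare the listing in the setup README below):

```bash
kubectl get pods --namespace=${NAMESPACE}
```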

View File

@@ -0,0 +1,21 @@
#!/bin/bash
# Set up a shared persistent disk
PROJECT=${KF_DEV_PROJECT}
ZONE=${KF_DEV_ZONE}
NAMESPACE=${KF_DEV_NAMESPACE}
KF_ENV=cloud
PD_DISK_NAME=github-issues-data-${NAMESPACE}
# Create the disk
gcloud --project=${PROJECT} compute disks create --zone=${ZONE} ${PD_DISK_NAME} --description="PD for storing GitHub Issue data." --size=10GB
# Configure the environment to use the disk
cd ks-kubeflow
ks param set --env=${KF_ENV} kubeflow-core disks ${PD_DISK_NAME}
ks apply ${KF_ENV}
# Recreate the tf-hub pod so that it picks up the disk config
kubectl delete pod tf-hub-0 --namespace=${NAMESPACE}
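
To double-check both halves of this script, you can describe the GCE disk and confirm the hub pod was recreated; a sketch under the same variable assumptions:

```bash
# Confirm the disk exists in the expected zone
gcloud --project=${PROJECT} compute disks describe ${PD_DISK_NAME} --zone=${ZONE}
# Confirm tf-hub-0 came back after deletion
kubectl get pod tf-hub-0 --namespace=${NAMESPACE}
```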

View File

@@ -0,0 +1,11 @@
#!/bin/bash
# Set up access to JupyterHub from outside the cluster, primarily for viewing via
# a browser
NAMESPACE=${KF_DEV_NAMESPACE}
PODNAME=`kubectl get pods --namespace=${NAMESPACE} --selector="app=tf-hub" --output=template --template="{{with index .items 0}}{{.metadata.name}}{{end}}"`
kubectl port-forward --namespace=${NAMESPACE} $PODNAME 8000:8000
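
While the port-forward is running, JupyterHub is reachable locally, e.g.:

```bash
# In a second terminal; expect an HTTP response from JupyterHub
curl -I http://127.0.0.1:8000
```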

View File

@@ -0,0 +1,17 @@
#!/bin/bash
# Add Seldon Core to an existing kubeflow cluster
NAMESPACE=${KF_DEV_NAMESPACE}
KF_ENV=cloud
cd ks-kubeflow
# Give the cluster-admin role to the default service account in the ${NAMESPACE} namespace
kubectl create clusterrolebinding seldon-admin --clusterrole=cluster-admin --serviceaccount=${NAMESPACE}:default
# Install the kubeflow/seldon package
ks pkg install kubeflow/seldon
# Generate the seldon component and deploy it
ks generate seldon seldon --name=seldon --namespace=${NAMESPACE}
ks apply ${KF_ENV} -c seldon
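
To verify that the cluster manager registered its custom resource definition, a sketch (the exact CRD name depends on the Seldon version, hence the grep):

```bash
kubectl get crd | grep -i seldon
```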

View File

@@ -0,0 +1,24 @@
#!/bin/bash
# Deploys a serving component using an image generated by Seldon.
# Opens a port to the cluster for calling the service locally.
PROJECT=${KF_DEV_PROJECT}
NAMESPACE=${KF_DEV_NAMESPACE}
KF_ENV=cloud
cd ks-kubeflow
# Generate component
ks generate seldon-serve-simple issue-summarization-model-serving \
--name=issue-summarization \
--image=gcr.io/${PROJECT}/issue-summarization-${NAMESPACE}:0.1 \
--namespace=${NAMESPACE} \
--replicas=2
# Deploy it to cluster
ks apply ${KF_ENV} -c issue-summarization-model-serving
# Access from local machine
kubectl port-forward $(kubectl get pods -n ${NAMESPACE} -l service=ambassador -o jsonpath='{.items[0].metadata.name}') -n ${NAMESPACE} 8001:80
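
With the port-forward in place, the model can be exercised locally. A hypothetical request sketch; the URL path and payload shape follow Seldon's REST convention and are assumptions, not taken from this commit:

```bash
curl -X POST -H 'Content-Type: application/json' \
  -d '{"data":{"ndarray":[["issue body text to summarize"]]}}' \
  http://localhost:8001/seldon/issue-summarization/api/v0.1/predictions
```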

View File

@@ -0,0 +1,22 @@
#!/bin/bash
# Build and deploy a UI for accessing the trained model
PROJECT=${KF_DEV_PROJECT}
NAMESPACE=${KF_DEV_NAMESPACE}
KF_ENV=cloud
# Create the image locally
cd docker
docker build -t gcr.io/${PROJECT}/issue-summarization-ui-${NAMESPACE}:0.1 .
# Store in the container repo
gcloud docker -- push gcr.io/${PROJECT}/issue-summarization-ui-${NAMESPACE}:0.1
cd ../ks-kubeflow
ks param set ui github_token ${GITHUB_TOKEN} --env ${KF_ENV}
ks apply ${KF_ENV} -c ui
# Open access outside the cluster
kubectl port-forward $(kubectl get pods -n ${NAMESPACE} -l service=ambassador -o jsonpath='{.items[0].metadata.name}') -n ${NAMESPACE} 8080:80
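
With the port-forward running, the UI is served through Ambassador on local port 8080, e.g.:

```bash
# Or open http://localhost:8080 in a browser
curl -I http://localhost:8080
```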

View File

@@ -46,6 +46,7 @@ Now that we have an image with our model server, we can deploy it to our kuberne
Install the CRD and its controller using the seldon prototype
```bash
cd ks-kubeflow
# Give the cluster-admin role to the default service account in the ${NAMESPACE} namespace
kubectl create clusterrolebinding seldon-admin --clusterrole=cluster-admin --serviceaccount=${NAMESPACE}:default
# Install the kubeflow/seldon package

View File

@@ -8,7 +8,7 @@ In this part, you will setup kubeflow on an existing kubernetes cluster.
* `kubectl` CLI pointing to the kubernetes cluster
* Make sure that you can run `kubectl get nodes` from your terminal
successfully
* The ksonnet CLI: [ks](https://ksonnet.io/#get-started)
* The ksonnet CLI, v0.9.2 or higher: [ks](https://ksonnet.io/#get-started)
## Kubeflow setup
@@ -18,54 +18,54 @@ instructions on how to setup kubeflow on your kubernetes cluster. Specifically,
complete the following sections:
* [Deploy
Kubeflow](https://github.com/kubeflow/kubeflow/blob/master/user_guide.md#deploy-kubeflow)
* The `ks-kubeflow` directory can be used instead of creating a ksonnet
app from scratch.
* If you run into
[API rate limiting errors](https://github.com/ksonnet/ksonnet/blob/master/docs/troubleshooting.md#github-rate-limiting-errors),
ensure you have a `${GITHUB_TOKEN}` environment variable set.
* If you run into
[RBAC permissions issues](https://github.com/kubeflow/kubeflow/blob/master/user_guide.md#rbac-clusters)
running `ks apply` commands, be sure you have created a `cluster-admin` ClusterRoleBinding for your username.
* [Setup a persistent disk](https://github.com/kubeflow/kubeflow/blob/master/user_guide.md#advanced-customization)
* We need a shared persistent disk to store our training data since
containers' filesystems are ephemeral and don't have a lot of storage space.
* For this example, provision a `10GB` cluster-wide shared NFS mount with the
name `github-issues-data`.
* After the NFS is ready, delete the `tf-hub-0` pod so that it gets recreated and
picks up the NFS mount. You can delete it by running `kubectl delete pod
tf-hub-0 -n=${NAMESPACE}`
* [Bringing up a
Notebook](https://github.com/kubeflow/kubeflow/blob/master/user_guide.md#bringing-up-a-jupyter-notebook)
* When choosing an image for your cluster in the JupyterHub UI, use the
image from this example:
[`gcr.io/kubeflow-dev/issue-summarization-notebook-cpu:latest`](https://github.com/kubeflow/examples/blob/master/github_issue_summarization/workflow/Dockerfile).
After completing that, you should have the following ready:
* A ksonnet app in a directory named `my-kubeflow`
* A ksonnet app in a directory named `ks-kubeflow`
* An output similar to this for `kubectl get pods`
```
NAME READY STATUS RESTARTS AGE
ambassador-7987df44b9-4pht8 2/2 Running 0 1m
ambassador-7987df44b9-dh5h6 2/2 Running 0 1m
ambassador-7987df44b9-qrgsm 2/2 Running 0 1m
tf-hub-0 1/1 Running 0 1m
tf-job-operator-78757955b-qkg7s 1/1 Running 0 1m
NAME READY STATUS RESTARTS AGE
ambassador-75bb54594-dnxsd 2/2 Running 0 3m
ambassador-75bb54594-hjj6m 2/2 Running 0 3m
ambassador-75bb54594-z948h 2/2 Running 0 3m
jupyter-chasm 1/1 Running 0 49s
spartakus-volunteer-565b99cd69-knjf2 1/1 Running 0 3m
tf-hub-0 1/1 Running 0 3m
tf-job-dashboard-6c757d8684-d299l 1/1 Running 0 3m
tf-job-operator-77776c8446-lpprm 1/1 Running 0 3m
```
* A Jupyter Notebook accessible at `http://127.0.0.1:8000`
## Provision storage for training data
We need a shared persistent disk to store our training data since containers'
filesystems are ephemeral and don't have a lot of storage space.
The [Advanced
Customization](https://github.com/kubeflow/kubeflow/blob/master/user_guide.md#advanced-customization)
section of the [user
guide](https://github.com/kubeflow/kubeflow/blob/master/user_guide.md) has
instructions on how to provision a cluster-wide shared NFS.
For this example, provision a `10GB` NFS mount with the name
`github-issues-data`.
After the NFS is ready, delete the `tf-hub-0` pod so that it gets recreated and
picks up the NFS mount. You can delete it by running `kubectl delete pod
tf-hub-0 -n=${NAMESPACE}`
At this point you should have a 10GB mount `/mnt/github-issues-data` in your
Jupyter Notebook pod. Check this by running `!df` in your Jupyter Notebook.
* A 10GB mount `/mnt/github-issues-data` in your Jupyter Notebook pod. Check this
by running `!df` in your Jupyter Notebook.
## Summary
* We created a ksonnet app for our kubeflow deployment
* We deployed the kubeflow-core component to our kubernetes cluster
* We created a disk for storing our training data
* We connected to JupyterHub and spawned a new Jupyter notebook
Next: [Training the model using our cluster](training_the_model.md)

View File

@@ -13,8 +13,3 @@ gcloud --project=${PROJECT} compute disks delete --zone=${ZONE} ${PD_DISK_NAME}
```
Delete the kubeflow-app directory
```
rm -rf my-kubeflow
```

View File

@@ -25,13 +25,14 @@ After training completes, download the resulting files to your local machine. Th
* `body_pp.dpkl` - the serialized body preprocessor
* `title_pp.dpkl` - the serialized title preprocessor
In a locally cloned copy of the same repo, issue the following commands to place these three files into the `examples/github_issue_summarization/notebooks` folder:
If you haven't already, clone the [kubeflow/examples](https://github.com/kubeflow/examples) repo locally, then issue the following commands to place these three files into the `github_issue_summarization/notebooks` folder on your local machine:
```
cd github_issue_summarization/notebooks
PODNAME=`kubectl get pods --namespace=${NAMESPACE} --selector="app=jupyterhub" --output=template --template="{{with index .items 0}}{{.metadata.name}}{{end}}"`
kubectl --namespace=${NAMESPACE} cp ${PODNAME}:/home/jovyan/examples/github_issue_summarization/notebooks/seq2seq_model_tutorial.h5 examples/github_issue_summarization/notebooks
kubectl --namespace=${NAMESPACE} cp ${PODNAME}:/home/jovyan/examples/github_issue_summarization/notebooks/body_pp.dpkl examples/github_issue_summarization/notebooks
kubectl --namespace=${NAMESPACE} cp ${PODNAME}:/home/jovyan/examples/github_issue_summarization/notebooks/title_pp.dpkl examples/github_issue_summarization/notebooks
kubectl --namespace=${NAMESPACE} cp ${PODNAME}:/home/jovyan/examples/github_issue_summarization/notebooks/seq2seq_model_tutorial.h5 .
kubectl --namespace=${NAMESPACE} cp ${PODNAME}:/home/jovyan/examples/github_issue_summarization/notebooks/body_pp.dpkl .
kubectl --namespace=${NAMESPACE} cp ${PODNAME}:/home/jovyan/examples/github_issue_summarization/notebooks/title_pp.dpkl .
```
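
After the copies finish, the three files should be present in the current directory:

```bash
ls -lh seq2seq_model_tutorial.h5 body_pp.dpkl title_pp.dpkl
```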

View File

@@ -41,37 +41,31 @@ kubectl --namespace=${NAMESPACE} create secret generic gcp-credentials --from-fi
## Run the TFJob using your image
[notebooks](notebooks) contains a ksonnet app([ks-app](notebooks/ks-app)) to deploy the TFJob.
Create an environment to deploy the ksonnet app
```commandline
cd notebooks/ks-app
ks env add tfjob --namespace ${NAMESPACE}
```
[ks-kubeflow](ks-kubeflow) contains a ksonnet app to deploy the TFJob.
Set the appropriate params for the tfjob component
```commandline
ks param set tfjob namespace ${NAMESPACE} --env=tfjob
cd ks-kubeflow
ks param set tfjob namespace ${NAMESPACE} --env=${KF_ENV}
# The image pushed in the previous step
ks param set tfjob image "gcr.io/agwl-kubeflow/tf-job-issue-summarization:latest" --env=tfjob
ks param set tfjob image "gcr.io/agwl-kubeflow/tf-job-issue-summarization:latest" --env=${KF_ENV}
# Sample Size for training
ks param set tfjob sample_size 100000 --env=tfjob
ks param set tfjob sample_size 100000 --env=${KF_ENV}
# Set the input and output GCS Bucket locations
ks param set tfjob input_data_gcs_bucket "kubeflow-examples" --env=tfjob
ks param set tfjob input_data_gcs_path "github-issue-summarization-data/github-issues.zip" --env=tfjob
ks param set tfjob output_model_gcs_bucket "kubeflow-examples" --env=tfjob
ks param set tfjob output_model_gcs_path "github-issue-summarization-data/output_model.h5" --env=tfjob
ks param set tfjob input_data_gcs_bucket "kubeflow-examples" --env=${KF_ENV}
ks param set tfjob input_data_gcs_path "github-issue-summarization-data/github-issues.zip" --env=${KF_ENV}
ks param set tfjob output_model_gcs_bucket "kubeflow-examples" --env=${KF_ENV}
ks param set tfjob output_model_gcs_path "github-issue-summarization-data/output_model.h5" --env=${KF_ENV}
```
Deploy the app:
```commandline
ks apply tfjob -c tfjob
ks apply ${KF_ENV} -c tfjob
```
In a while you should see a new pod with the label `tf_job_name=tf-job-issue-summarization`
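
You can watch for that pod using the same label, for example:

```bash
kubectl get pods -n ${NAMESPACE} -l tf_job_name=tf-job-issue-summarization
```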