Updating mnist example to fix minio compatibility (#108)

* Updating mnist example to fix minio compatibility

* Changing default sa user for ksonnet entrypoint

* Updating mnist example based on pr feedback.
This commit is contained in:
Elson Rodriguez 2018-04-30 16:14:18 -07:00 committed by k8s-ci-robot
parent 79aa2074cd
commit 7434bb55ba
6 changed files with 28 additions and 22 deletions

View File

@ -2,7 +2,7 @@
FROM ubuntu:16.04
ENV KUBECTL_VERSION v1.9.2
ENV KSONNET_VERSION 0.8.0
ENV KSONNET_VERSION 0.10.1
RUN apt-get update
RUN apt-get -y install curl

View File

@ -1,5 +1,5 @@
#This container contains your model and any helper scripts specific to your model.
FROM tensorflow/tensorflow:1.5.1
FROM tensorflow/tensorflow:1.7.0
ADD model.py /opt/model.py
RUN chmod +x /opt/model.py

View File

@ -71,9 +71,9 @@ With our code ready, we will now build/push the docker image.
```
DOCKER_BASE_URL=docker.io/elsonrodriguez # Put your docker registry here
docker build . --no-cache -f Dockerfile.model -t ${DOCKER_BASE_URL}/mytfmodel:1.0
docker build . --no-cache -f Dockerfile.model -t ${DOCKER_BASE_URL}/mytfmodel:1.7
docker push ${DOCKER_BASE_URL}/mytfmodel:1.0
docker push ${DOCKER_BASE_URL}/mytfmodel:1.7
```
## Preparing your Kubernetes Cluster
@ -94,7 +94,7 @@ cd ${APP_NAME}
ks registry add kubeflow github.com/kubeflow/kubeflow/tree/master/kubeflow
ks pkg install kubeflow/core@1a6fc9d0e19e456b784ba1c23c03ec47648819d0
ks pkg install kubeflow/core@v0.1.2
ks pkg install kubeflow/argo@8d617d68b707d52a5906d38b235e04e540f2fcf7
# Deploy TF Operator and Argo
@ -133,15 +133,19 @@ $ argo list
NAME STATUS AGE DURATION
```
### Creating secrets for our workflow
For fetching and uploading data, our workflow requires S3 credentials. These credentials will be provided as kubernetes secrets:
### Creating secrets for our workflow and setting S3 variables.
For fetching and uploading data, our workflow requires S3 credentials and variables. The credentials will be provided as Kubernetes secrets, and the variables will be passed into the workflow as parameters. Modify the values below to suit your environment.
```
export S3_ENDPOINT=s3.us-west-2.amazonaws.com
export AWS_ENDPOINT_URL=https://${S3_ENDPOINT}
export S3_ENDPOINT=s3.us-west-2.amazonaws.com #replace with your s3 endpoint in a host:port format, e.g. minio:9000
export AWS_ENDPOINT_URL=https://${S3_ENDPOINT} #use http instead of https for default minio installs
export AWS_ACCESS_KEY_ID=xxxxx
export AWS_SECRET_ACCESS_KEY=xxxxx
export AWS_REGION=us-west-2
export BUCKET_NAME=mybucket
export S3_USE_HTTPS=1 #set to 0 for default minio installs
export S3_VERIFY_SSL=1 #set to 0 for default minio installs
kubectl create secret generic aws-creds --from-literal=awsAccessKeyID=${AWS_ACCESS_KEY_ID} \
--from-literal=awsSecretAccessKey=${AWS_SECRET_ACCESS_KEY}
@ -174,9 +178,8 @@ First we need to set a few variables in our workflow. Make sure to set your dock
DOCKER_BASE_URL=docker.io/elsonrodriguez # Put your docker registry here
export S3_DATA_URL=s3://${BUCKET_NAME}/data/mnist/
export S3_TRAIN_BASE_URL=s3://${BUCKET_NAME}/models
export AWS_REGION=us-west-2
export JOB_NAME=myjob-$(uuidgen | cut -c -5 | tr '[:upper:]' '[:lower:]')
export TF_MODEL_IMAGE=${DOCKER_BASE_URL}/mytfmodel:1.0
export TF_MODEL_IMAGE=${DOCKER_BASE_URL}/mytfmodel:1.7
export TF_WORKER=3
export MODEL_TRAIN_STEPS=200
```
@ -194,6 +197,8 @@ argo submit model-train.yaml -n ${NAMESPACE} --serviceaccount tf-user \
-p job-name=${JOB_NAME} \
-p tf-worker=${TF_WORKER} \
-p model-train-steps=${MODEL_TRAIN_STEPS} \
-p s3-use-https=${S3_USE_HTTPS} \
-p s3-verify-ssl=${S3_VERIFY_SSL} \
-p namespace=${NAMESPACE}
```

View File

@ -1,6 +1,7 @@
#!/bin/bash
SERVICE_ACCOUNT=${SERVICE_ACCOUNT:-default}
#FIXME this is set to tf-user by default in order to work around lack of Serviceaccount setting in argo.
SERVICE_ACCOUNT=${SERVICE_ACCOUNT:-tf-user}
create-kubeconfig ${SERVICE_ACCOUNT} > kubeconfig.tmp
cp kubeconfig.tmp ~/.kube/config

View File

@ -115,7 +115,7 @@ spec:
- name: model-serving-ks-tag
- name: model-name
script:
image: elsonrodriguez/ksonnet:0.8.0-test6
image: elsonrodriguez/ksonnet:0.10.1
command: ["/ksonnet-entrypoint.sh"]
source: |
ks init my-model-server
@ -135,8 +135,8 @@ spec:
ks param set {{inputs.parameters.model-name}} s3_secret_secretaccesskey_key_name awsSecretAccessKey
ks param set {{inputs.parameters.model-name}} s3_aws_region {{inputs.parameters.aws-region}}
ks param set {{inputs.parameters.model-name}} s3_endpoint {{inputs.parameters.s3-endpoint}}
ks param set {{inputs.parameters.model-name}} s3_use_https \'{{inputs.parameters.s3-use-https}}\'
ks param set {{inputs.parameters.model-name}} s3_verify_ssl \'{{inputs.parameters.s3-verify-ssl}}\'
ks param set {{inputs.parameters.model-name}} s3_use_https {{inputs.parameters.s3-use-https}} --as-string
ks param set {{inputs.parameters.model-name}} s3_verify_ssl {{inputs.parameters.s3-verify-ssl}} --as-string
ks apply default -c {{inputs.parameters.model-name}}
#FIXME This doesn't actually work in the current version of argo. We're using a default of `tf-user` in the container entrypoint for now.
env:

View File

@ -15,13 +15,13 @@ spec:
- name: tf-ps # number of tf parameter servers
value: 2
- name: tf-model-image
value: elsonrodriguez/mytfmodel:1.0
- name: tf-serving-image #this image is a mirror of a private kubeflow-ci image
value: elsonrodriguez/model-server:1.0
value: elsonrodriguez/mytfmodel:1.7
- name: tf-serving-image #FIXME this image is a mirror of a private kubeflow-ci image, once we're building images swap this out. https://github.com/kubeflow/kubeflow/blob/dcf4adfe2dd1cec243647f3dd05d7c26246fddb1/components/k8s-model-server/images/Dockerfile.cpu
value: elsonrodriguez/model-server:1.6
- name: tf-tensorboard-image
value: tensorflow/tensorflow:1.5.1
value: tensorflow/tensorflow:1.7.0
- name: ks-image
value: elsonrodriguez/ksonnet:0.8.0-test7
value: elsonrodriguez/ksonnet:0.10.1
- name: model-name
value: mnist
- name: model-hidden-units
@ -354,8 +354,8 @@ spec:
ks param set {{workflow.parameters.model-name}} s3_secret_secretaccesskey_key_name awsSecretAccessKey
ks param set {{workflow.parameters.model-name}} s3_aws_region {{workflow.parameters.aws-region}}
ks param set {{workflow.parameters.model-name}} s3_endpoint {{workflow.parameters.s3-endpoint}}
ks param set {{workflow.parameters.model-name}} s3_use_https \'{{workflow.parameters.s3-use-https}}\'
ks param set {{workflow.parameters.model-name}} s3_verify_ssl \'{{workflow.parameters.s3-verify-ssl}}\'
ks param set {{workflow.parameters.model-name}} s3_use_https {{workflow.parameters.s3-use-https}} --as-string
ks param set {{workflow.parameters.model-name}} s3_verify_ssl {{workflow.parameters.s3-verify-ssl}} --as-string
ks apply default -c {{workflow.parameters.model-name}}
#FIXME This doesn't actually work in the current version of argo. We're using a default of `tf-user` in the container entrypoint for now.
env: