diff --git a/github_issue_summarization/pipelines/README.md b/github_issue_summarization/pipelines/README.md index d1a0bda0..6e958f3b 100644 --- a/github_issue_summarization/pipelines/README.md +++ b/github_issue_summarization/pipelines/README.md @@ -4,6 +4,8 @@ This Kubeflow Pipelines example shows how to build a web app that summarizes GitHub issues using Kubeflow Pipelines to train and serve a model. The pipeline trains a [Tensor2Tensor](https://github.com/tensorflow/tensor2tensor/) model on GitHub issue data, learning to predict issue titles from issue bodies. It then exports the trained model and deploys the exported model using [Tensorflow Serving](https://github.com/tensorflow/serving). The final step in the pipeline launches a web app, which interacts with the TF-Serving instance in order to get model predictions. +The example is designed to run on a Hosted KFP installation, created via the [Cloud Console](https://console.cloud.google.com/ai-platform/pipelines/clusters) or via the ['standalone' installation](https://www.kubeflow.org/docs/pipelines/installation/standalone-deployment/) instructions; with minor changes, it would also be straightforward to run on a full Kubeflow installation. + You can follow this example as a codelab: [g.co/codelabs/kfp-gis](https://g.co/codelabs/kfp-gis). diff --git a/github_issue_summarization/pipelines/components/kubeflow-resources/containers/tf-serving-gh/Dockerfile b/github_issue_summarization/pipelines/components/kubeflow-resources/containers/tf-serving-gh/Dockerfile index 89df86c1..5045827b 100644 --- a/github_issue_summarization/pipelines/components/kubeflow-resources/containers/tf-serving-gh/Dockerfile +++ b/github_issue_summarization/pipelines/components/kubeflow-resources/containers/tf-serving-gh/Dockerfile @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License.
-FROM ubuntu:16.04 +FROM tensorflow/tensorflow:1.15.0-py3 RUN apt-get update -y @@ -20,7 +20,20 @@ RUN apt-get install --no-install-recommends -y -q ca-certificates python-dev pyt RUN easy_install pip -RUN pip install pyyaml==3.12 six==1.11.0 requests==2.18.4 tensorflow==1.11.0 +RUN pip install pyyaml==3.12 six requests==2.18.4 + +# RUN apt-get update \ +# && apt-get install -y python3-pip python3-dev wget unzip \ +# && cd /usr/local/bin \ +# && ln -s /usr/bin/python3 python \ +# && pip3 install --upgrade pip + +# RUN apt-get install -y wget unzip git + +# RUN pip install --upgrade pip +# RUN pip install urllib3 certifi retrying +# RUN pip install google-cloud-storage + RUN wget -nv https://dl.google.com/dl/cloudsdk/release/google-cloud-sdk.zip && \ unzip -qq google-cloud-sdk.zip -d tools && \ @@ -34,16 +47,9 @@ RUN wget -nv https://dl.google.com/dl/cloudsdk/release/google-cloud-sdk.zip && \ tools/google-cloud-sdk/bin/gcloud config set component_manager/disable_update_check true && \ touch /tools/google-cloud-sdk/lib/third_party/google.py -RUN wget -nv https://github.com/ksonnet/ksonnet/releases/download/v0.11.0/ks_0.11.0_linux_amd64.tar.gz && \ - tar -xvzf ks_0.11.0_linux_amd64.tar.gz && \ - mkdir -p /tools/ks/bin && \ - cp ./ks_0.11.0_linux_amd64/ks /tools/ks/bin && \ - rm ks_0.11.0_linux_amd64.tar.gz && \ - rm -r ks_0.11.0_linux_amd64 -ENV PATH $PATH:/tools/google-cloud-sdk/bin:/tools/ks/bin +ENV PATH $PATH:/tools/google-cloud-sdk/bin ADD build /ml ENTRYPOINT ["python", "/ml/deploy-tf-serve.py"] - diff --git a/github_issue_summarization/pipelines/components/kubeflow-resources/tf-serving-gh/deploy-tf-serve.py b/github_issue_summarization/pipelines/components/kubeflow-resources/tf-serving-gh/deploy-tf-serve.py index f7e5f598..856c3860 100644 --- a/github_issue_summarization/pipelines/components/kubeflow-resources/tf-serving-gh/deploy-tf-serve.py +++ b/github_issue_summarization/pipelines/components/kubeflow-resources/tf-serving-gh/deploy-tf-serve.py @@ -40,9 +40,10 @@ def main(): 'If not set, assuming this runs in a GKE container and current ' + 'cluster is used.') parser.add_argument('--zone', type=str, help='zone of the kubeflow cluster.') + parser.add_argument('--namespace', type=str, default='default') args = parser.parse_args() - KUBEFLOW_NAMESPACE = 'kubeflow' + # KUBEFLOW_NAMESPACE = 'kubeflow' # Make sure model dir exists before proceeding retries = 0 @@ -90,7 +91,7 @@ def main(): with open(target_file, "w") as target: data = f.read() changed = data.replace('MODEL_NAME', args.model_name) - changed1 = changed.replace('KUBEFLOW_NAMESPACE', KUBEFLOW_NAMESPACE) + changed1 = changed.replace('KUBEFLOW_NAMESPACE', args.namespace) changed2 = changed1.replace('MODEL_PATH', args.model_path) target.write(changed2) diff --git a/github_issue_summarization/pipelines/components/kubeflow-resources/tf-serving-gh/tf-serve-template.yaml b/github_issue_summarization/pipelines/components/kubeflow-resources/tf-serving-gh/tf-serve-template.yaml index 240f1ac3..f4161339 100644 --- a/github_issue_summarization/pipelines/components/kubeflow-resources/tf-serving-gh/tf-serve-template.yaml +++ b/github_issue_summarization/pipelines/components/kubeflow-resources/tf-serving-gh/tf-serve-template.yaml @@ -28,7 +28,7 @@ spec: app: MODEL_NAME type: ClusterIP --- -apiVersion: extensions/v1beta1 +apiVersion: apps/v1 kind: Deployment metadata: labels: @@ -37,16 +37,19 @@ metadata: namespace: KUBEFLOW_NAMESPACE spec: replicas: 1 + selector: + matchLabels: + app: MODEL_NAME template: metadata: labels: app: MODEL_NAME 
version: v1 spec: - volumes: - - name: gcp-credentials-user-gcp-sa - secret: - secretName: user-gcp-sa + # volumes: + # - name: gcp-credentials-user-gcp-sa + # secret: + # secretName: user-gcp-sa containers: - args: - --port=9000 @@ -56,15 +59,15 @@ spec: command: - /usr/bin/tensorflow_model_server image: tensorflow/serving - env: - - name: GOOGLE_APPLICATION_CREDENTIALS - value: /secret/gcp-credentials/user-gcp-sa.json - - name: CLOUDSDK_AUTH_CREDENTIAL_FILE_OVERRIDE - value: /secret/gcp-credentials/user-gcp-sa.json - volumeMounts: - - mountPath: /secret/gcp-credentials - name: gcp-credentials-user-gcp-sa - imagePullPolicy: IfNotPresent + # env: + # - name: GOOGLE_APPLICATION_CREDENTIALS + # value: /secret/gcp-credentials/user-gcp-sa.json + # - name: CLOUDSDK_AUTH_CREDENTIAL_FILE_OVERRIDE + # value: /secret/gcp-credentials/user-gcp-sa.json + # volumeMounts: + # - mountPath: /secret/gcp-credentials + # name: gcp-credentials-user-gcp-sa + imagePullPolicy: Always livenessProbe: initialDelaySeconds: 30 periodSeconds: 30 diff --git a/github_issue_summarization/pipelines/components/t2t/containers/t2t_app/Dockerfile b/github_issue_summarization/pipelines/components/t2t/containers/t2t_app/Dockerfile index c86a15c3..6a6179bc 100644 --- a/github_issue_summarization/pipelines/components/t2t/containers/t2t_app/Dockerfile +++ b/github_issue_summarization/pipelines/components/t2t/containers/t2t_app/Dockerfile @@ -21,9 +21,9 @@ RUN apt-get install --no-install-recommends -y -q ca-certificates python-dev pyt RUN easy_install pip -RUN pip install tensorflow-probability==0.5 +RUN pip install tensorflow-probability==0.7 RUN pip install tensor2tensor==1.11.0 -RUN pip install tensorflow-serving-api +RUN pip install tensorflow-serving-api==1.14.0 RUN pip install gunicorn RUN pip install pyyaml==3.12 six==1.11.0 RUN pip install pandas diff --git a/github_issue_summarization/pipelines/components/t2t/containers/webapp-launcher/Dockerfile b/github_issue_summarization/pipelines/components/t2t/containers/webapp-launcher/Dockerfile index 05cc71c4..8c819b0c 100644 --- a/github_issue_summarization/pipelines/components/t2t/containers/webapp-launcher/Dockerfile +++ b/github_issue_summarization/pipelines/components/t2t/containers/webapp-launcher/Dockerfile @@ -12,15 +12,20 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-FROM ubuntu:16.04 +FROM ubuntu:18.04 -RUN apt-get update -y +RUN apt-get update \ + && apt-get install -y python3-pip python3-dev wget unzip \ + && cd /usr/local/bin \ + && ln -s /usr/bin/python3 python \ + && pip3 install --upgrade pip -RUN apt-get install --no-install-recommends -y -q ca-certificates python-dev python-setuptools wget unzip +# RUN apt-get install -y wget unzip git -RUN easy_install pip +RUN pip install --upgrade pip +RUN pip install urllib3 certifi retrying +RUN pip install google-cloud-storage -RUN pip install pyyaml==3.12 six==1.11.0 requests==2.18.4 tensorflow==1.12.0 RUN wget -nv https://dl.google.com/dl/cloudsdk/release/google-cloud-sdk.zip && \ unzip -qq google-cloud-sdk.zip -d tools && \ @@ -34,14 +39,8 @@ RUN wget -nv https://dl.google.com/dl/cloudsdk/release/google-cloud-sdk.zip && \ tools/google-cloud-sdk/bin/gcloud config set component_manager/disable_update_check true && \ touch /tools/google-cloud-sdk/lib/third_party/google.py -RUN wget -nv https://github.com/ksonnet/ksonnet/releases/download/v0.11.0/ks_0.11.0_linux_amd64.tar.gz && \ - tar -xvzf ks_0.11.0_linux_amd64.tar.gz && \ - mkdir -p /tools/ks/bin && \ - cp ./ks_0.11.0_linux_amd64/ks /tools/ks/bin && \ - rm ks_0.11.0_linux_amd64.tar.gz && \ - rm -r ks_0.11.0_linux_amd64 -ENV PATH $PATH:/tools/google-cloud-sdk/bin:/tools/ks/bin +ENV PATH $PATH:/tools/google-cloud-sdk/bin ADD build /ml diff --git a/github_issue_summarization/pipelines/components/t2t/t2t-app/app/templates/index.html b/github_issue_summarization/pipelines/components/t2t/t2t-app/app/templates/index.html index d3478b6a..6d16e814 100644 --- a/github_issue_summarization/pipelines/components/t2t/t2t-app/app/templates/index.html +++ b/github_issue_summarization/pipelines/components/t2t/t2t-app/app/templates/index.html @@ -12,8 +12,8 @@
[index.html hunk: the surrounding HTML markup was lost in extraction; the change swaps the old page-header block for an updated one around the "Github Issue Summarization" title.]
This app takes a GitHub issue body as input and predicts a title for it. Behind the scenes it uses a Tensor2Tensor TensorFlow model, served via TF-Serving.

(Thanks to Hamel Husain for the original concept and source data.)

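For context on what the web app above does: it sends the issue body to the TF-Serving deployment created earlier in the pipeline, which serves the exported T2T model over gRPC on port 9000 (per the `--port=9000` arg in tf-serve-template.yaml). Below is a minimal request sketch using the `tensorflow-serving-api` package pinned in the t2t_app Dockerfile; the service host, input key, and signature name are illustrative assumptions, and the real request and T2T text-encoding logic live in the t2t-app sources:

```python
# Minimal sketch of querying the TF-Serving instance over gRPC.
# Assumptions: host/port match the Service in tf-serve-template.yaml, and the
# T2T export accepts a serialized tf.Example under the 'input' key with the
# 'serving_default' signature.
import grpc
import tensorflow as tf
from tensorflow_serving.apis import predict_pb2, prediction_service_pb2_grpc


def predict_title(serialized_example, model_name, host='ghsumm:9000'):
  channel = grpc.insecure_channel(host)
  stub = prediction_service_pb2_grpc.PredictionServiceStub(channel)
  request = predict_pb2.PredictRequest()
  request.model_spec.name = model_name
  request.model_spec.signature_name = 'serving_default'
  # Wrap the serialized tf.Example in a batch-of-one string tensor.
  request.inputs['input'].CopyFrom(
      tf.make_tensor_proto([serialized_example], shape=[1]))
  return stub.Predict(request, 10.0)  # 10-second timeout
```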
diff --git a/github_issue_summarization/pipelines/components/t2t/webapp-launcher/deploy-webapp.py b/github_issue_summarization/pipelines/components/t2t/webapp-launcher/deploy-webapp.py index d32549f3..f2105e87 100644 --- a/github_issue_summarization/pipelines/components/t2t/webapp-launcher/deploy-webapp.py +++ b/github_issue_summarization/pipelines/components/t2t/webapp-launcher/deploy-webapp.py @@ -36,11 +36,12 @@ def main(): 'If not set, assuming this runs in a GKE container and current ' + 'cluster is used.') parser.add_argument('--zone', type=str, help='zone of the kubeflow cluster.') + parser.add_argument('--namespace', type=str, default='default') args = parser.parse_args() - KUBEFLOW_NAMESPACE = 'kubeflow' + # KUBEFLOW_NAMESPACE = 'kubeflow' - print("using model name: %s and namespace: %s" % (args.model_name, KUBEFLOW_NAMESPACE)) + print("using model name: %s and namespace: %s" % (args.model_name, args.namespace)) logging.getLogger().setLevel(logging.INFO) args_dict = vars(args) @@ -70,7 +71,7 @@ def main(): with open(target_file, "w") as target: data = f.read() changed = data.replace('MODEL_NAME', args.model_name) - changed1 = changed.replace('KUBEFLOW_NAMESPACE', KUBEFLOW_NAMESPACE).replace( + changed1 = changed.replace('KUBEFLOW_NAMESPACE', args.namespace).replace( 'GITHUB_TOKEN', args.github_token).replace( 'DATA_DIR', 'gs://aju-dev-demos-codelabs/kubecon/t2t_data_gh_all/') target.write(changed1) diff --git a/github_issue_summarization/pipelines/components/t2t/webapp-launcher/t2tapp-template.yaml b/github_issue_summarization/pipelines/components/t2t/webapp-launcher/t2tapp-template.yaml index 3f6c48aa..155d6286 100644 --- a/github_issue_summarization/pipelines/components/t2t/webapp-launcher/t2tapp-template.yaml +++ b/github_issue_summarization/pipelines/components/t2t/webapp-launcher/t2tapp-template.yaml @@ -1,27 +1,3 @@ -apiVersion: networking.istio.io/v1alpha3 -kind: VirtualService -metadata: - name: webapp-MODEL_NAME -spec: - gateways: - - kubeflow-gateway - hosts: - - '*' - http: - - match: - - uri: - prefix: /webapp/ - rewrite: - uri: / - route: - - destination: - host: MODEL_NAME-webappsvc.KUBEFLOW_NAMESPACE.svc.cluster.local - port: - number: 80 - timeout: 300s - ---- - apiVersion: v1 kind: Service metadata: @@ -50,33 +26,36 @@ spec: --- -apiVersion: extensions/v1beta1 +apiVersion: apps/v1 kind: Deployment metadata: name: MODEL_NAME-webapp spec: replicas: 1 + selector: + matchLabels: + app: ghsumm template: metadata: labels: app: ghsumm role: frontend spec: - volumes: - - name: gcp-credentials-user-gcp-sa - secret: - secretName: user-gcp-sa + # volumes: + # - name: gcp-credentials-user-gcp-sa + # secret: + # secretName: user-gcp-sa containers: - name: MODEL_NAME-webapp - image: gcr.io/google-samples/ml-pipeline-t2tapp:v3ap - env: - - name: GOOGLE_APPLICATION_CREDENTIALS - value: /secret/gcp-credentials/user-gcp-sa.json - - name: CLOUDSDK_AUTH_CREDENTIAL_FILE_OVERRIDE - value: /secret/gcp-credentials/user-gcp-sa.json - volumeMounts: - - mountPath: /secret/gcp-credentials - name: gcp-credentials-user-gcp-sa + image: gcr.io/google-samples/ml-pipeline-t2tapp:vap9 + # env: + # - name: GOOGLE_APPLICATION_CREDENTIALS + # value: /secret/gcp-credentials/user-gcp-sa.json + # - name: CLOUDSDK_AUTH_CREDENTIAL_FILE_OVERRIDE + # value: /secret/gcp-credentials/user-gcp-sa.json + # volumeMounts: + # - mountPath: /secret/gcp-credentials + # name: gcp-credentials-user-gcp-sa # resources: # limits: # nvidia.com/gpu: 1 diff --git 
a/github_issue_summarization/pipelines/example_pipelines/gh_summ.py.tar.gz b/github_issue_summarization/pipelines/example_pipelines/gh_summ.py.tar.gz deleted file mode 100644 index 1ff03e97..00000000 Binary files a/github_issue_summarization/pipelines/example_pipelines/gh_summ.py.tar.gz and /dev/null differ diff --git a/github_issue_summarization/pipelines/example_pipelines/gh_summ_hosted_kfp.py b/github_issue_summarization/pipelines/example_pipelines/gh_summ_hosted_kfp.py new file mode 100644 index 00000000..2fa2738c --- /dev/null +++ b/github_issue_summarization/pipelines/example_pipelines/gh_summ_hosted_kfp.py @@ -0,0 +1,88 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +import kfp.dsl as dsl +import kfp.gcp as gcp +import kfp.components as comp + + +COPY_ACTION = 'copy_data' +TRAIN_ACTION = 'train' +DATASET = 'dataset' +MODEL = 'model' + +copydata_op = comp.load_component_from_url( + 'https://raw.githubusercontent.com/kubeflow/examples/master/github_issue_summarization/pipelines/components/t2t/datacopy_component.yaml' # pylint: disable=line-too-long + ) + +train_op = comp.load_component_from_url( + 'https://raw.githubusercontent.com/kubeflow/examples/master/github_issue_summarization/pipelines/components/t2t/train_component.yaml' # pylint: disable=line-too-long + ) + + +@dsl.pipeline( + name='Github issue summarization', + description='Demonstrate Tensor2Tensor-based training and TF-Serving' +) +def gh_summ( #pylint: disable=unused-argument + train_steps: 'Integer' = 2019300, + project: str = 'YOUR_PROJECT_HERE', + github_token: str = 'YOUR_GITHUB_TOKEN_HERE', + working_dir: 'GCSPath' = 'gs://YOUR_GCS_DIR_HERE', + checkpoint_dir: 'GCSPath' = 'gs://aju-dev-demos-codelabs/kubecon/model_output_tbase.bak2019000/', + deploy_webapp: str = 'true', + data_dir: 'GCSPath' = 'gs://aju-dev-demos-codelabs/kubecon/t2t_data_gh_all/' + ): + + + copydata = copydata_op( + data_dir=data_dir, + checkpoint_dir=checkpoint_dir, + model_dir='%s/%s/model_output' % (working_dir, dsl.RUN_ID_PLACEHOLDER), + action=COPY_ACTION, + ) + + + train = train_op( + data_dir=data_dir, + model_dir=copydata.outputs['copy_output_path'], + action=TRAIN_ACTION, train_steps=train_steps, + deploy_webapp=deploy_webapp + ) + + serve = dsl.ContainerOp( + name='serve', + image='gcr.io/google-samples/ml-pipeline-kubeflow-tfserve:v6', + arguments=["--model_name", 'ghsumm-%s' % (dsl.RUN_ID_PLACEHOLDER,), + "--model_path", train.outputs['train_output_path'], "--namespace", 'default' + ] + ) + + train.set_gpu_limit(1) + + with dsl.Condition(train.outputs['launch_server'] == 'true'): + webapp = dsl.ContainerOp( + name='webapp', + image='gcr.io/google-samples/ml-pipeline-webapp-launcher:v1', + arguments=["--model_name", 'ghsumm-%s' % (dsl.RUN_ID_PLACEHOLDER,), + "--github_token", github_token, "--namespace", 'default'] + + ) + webapp.after(serve) + + +if __name__ == '__main__': + import kfp.compiler as compiler + compiler.Compiler().compile(gh_summ, __file__ + '.tar.gz') diff --git 
a/github_issue_summarization/pipelines/example_pipelines/gh_summ_hosted_kfp.py.tar.gz b/github_issue_summarization/pipelines/example_pipelines/gh_summ_hosted_kfp.py.tar.gz new file mode 100644 index 00000000..9b54c8dc Binary files /dev/null and b/github_issue_summarization/pipelines/example_pipelines/gh_summ_hosted_kfp.py.tar.gz differ diff --git a/github_issue_summarization/pipelines/example_pipelines/gh_summ_preempt.py b/github_issue_summarization/pipelines/example_pipelines/gh_summ_hosted_kfp_preempt.py similarity index 52% rename from github_issue_summarization/pipelines/example_pipelines/gh_summ_preempt.py rename to github_issue_summarization/pipelines/example_pipelines/gh_summ_hosted_kfp_preempt.py index 2643844c..2c68d462 100644 --- a/github_issue_summarization/pipelines/example_pipelines/gh_summ_preempt.py +++ b/github_issue_summarization/pipelines/example_pipelines/gh_summ_hosted_kfp_preempt.py @@ -16,6 +16,7 @@ import kfp.dsl as dsl import kfp.gcp as gcp import kfp.components as comp +# from kfp.dsl.types import GCSPath, String COPY_ACTION = 'copy_data' @@ -32,77 +33,54 @@ train_op = comp.load_component_from_url( 'https://raw.githubusercontent.com/kubeflow/examples/master/github_issue_summarization/pipelines/components/t2t/train_component.yaml' # pylint: disable=line-too-long ) -metadata_log_op = comp.load_component_from_url( - 'https://raw.githubusercontent.com/kubeflow/examples/master/github_issue_summarization/pipelines/components/t2t/metadata_log_component.yaml' # pylint: disable=line-too-long - ) @dsl.pipeline( name='Github issue summarization', description='Demonstrate Tensor2Tensor-based training and TF-Serving' ) def gh_summ( #pylint: disable=unused-argument - train_steps=2019300, - project='YOUR_PROJECT_HERE', - github_token='YOUR_GITHUB_TOKEN_HERE', - working_dir='YOUR_GCS_DIR_HERE', - checkpoint_dir='gs://aju-dev-demos-codelabs/kubecon/model_output_tbase.bak2019000', - deploy_webapp='true', - data_dir='gs://aju-dev-demos-codelabs/kubecon/t2t_data_gh_all/' + train_steps: 'Integer' = 2019300, + project: str = 'YOUR_PROJECT_HERE', + github_token: str = 'YOUR_GITHUB_TOKEN_HERE', + working_dir: 'GCSPath' = 'gs://YOUR_GCS_DIR_HERE', + checkpoint_dir: 'GCSPath' = 'gs://aju-dev-demos-codelabs/kubecon/model_output_tbase.bak2019000/', + deploy_webapp: str = 'true', + data_dir: 'GCSPath' = 'gs://aju-dev-demos-codelabs/kubecon/t2t_data_gh_all/' ): copydata = copydata_op( - working_dir=working_dir, data_dir=data_dir, checkpoint_dir=checkpoint_dir, - model_dir='%s/%s/model_output' % (working_dir, '{{workflow.name}}'), - action=COPY_ACTION - ).apply(gcp.use_gcp_secret('user-gcp-sa')) - - - log_dataset = metadata_log_op( - log_type=DATASET, - workspace_name=WORKSPACE_NAME, - run_name='{{workflow.name}}', - data_uri=data_dir + model_dir='%s/%s/model_output' % (working_dir, dsl.RUN_ID_PLACEHOLDER), + action=COPY_ACTION, ) + train = train_op( - working_dir=working_dir, data_dir=data_dir, - checkpoint_dir=checkpoint_dir, - model_dir='%s/%s/model_output' % (working_dir, '{{workflow.name}}'), + model_dir=copydata.outputs['copy_output_path'], action=TRAIN_ACTION, train_steps=train_steps, deploy_webapp=deploy_webapp - ).apply(gcp.use_gcp_secret('user-gcp-sa')) - - - log_model = metadata_log_op( - log_type=MODEL, - workspace_name=WORKSPACE_NAME, - run_name='{{workflow.name}}', - model_uri='%s/%s/model_output' % (working_dir, '{{workflow.name}}') ) serve = dsl.ContainerOp( name='serve', - image='gcr.io/google-samples/ml-pipeline-kubeflow-tfserve', - arguments=["--model_name", 'ghsumm-%s' % 
('{{workflow.name}}',), - "--model_path", '%s/%s/model_output/export' % (working_dir, '{{workflow.name}}') + image='gcr.io/google-samples/ml-pipeline-kubeflow-tfserve:v5', + arguments=["--model_name", 'ghsumm-%s' % (dsl.RUN_ID_PLACEHOLDER,), + "--model_path", train.outputs['train_output_path'] ] ) - log_dataset.after(copydata) - train.after(copydata) - log_model.after(train) - serve.after(train) - train.set_gpu_limit(4).apply(gcp.use_preemptible_nodepool()).set_retry(5) - train.set_memory_limit('48G') - with dsl.Condition(train.output == 'true'): + # train.set_gpu_limit(1) + train.set_gpu_limit(1).apply(gcp.use_preemptible_nodepool()).set_retry(10) + + + with dsl.Condition(train.outputs['launch_server'] == 'true'): webapp = dsl.ContainerOp( name='webapp', - image='gcr.io/google-samples/ml-pipeline-webapp-launcher:v2ap', - arguments=["--model_name", 'ghsumm-%s' % ('{{workflow.name}}',), + image='gcr.io/google-samples/ml-pipeline-webapp-launcher:v7ap', + arguments=["--model_name", 'ghsumm-%s' % (dsl.RUN_ID_PLACEHOLDER,), "--github_token", github_token] ) diff --git a/github_issue_summarization/pipelines/example_pipelines/gh_summ_serve.py.tar.gz b/github_issue_summarization/pipelines/example_pipelines/gh_summ_serve.py.tar.gz deleted file mode 100644 index 49c89d32..00000000 Binary files a/github_issue_summarization/pipelines/example_pipelines/gh_summ_serve.py.tar.gz and /dev/null differ diff --git a/github_issue_summarization/pipelines/example_pipelines/gh_summ.py b/github_issue_summarization/pipelines/example_pipelines/obsolete/gh_summ.py similarity index 100% rename from github_issue_summarization/pipelines/example_pipelines/gh_summ.py rename to github_issue_summarization/pipelines/example_pipelines/obsolete/gh_summ.py diff --git a/github_issue_summarization/pipelines/example_pipelines/gh_summ_serve.py b/github_issue_summarization/pipelines/example_pipelines/obsolete/gh_summ_serve.py similarity index 100% rename from github_issue_summarization/pipelines/example_pipelines/gh_summ_serve.py rename to github_issue_summarization/pipelines/example_pipelines/obsolete/gh_summ_serve.py diff --git a/github_issue_summarization/pipelines/example_pipelines/pipelines-notebook.ipynb b/github_issue_summarization/pipelines/example_pipelines/pipelines-notebook.ipynb index 60f42d7d..a194ca91 100644 --- a/github_issue_summarization/pipelines/example_pipelines/pipelines-notebook.ipynb +++ b/github_issue_summarization/pipelines/example_pipelines/pipelines-notebook.ipynb @@ -6,7 +6,7 @@ "source": [ "# KubeFlow Pipeline: Github Issue Summarization using Tensor2Tensor\n", "\n", - "This notebook assumes that you have already set up a GKE cluster with Kubeflow installed as per this codelab: [g.co/codelabs/kubecon18](g.co/codelabs/kubecon18). Currently, this notebook must be run from the Kubeflow JupyterHub installation, as described in the codelab.\n", + "This notebook assumes that you have already set up a GKE cluster with CAIP Pipelines (Hosted KFP) installed, with the addition of a GPU-enabled node pool, as per this codelab: [g.co/codelabs/kubecon18](g.co/codelabs/kubecon18).\n", "\n", "In this notebook, we will show how to:\n", "\n", @@ -29,7 +29,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Do some installations and imports, and set some variables. Set the `WORKING_DIR` to a path under the Cloud Storage bucket you created earlier. The Pipelines SDK is bundled with the notebook server image, but we'll make sure that we're using the most current version for this example. 
You may need to restart your kernel after the SDK update." + "Do some installations and imports, and set some variables. Set the `WORKING_DIR` to a path under the Cloud Storage bucket you created earlier. You may need to restart your kernel after the KFP SDK update." ] }, { @@ -41,6 +41,18 @@ "!pip install -U kfp" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Restart kernel after the pip install\n", + "import IPython\n", + "\n", + "IPython.Application.instance().kernel.do_shutdown(True)" + ] + }, { "cell_type": "code", "execution_count": null, @@ -65,19 +77,37 @@ "source": [ "# Define some pipeline input variables. \n", "WORKING_DIR = 'gs://YOUR_GCS_BUCKET/t2t/notebooks' # Such as gs://bucket/object/path\n", - "PROJECT_NAME = 'YOUR_PROJECT'\n", - "GITHUB_TOKEN = 'YOUR_GITHUB_TOKEN' # needed for prediction, to grab issue data from GH\n", "\n", - "DEPLOY_WEBAPP = 'false'" + "PROJECT_NAME = 'YOUR_PROJECT'\n", + "GITHUB_TOKEN = 'YOUR_GITHUB_TOKEN' # optional; used for prediction, to grab issue data from GH\n", + "\n", + "DEPLOY_WEBAPP = 'false' # change this to 'true' to deploy a new version of the webapp part of the pipeline" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "## Create an *Experiment* in the Kubeflow Pipeline System\n", + "## Instantiate the KFP client and create an *Experiment* in the Kubeflow Pipeline System\n", "\n", - "The Kubeflow Pipeline system requires an \"Experiment\" to group pipeline runs. You can create a new experiment, or call `client.list_experiments()` to get existing ones." + "Next we'll instantiate a KFP client object with the `host` info from your Hosted KFP installation. To do this, go to the Pipelines dashboard in the Cloud Console and click on the \"Settings\" gear for the KFP installation that you want to use. You'll see a popup window. Look for the \"Connect to this Kubeflow Pipelines instance...\" text and copy the \"client = kfp.Client(...)\" line below it. Edit the following cell to use that line." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# CHANGE THIS with the info for your KFP cluster installation\n", + "client = kfp.Client(host='xxxxxxxx-dot-us-centralx.pipelines.googleusercontent.com')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The Kubeflow Pipeline system requires an \"Experiment\" to group pipeline runs. You can create a new experiment, or call `client.list_experiments()` to get existing ones. (This will also serve to check that your client is set up properly)." 
] }, { @@ -86,9 +116,6 @@ "metadata": {}, "outputs": [], "source": [ - "# Note that this notebook should be running in JupyterHub in the same cluster as the pipeline system.\n", - "# Otherwise, additional config would be required to connect.\n", - "client = kfp.Client()\n", "client.list_experiments()" ] }, @@ -136,7 +163,6 @@ "source": [ "COPY_ACTION = 'copy_data'\n", "TRAIN_ACTION = 'train'\n", - "WORKSPACE_NAME = 'ws_gh_summ'\n", "DATASET = 'dataset'\n", "MODEL = 'model'\n", "\n", @@ -146,11 +172,7 @@ "\n", "train_op = comp.load_component_from_url(\n", " 'https://raw.githubusercontent.com/kubeflow/examples/master/github_issue_summarization/pipelines/components/t2t/train_component.yaml' # pylint: disable=line-too-long\n", - " )\n", - "\n", - "metadata_log_op = comp.load_component_from_url(\n", - " 'https://raw.githubusercontent.com/kubeflow/examples/master/github_issue_summarization/pipelines/components/t2t/metadata_log_component.yaml' # pylint: disable=line-too-long\n", - " )" + " )\n" ] }, { @@ -166,35 +188,25 @@ "metadata": {}, "outputs": [], "source": [ - "\n", "@dsl.pipeline(\n", " name='Github issue summarization',\n", " description='Demonstrate Tensor2Tensor-based training and TF-Serving'\n", ")\n", - "def gh_summ( #pylint: disable=unused-argument\n", + "def gh_summ(\n", " train_steps: 'Integer' = 2019300,\n", " project: str = 'YOUR_PROJECT_HERE',\n", " github_token: str = 'YOUR_GITHUB_TOKEN_HERE',\n", - " working_dir: 'GCSPath' = 'YOUR_GCS_DIR_HERE',\n", + " working_dir: 'GCSPath' = 'gs://YOUR_GCS_DIR_HERE',\n", " checkpoint_dir: 'GCSPath' = 'gs://aju-dev-demos-codelabs/kubecon/model_output_tbase.bak2019000/',\n", " deploy_webapp: str = 'true',\n", " data_dir: 'GCSPath' = 'gs://aju-dev-demos-codelabs/kubecon/t2t_data_gh_all/'\n", " ):\n", "\n", - "\n", " copydata = copydata_op(\n", " data_dir=data_dir,\n", " checkpoint_dir=checkpoint_dir,\n", " model_dir='%s/%s/model_output' % (working_dir, dsl.RUN_ID_PLACEHOLDER),\n", " action=COPY_ACTION,\n", - " ).apply(gcp.use_gcp_secret('user-gcp-sa'))\n", - "\n", - "\n", - " log_dataset = metadata_log_op(\n", - " log_type=DATASET,\n", - " workspace_name=WORKSPACE_NAME,\n", - " run_name=dsl.RUN_ID_PLACEHOLDER,\n", - " data_uri=data_dir\n", " )\n", "\n", " train = train_op(\n", @@ -202,32 +214,22 @@ " model_dir=copydata.outputs['copy_output_path'],\n", " action=TRAIN_ACTION, train_steps=train_steps,\n", " deploy_webapp=deploy_webapp\n", - " ).apply(gcp.use_gcp_secret('user-gcp-sa'))\n", - "\n", - "\n", - " log_model = metadata_log_op(\n", - " log_type=MODEL,\n", - " workspace_name=WORKSPACE_NAME,\n", - " run_name=dsl.RUN_ID_PLACEHOLDER,\n", - " model_uri=train.outputs['train_output_path']\n", " )\n", "\n", " serve = dsl.ContainerOp(\n", " name='serve',\n", - " image='gcr.io/google-samples/ml-pipeline-kubeflow-tfserve',\n", + " image='gcr.io/google-samples/ml-pipeline-kubeflow-tfserve:v5',\n", " arguments=[\"--model_name\", 'ghsumm-%s' % (dsl.RUN_ID_PLACEHOLDER,),\n", " \"--model_path\", train.outputs['train_output_path']\n", " ]\n", " )\n", - " log_dataset.after(copydata)\n", - " log_model.after(train)\n", + "\n", " train.set_gpu_limit(1)\n", - " train.set_memory_limit('48G')\n", "\n", " with dsl.Condition(train.outputs['launch_server'] == 'true'):\n", " webapp = dsl.ContainerOp(\n", " name='webapp',\n", - " image='gcr.io/google-samples/ml-pipeline-webapp-launcher:v2ap',\n", + " image='gcr.io/google-samples/ml-pipeline-webapp-launcher:v7ap',\n", " arguments=[\"--model_name\", 'ghsumm-%s' % (dsl.RUN_ID_PLACEHOLDER,),\n", " \"--github_token\", 
github_token]\n", "\n", @@ -302,7 +304,7 @@ " name='datagen',\n", " image='gcr.io/google-samples/ml-pipeline-t2tproc',\n", " arguments=[ \"--data-dir\", data_dir, \"--project\", project]\n", - " ).apply(gcp.use_gcp_secret('user-gcp-sa'))" + " )" ] }, { @@ -346,14 +348,6 @@ " checkpoint_dir=checkpoint_dir,\n", " model_dir='%s/%s/model_output' % (working_dir, dsl.RUN_ID_PLACEHOLDER),\n", " action=COPY_ACTION,\n", - " ).apply(gcp.use_gcp_secret('user-gcp-sa'))\n", - "\n", - "\n", - " log_dataset = metadata_log_op(\n", - " log_type=DATASET,\n", - " workspace_name=WORKSPACE_NAME,\n", - " run_name=dsl.RUN_ID_PLACEHOLDER,\n", - " data_uri=data_dir\n", " )\n", "\n", " train = train_op(\n", @@ -361,38 +355,28 @@ " model_dir=copydata.outputs['copy_output_path'],\n", " action=TRAIN_ACTION, train_steps=train_steps,\n", " deploy_webapp=deploy_webapp\n", - " ).apply(gcp.use_gcp_secret('user-gcp-sa'))\n", - "\n", - " log_dataset.after(copydata)\n", - " train.after(preproc)\n", - "\n", - " log_model = metadata_log_op(\n", - " log_type=MODEL,\n", - " workspace_name=WORKSPACE_NAME,\n", - " run_name=dsl.RUN_ID_PLACEHOLDER,\n", - " model_uri=train.outputs['train_output_path']\n", " )\n", + " train.after(preproc) \n", "\n", " serve = dsl.ContainerOp(\n", " name='serve',\n", - " image='gcr.io/google-samples/ml-pipeline-kubeflow-tfserve',\n", + " image='gcr.io/google-samples/ml-pipeline-kubeflow-tfserve:v5',\n", " arguments=[\"--model_name\", 'ghsumm-%s' % (dsl.RUN_ID_PLACEHOLDER,),\n", " \"--model_path\", train.outputs['train_output_path']\n", " ]\n", " )\n", - " log_model.after(train)\n", + "\n", " train.set_gpu_limit(1)\n", - " train.set_memory_limit('48G')\n", "\n", " with dsl.Condition(train.outputs['launch_server'] == 'true'):\n", " webapp = dsl.ContainerOp(\n", " name='webapp',\n", - " image='gcr.io/google-samples/ml-pipeline-webapp-launcher:v2ap',\n", + " image='gcr.io/google-samples/ml-pipeline-webapp-launcher:v7ap',\n", " arguments=[\"--model_name\", 'ghsumm-%s' % (dsl.RUN_ID_PLACEHOLDER,),\n", " \"--github_token\", github_token]\n", "\n", " )\n", - " webapp.after(serve)\n" + " webapp.after(serve)" ] }, { @@ -429,13 +413,20 @@ "metadata": {}, "source": [ "You should be able to see your newly defined pipeline run in the dashboard:\n", - "![](https://storage.googleapis.com/amy-jo/images/kf-pls/Screenshot%202019-09-04%2013.24.19%202.png)\n", - "\n", + "![](https://storage.googleapis.com/amy-jo/images/kf-pls/t2t_pipeline_in_dashboard.png)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ "The new pipeline has the following structure:\n", - "![The new pipeline structure.](https://storage.googleapis.com/amy-jo/images/kf-pls/pipeline_new.png)\n", + "\n", + "![The new pipeline structure.](https://storage.googleapis.com/amy-jo/images/kf-pls/t2t_pipeline_structure.png)\n", "\n", "Below is a screenshot of the pipeline running.\n", - "![The pipeline running.](https://storage.googleapis.com/amy-jo/images/kf-pls/Screenshot_2019-09-06_13_06_50.png)" + "\n", + "![The pipeline running.](https://storage.googleapis.com/amy-jo/images/kf-pls/t2t_pipeline_running.png)" ] }, { @@ -467,6 +458,11 @@ } ], "metadata": { + "environment": { + "name": "tf2-2-2-gpu.2-2.m48", + "type": "gcloud", + "uri": "gcr.io/deeplearning-platform-release/tf2-2-2-gpu.2-2:m48" + }, "kernelspec": { "display_name": "Python 3", "language": "python", @@ -482,9 +478,9 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.7" + "version": "3.7.6" } }, "nbformat": 4, - "nbformat_minor": 
2 + "nbformat_minor": 4 }
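For reference, running the new gh_summ_hosted_kfp.py pipeline end to end against a Hosted KFP endpoint follows the same pattern as the notebook above. A sketch, assuming a KFP SDK v1-era client API and placeholder host, bucket, project, and token values:

```python
# Sketch: compile the pipeline and launch a run on a Hosted KFP endpoint.
# The host URL is a placeholder; copy the real one from the "Connect to this
# Kubeflow Pipelines instance" popup in the Cloud Console, as described above.
import kfp
import kfp.compiler as compiler
from gh_summ_hosted_kfp import gh_summ

# Compile the pipeline function to an archive, as the script's __main__ does.
compiler.Compiler().compile(gh_summ, 'gh_summ_hosted_kfp.py.tar.gz')

client = kfp.Client(host='xxxxxxxx-dot-us-centralx.pipelines.googleusercontent.com')
exp = client.create_experiment(name='gh_summ')
run = client.run_pipeline(
    exp.id, 'gh_summ run', 'gh_summ_hosted_kfp.py.tar.gz',
    params={'working_dir': 'gs://YOUR_GCS_BUCKET/t2t',
            'project': 'YOUR_PROJECT',
            'github_token': 'YOUR_GITHUB_TOKEN'})
```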