diff --git a/github_issue_summarization/pipelines/README.md b/github_issue_summarization/pipelines/README.md index d1a0bda0..6e958f3b 100644 --- a/github_issue_summarization/pipelines/README.md +++ b/github_issue_summarization/pipelines/README.md @@ -4,6 +4,8 @@ This Kubeflow Pipelines example shows how to build a web app that summarizes GitHub issues using Kubeflow Pipelines to train and serve a model. The pipeline trains a [Tensor2Tensor](https://github.com/tensorflow/tensor2tensor/) model on GitHub issue data, learning to predict issue titles from issue bodies. It then exports the trained model and deploys the exported model using [Tensorflow Serving](https://github.com/tensorflow/serving). The final step in the pipeline launches a web app, which interacts with the TF-Serving instance in order to get model predictions. +The example is designed to run on a Hosted KFP installation, installed via the [Cloud Console](https://console.cloud.google.com/ai-platform/pipelines/clusters) or via ['standalone' installation](https://www.kubeflow.org/docs/pipelines/installation/standalone-deployment/) instructions, but would also be straightforward to run on a Kubeflow installation with minor changes. + You can follow this example as a codelab: [g.co/codelabs/kfp-gis](https://g.co/codelabs/kfp-gis). diff --git a/github_issue_summarization/pipelines/components/kubeflow-resources/containers/tf-serving-gh/Dockerfile b/github_issue_summarization/pipelines/components/kubeflow-resources/containers/tf-serving-gh/Dockerfile index 89df86c1..5045827b 100644 --- a/github_issue_summarization/pipelines/components/kubeflow-resources/containers/tf-serving-gh/Dockerfile +++ b/github_issue_summarization/pipelines/components/kubeflow-resources/containers/tf-serving-gh/Dockerfile @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -FROM ubuntu:16.04 +FROM tensorflow/tensorflow:1.15.0-py3 RUN apt-get update -y @@ -20,7 +20,20 @@ RUN apt-get install --no-install-recommends -y -q ca-certificates python-dev pyt RUN easy_install pip -RUN pip install pyyaml==3.12 six==1.11.0 requests==2.18.4 tensorflow==1.11.0 +RUN pip install pyyaml==3.12 six requests==2.18.4 + +# RUN apt-get update \ +# && apt-get install -y python3-pip python3-dev wget unzip \ +# && cd /usr/local/bin \ +# && ln -s /usr/bin/python3 python \ +# && pip3 install --upgrade pip + +# RUN apt-get install -y wget unzip git + +# RUN pip install --upgrade pip +# RUN pip install urllib3 certifi retrying +# RUN pip install google-cloud-storage + RUN wget -nv https://dl.google.com/dl/cloudsdk/release/google-cloud-sdk.zip && \ unzip -qq google-cloud-sdk.zip -d tools && \ @@ -34,16 +47,9 @@ RUN wget -nv https://dl.google.com/dl/cloudsdk/release/google-cloud-sdk.zip && \ tools/google-cloud-sdk/bin/gcloud config set component_manager/disable_update_check true && \ touch /tools/google-cloud-sdk/lib/third_party/google.py -RUN wget -nv https://github.com/ksonnet/ksonnet/releases/download/v0.11.0/ks_0.11.0_linux_amd64.tar.gz && \ - tar -xvzf ks_0.11.0_linux_amd64.tar.gz && \ - mkdir -p /tools/ks/bin && \ - cp ./ks_0.11.0_linux_amd64/ks /tools/ks/bin && \ - rm ks_0.11.0_linux_amd64.tar.gz && \ - rm -r ks_0.11.0_linux_amd64 -ENV PATH $PATH:/tools/google-cloud-sdk/bin:/tools/ks/bin +ENV PATH $PATH:/tools/google-cloud-sdk/bin ADD build /ml ENTRYPOINT ["python", "/ml/deploy-tf-serve.py"] - diff --git a/github_issue_summarization/pipelines/components/kubeflow-resources/tf-serving-gh/deploy-tf-serve.py b/github_issue_summarization/pipelines/components/kubeflow-resources/tf-serving-gh/deploy-tf-serve.py index f7e5f598..856c3860 100644 --- a/github_issue_summarization/pipelines/components/kubeflow-resources/tf-serving-gh/deploy-tf-serve.py +++ b/github_issue_summarization/pipelines/components/kubeflow-resources/tf-serving-gh/deploy-tf-serve.py @@ -40,9 +40,10 @@ def main(): 'If not set, assuming this runs in a GKE container and current ' + 'cluster is used.') parser.add_argument('--zone', type=str, help='zone of the kubeflow cluster.') + parser.add_argument('--namespace', type=str, default='default') args = parser.parse_args() - KUBEFLOW_NAMESPACE = 'kubeflow' + # KUBEFLOW_NAMESPACE = 'kubeflow' # Make sure model dir exists before proceeding retries = 0 @@ -90,7 +91,7 @@ def main(): with open(target_file, "w") as target: data = f.read() changed = data.replace('MODEL_NAME', args.model_name) - changed1 = changed.replace('KUBEFLOW_NAMESPACE', KUBEFLOW_NAMESPACE) + changed1 = changed.replace('KUBEFLOW_NAMESPACE', args.namespace) changed2 = changed1.replace('MODEL_PATH', args.model_path) target.write(changed2) diff --git a/github_issue_summarization/pipelines/components/kubeflow-resources/tf-serving-gh/tf-serve-template.yaml b/github_issue_summarization/pipelines/components/kubeflow-resources/tf-serving-gh/tf-serve-template.yaml index 240f1ac3..f4161339 100644 --- a/github_issue_summarization/pipelines/components/kubeflow-resources/tf-serving-gh/tf-serve-template.yaml +++ b/github_issue_summarization/pipelines/components/kubeflow-resources/tf-serving-gh/tf-serve-template.yaml @@ -28,7 +28,7 @@ spec: app: MODEL_NAME type: ClusterIP --- -apiVersion: extensions/v1beta1 +apiVersion: apps/v1 kind: Deployment metadata: labels: @@ -37,16 +37,19 @@ metadata: namespace: KUBEFLOW_NAMESPACE spec: replicas: 1 + selector: + matchLabels: + app: MODEL_NAME template: metadata: labels: app: MODEL_NAME version: v1 spec: - volumes: - - name: gcp-credentials-user-gcp-sa - secret: - secretName: user-gcp-sa + # volumes: + # - name: gcp-credentials-user-gcp-sa + # secret: + # secretName: user-gcp-sa containers: - args: - --port=9000 @@ -56,15 +59,15 @@ spec: command: - /usr/bin/tensorflow_model_server image: tensorflow/serving - env: - - name: GOOGLE_APPLICATION_CREDENTIALS - value: /secret/gcp-credentials/user-gcp-sa.json - - name: CLOUDSDK_AUTH_CREDENTIAL_FILE_OVERRIDE - value: /secret/gcp-credentials/user-gcp-sa.json - volumeMounts: - - mountPath: /secret/gcp-credentials - name: gcp-credentials-user-gcp-sa - imagePullPolicy: IfNotPresent + # env: + # - name: GOOGLE_APPLICATION_CREDENTIALS + # value: /secret/gcp-credentials/user-gcp-sa.json + # - name: CLOUDSDK_AUTH_CREDENTIAL_FILE_OVERRIDE + # value: /secret/gcp-credentials/user-gcp-sa.json + # volumeMounts: + # - mountPath: /secret/gcp-credentials + # name: gcp-credentials-user-gcp-sa + imagePullPolicy: Always livenessProbe: initialDelaySeconds: 30 periodSeconds: 30 diff --git a/github_issue_summarization/pipelines/components/t2t/containers/t2t_app/Dockerfile b/github_issue_summarization/pipelines/components/t2t/containers/t2t_app/Dockerfile index c86a15c3..6a6179bc 100644 --- a/github_issue_summarization/pipelines/components/t2t/containers/t2t_app/Dockerfile +++ b/github_issue_summarization/pipelines/components/t2t/containers/t2t_app/Dockerfile @@ -21,9 +21,9 @@ RUN apt-get install --no-install-recommends -y -q ca-certificates python-dev pyt RUN easy_install pip -RUN pip install tensorflow-probability==0.5 +RUN pip install tensorflow-probability==0.7 RUN pip install tensor2tensor==1.11.0 -RUN pip install tensorflow-serving-api +RUN pip install tensorflow-serving-api==1.14.0 RUN pip install gunicorn RUN pip install pyyaml==3.12 six==1.11.0 RUN pip install pandas diff --git a/github_issue_summarization/pipelines/components/t2t/containers/webapp-launcher/Dockerfile b/github_issue_summarization/pipelines/components/t2t/containers/webapp-launcher/Dockerfile index 05cc71c4..8c819b0c 100644 --- a/github_issue_summarization/pipelines/components/t2t/containers/webapp-launcher/Dockerfile +++ b/github_issue_summarization/pipelines/components/t2t/containers/webapp-launcher/Dockerfile @@ -12,15 +12,20 @@ # See the License for the specific language governing permissions and # limitations under the License. -FROM ubuntu:16.04 +FROM ubuntu:18.04 -RUN apt-get update -y +RUN apt-get update \ + && apt-get install -y python3-pip python3-dev wget unzip \ + && cd /usr/local/bin \ + && ln -s /usr/bin/python3 python \ + && pip3 install --upgrade pip -RUN apt-get install --no-install-recommends -y -q ca-certificates python-dev python-setuptools wget unzip +# RUN apt-get install -y wget unzip git -RUN easy_install pip +RUN pip install --upgrade pip +RUN pip install urllib3 certifi retrying +RUN pip install google-cloud-storage -RUN pip install pyyaml==3.12 six==1.11.0 requests==2.18.4 tensorflow==1.12.0 RUN wget -nv https://dl.google.com/dl/cloudsdk/release/google-cloud-sdk.zip && \ unzip -qq google-cloud-sdk.zip -d tools && \ @@ -34,14 +39,8 @@ RUN wget -nv https://dl.google.com/dl/cloudsdk/release/google-cloud-sdk.zip && \ tools/google-cloud-sdk/bin/gcloud config set component_manager/disable_update_check true && \ touch /tools/google-cloud-sdk/lib/third_party/google.py -RUN wget -nv https://github.com/ksonnet/ksonnet/releases/download/v0.11.0/ks_0.11.0_linux_amd64.tar.gz && \ - tar -xvzf ks_0.11.0_linux_amd64.tar.gz && \ - mkdir -p /tools/ks/bin && \ - cp ./ks_0.11.0_linux_amd64/ks /tools/ks/bin && \ - rm ks_0.11.0_linux_amd64.tar.gz && \ - rm -r ks_0.11.0_linux_amd64 -ENV PATH $PATH:/tools/google-cloud-sdk/bin:/tools/ks/bin +ENV PATH $PATH:/tools/google-cloud-sdk/bin ADD build /ml diff --git a/github_issue_summarization/pipelines/components/t2t/t2t-app/app/templates/index.html b/github_issue_summarization/pipelines/components/t2t/t2t-app/app/templates/index.html index d3478b6a..6d16e814 100644 --- a/github_issue_summarization/pipelines/components/t2t/t2t-app/app/templates/index.html +++ b/github_issue_summarization/pipelines/components/t2t/t2t-app/app/templates/index.html @@ -12,8 +12,8 @@