{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Neural Machine Translation with Attention\n", "This machine translation sample trains an [encoder-decoder architecture with attention](https://github.com/tensorflow/nmt) on a [Spanish-English dataset](http://www.manythings.org/anki/)." ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "# nmt_with_attention.py stores the core training code and exposes the\n", "# NeuralMachineTranslation class as required by Kubeflow Fairing.\n", "import importlib\n", "\n", "from kubeflow import fairing\n", "from kubeflow.fairing import TrainJob\n", "\n", "from nmt_with_attention import NeuralMachineTranslation" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "# Fairing internally builds an image of the exposed class, which has a defined train function,\n", "# so the image does not have to be built explicitly. The image is stored on gcr.io.\n", "GCP_PROJECT = fairing.cloud.gcp.guess_project_name()\n", "DOCKER_REGISTRY = 'gcr.io/{}/nmt-fairing-job'.format(GCP_PROJECT)\n", "\n", "# A GKE backend is supplied for the TrainJob to run on; its class is looked up by name\n", "# in kubeflow.fairing.backends.\n", "BuildContext = None\n", "FAIRING_BACKEND = 'KubeflowGKEBackend'\n", "BackendClass = getattr(importlib.import_module('kubeflow.fairing.backends'), FAIRING_BACKEND)" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "[W 200726 15:49:41 tasks:54] Using default base docker image: registry.hub.docker.com/library/python:3.6.9\n", "[W 200726 15:49:41 tasks:62] Using builder: \n", "[I 200726 15:49:41 tasks:66] Building the docker image.\n", "[I 200726 15:49:41 cluster:46] Building image using cluster builder.\n", "[W 200726 15:49:41 base:94] /home/jovyan/.local/lib/python3.6/site-packages/kubeflow/fairing/__init__.py already exists in Fairing context, skipping...\n", "[I 200726 15:49:41 base:107] Creating docker context: 
/tmp/fairing_context_k0ruanza\n", "[W 200726 15:49:41 base:94] /home/jovyan/.local/lib/python3.6/site-packages/kubeflow/fairing/__init__.py already exists in Fairing context, skipping...\n", "[W 200726 15:49:43 manager:298] Waiting for fairing-builder-jlsdm-tmhx9 to start...\n", "[W 200726 15:49:43 manager:298] Waiting for fairing-builder-jlsdm-tmhx9 to start...\n", "[W 200726 15:49:43 manager:298] Waiting for fairing-builder-jlsdm-tmhx9 to start...\n", "[I 200726 15:49:45 manager:304] Pod started running True\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "E0726 15:49:45.937807 1 aws_credentials.go:77] while getting AWS credentials NoCredentialProviders: no valid providers in chain. Deprecated.\n", "\tFor verbose messaging see aws.Config.CredentialsChainVerboseErrors\n", "E0726 15:49:46.948637 1 metadata.go:248] Failed to unmarshal scopes: json: cannot unmarshal string into Go value of type []string\n", "\u001b[36mINFO\u001b[0m[0002] Retrieving image manifest registry.hub.docker.com/library/python:3.6.9\n", "E0726 15:49:47.402406 1 metadata.go:154] while reading 'google-dockercfg' metadata: http status code: 404 while fetching url http://metadata.google.internal./computeMetadata/v1/instance/attributes/google-dockercfg\n", "E0726 15:49:47.404360 1 metadata.go:166] while reading 'google-dockercfg-url' metadata: http status code: 404 while fetching url http://metadata.google.internal./computeMetadata/v1/instance/attributes/google-dockercfg-url\n", "\u001b[36mINFO\u001b[0m[0005] Retrieving image manifest registry.hub.docker.com/library/python:3.6.9\n", "\u001b[36mINFO\u001b[0m[0007] Built cross stage deps: map[]\n", "\u001b[36mINFO\u001b[0m[0007] Retrieving image manifest registry.hub.docker.com/library/python:3.6.9\n", "\u001b[36mINFO\u001b[0m[0008] Retrieving image manifest registry.hub.docker.com/library/python:3.6.9\n", "\u001b[36mINFO\u001b[0m[0010] Executing 0 build triggers\n", "\u001b[36mINFO\u001b[0m[0010] Checking for cached layer 
gcr.io/gsoc-kf-example/nmt-fairing-job/fairing-job/cache:21f2f9550745de797add3a9e1b5b3ddf71ddd3db3d45ec6387b76a458b0f356f...\n", "\u001b[36mINFO\u001b[0m[0011] Using caching version of cmd: COPY /app//requirements.txt /app/\n", "\u001b[36mINFO\u001b[0m[0011] Checking for cached layer gcr.io/gsoc-kf-example/nmt-fairing-job/fairing-job/cache:05fdc6b91ef364d422b2f4dab987aa7c762cb5788ff7e9e3fc334a31c3b40e10...\n", "\u001b[36mINFO\u001b[0m[0012] Using caching version of cmd: RUN if [ -e requirements.txt ];then pip install --no-cache -r requirements.txt; fi\n", "\u001b[36mINFO\u001b[0m[0012] Checking for cached layer gcr.io/gsoc-kf-example/nmt-fairing-job/fairing-job/cache:fa480504c701aac01eecbb61d1bdc64162474937f788f9d59c1502f3e255be8f...\n", "\u001b[36mINFO\u001b[0m[0012] No cached layer found for cmd COPY /app/ /app/\n", "\u001b[36mINFO\u001b[0m[0012] Unpacking rootfs as cmd COPY /app/ /app/ requires it.\n", "\u001b[36mINFO\u001b[0m[0032] WORKDIR /app/\n", "\u001b[36mINFO\u001b[0m[0032] cmd: workdir\n", "\u001b[36mINFO\u001b[0m[0032] Changed working directory to /app/\n", "\u001b[36mINFO\u001b[0m[0032] Creating directory /app/\n", "\u001b[36mINFO\u001b[0m[0032] Resolving 1 paths\n", "\u001b[36mINFO\u001b[0m[0032] Taking snapshot of files...\n", "\u001b[36mINFO\u001b[0m[0032] ENV FAIRING_RUNTIME 1\n", "\u001b[36mINFO\u001b[0m[0032] No files changed in this command, skipping snapshotting.\n", "\u001b[36mINFO\u001b[0m[0032] COPY /app//requirements.txt /app/\n", "\u001b[36mINFO\u001b[0m[0032] Found cached layer, extracting to filesystem\n", "\u001b[36mINFO\u001b[0m[0033] RUN if [ -e requirements.txt ];then pip install --no-cache -r requirements.txt; fi\n", "\u001b[36mINFO\u001b[0m[0033] Found cached layer, extracting to filesystem\n", "\u001b[36mINFO\u001b[0m[0068] COPY /app/ /app/\n", "\u001b[36mINFO\u001b[0m[0068] Resolving 15 paths\n", "\u001b[36mINFO\u001b[0m[0068] Taking snapshot of files...\n", "\u001b[36mINFO\u001b[0m[0068] Pushing layer 
gcr.io/gsoc-kf-example/nmt-fairing-job/fairing-job/cache:fa480504c701aac01eecbb61d1bdc64162474937f788f9d59c1502f3e255be8f to cache now\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "[W 200726 15:51:09 job:101] The job fairing-job-tk4tr launched.\n", "[W 200726 15:51:09 manager:298] Waiting for fairing-job-tk4tr-t8hrx to start...\n", "[W 200726 15:51:09 manager:298] Waiting for fairing-job-tk4tr-t8hrx to start...\n", "[W 200726 15:51:09 manager:298] Waiting for fairing-job-tk4tr-t8hrx to start...\n", "[I 200726 15:51:50 manager:304] Pod started running True\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Downloading data from http://storage.googleapis.com/download.tensorflow.org/data/spa-eng.zip\n", "2646016/2638744 [==============================] - 0s 0us/step\n", "2020-07-26 15:51:53.576850: W tensorflow/stream_executor/platform/default/dso_loader.cc:55] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory\n", "2020-07-26 15:51:53.576953: E tensorflow/stream_executor/cuda/cuda_driver.cc:313] failed call to cuInit: UNKNOWN ERROR (303)\n", "2020-07-26 15:51:53.577012: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (fairing-job-tk4tr-t8hrx): /proc/driver/nvidia/version does not exist\n", "2020-07-26 15:51:53.577458: I tensorflow/core/platform/cpu_feature_guard.cc:143] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX2 FMA\n", "2020-07-26 15:51:53.611445: I tensorflow/core/platform/profile_utils/cpu_utils.cc:102] CPU Frequency: 2200000000 Hz\n", "2020-07-26 15:51:53.612836: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x7f85b0000b20 initialized for platform Host (this does not guarantee that XLA will be used). 
Devices:\n", "2020-07-26 15:51:53.612878: I tensorflow/compiler/xla/service/service.cc:176] StreamExecutor device (0): Host, Default Version\n", "Path to stored file: /app/dataset/spa-eng/spa.txt\n", "epoch 1:\n", "loss=2.63\n", "\n", "epoch 2:\n", "loss=1.91\n", "\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "[W 200726 15:52:49 job:173] Cleaning up job fairing-job-tk4tr...\n" ] }, { "data": { "text/plain": [ "'fairing-job-tk4tr'" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# All files given in `input_files` are added to the built Docker image.\n", "# If the input files contain a `requirements.txt` file, all Python packages\n", "# given in that requirements file are installed in the image.\n", "input_files = [\n", "    'dataset/spa-eng/spa.txt',\n", "    'nmt_with_attention.py',\n", "    'requirements.txt',\n", "]\n", "train_job = TrainJob(\n", "    NeuralMachineTranslation,\n", "    input_files=input_files,\n", "    docker_registry=DOCKER_REGISTRY,\n", "    backend=BackendClass(build_context_source=BuildContext),\n", ")\n", "# submit() is left as the last expression so the launched job's name is shown as the cell result.\n", "train_job.submit()" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.9" } }, "nbformat": 4, "nbformat_minor": 4 }