mirror of https://github.com/kubeflow/examples.git
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Neural Machine Translation with Attention\n",
"This machine translation sample trains an [encoder-decoder architecture with attention](https://github.com/tensorflow/nmt) on a [Spanish-English dataset](http://www.manythings.org/anki/)."
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"\"\"\"\n",
"The nmt_with_attention.py file contains the core training code and\n",
"exposes a NeuralMachineTranslation class, as required by Kubeflow Fairing.\n",
"\"\"\"\n",
"import importlib\n",
"from kubeflow import fairing\n",
"from kubeflow.fairing import TrainJob\n",
"from nmt_with_attention import NeuralMachineTranslation"
]
},
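{
"cell_type": "markdown",
"metadata": {},
"source": [
"The sketch below is purely illustrative: it shows the minimal shape Fairing expects from the class passed to `TrainJob`, namely a `train` method it can invoke inside the container. The real `NeuralMachineTranslation` class lives in `nmt_with_attention.py`; the body here is a hypothetical placeholder, not the actual training loop."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Illustrative sketch only (assumption): TrainJob needs a class exposing a\n",
"# train() entry point. The real implementation in nmt_with_attention.py runs\n",
"# the encoder-decoder training loop; this placeholder just prints a message.\n",
"class NeuralMachineTranslationSketch:\n",
"    def train(self):\n",
"        print('the dataset loading and training epochs would run here')"
]
},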
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"# Fairing internally builds a Docker image around the exposed class, which must define a train function;\n",
"# you do not have to build the image explicitly. The image is stored on gcr.io.\n",
"# A GKE backend is supplied for the TrainJob to run on.\n",
"GCP_PROJECT = fairing.cloud.gcp.guess_project_name()\n",
"DOCKER_REGISTRY = 'gcr.io/{}/nmt-fairing-job'.format(GCP_PROJECT)\n",
"BuildContext = None\n",
"FAIRING_BACKEND = 'KubeflowGKEBackend'\n",
"BackendClass = getattr(importlib.import_module('kubeflow.fairing.backends'), FAIRING_BACKEND)"
]
},
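{
"cell_type": "markdown",
"metadata": {},
"source": [
"Optional sanity check (not part of the original flow): print the values resolved above to confirm the project, registry, and backend class before launching a job. It only uses names already defined in this notebook."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Optional check: confirm the resolved configuration before submitting a job.\n",
"print('GCP project:     {}'.format(GCP_PROJECT))\n",
"print('Docker registry: {}'.format(DOCKER_REGISTRY))\n",
"print('Backend class:   {}'.format(BackendClass.__name__))"
]
},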
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"[W 200726 15:49:41 tasks:54] Using default base docker image: registry.hub.docker.com/library/python:3.6.9\n",
"[W 200726 15:49:41 tasks:62] Using builder: <class 'kubeflow.fairing.builders.cluster.cluster.ClusterBuilder'>\n",
"[I 200726 15:49:41 tasks:66] Building the docker image.\n",
"[I 200726 15:49:41 cluster:46] Building image using cluster builder.\n",
"[W 200726 15:49:41 base:94] /home/jovyan/.local/lib/python3.6/site-packages/kubeflow/fairing/__init__.py already exists in Fairing context, skipping...\n",
"[I 200726 15:49:41 base:107] Creating docker context: /tmp/fairing_context_k0ruanza\n",
"[W 200726 15:49:41 base:94] /home/jovyan/.local/lib/python3.6/site-packages/kubeflow/fairing/__init__.py already exists in Fairing context, skipping...\n",
"[W 200726 15:49:43 manager:298] Waiting for fairing-builder-jlsdm-tmhx9 to start...\n",
"[W 200726 15:49:43 manager:298] Waiting for fairing-builder-jlsdm-tmhx9 to start...\n",
"[W 200726 15:49:43 manager:298] Waiting for fairing-builder-jlsdm-tmhx9 to start...\n",
"[I 200726 15:49:45 manager:304] Pod started running True\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"E0726 15:49:45.937807 1 aws_credentials.go:77] while getting AWS credentials NoCredentialProviders: no valid providers in chain. Deprecated.\n",
"\tFor verbose messaging see aws.Config.CredentialsChainVerboseErrors\n",
"E0726 15:49:46.948637 1 metadata.go:248] Failed to unmarshal scopes: json: cannot unmarshal string into Go value of type []string\n",
"\u001b[36mINFO\u001b[0m[0002] Retrieving image manifest registry.hub.docker.com/library/python:3.6.9\n",
"E0726 15:49:47.402406 1 metadata.go:154] while reading 'google-dockercfg' metadata: http status code: 404 while fetching url http://metadata.google.internal./computeMetadata/v1/instance/attributes/google-dockercfg\n",
"E0726 15:49:47.404360 1 metadata.go:166] while reading 'google-dockercfg-url' metadata: http status code: 404 while fetching url http://metadata.google.internal./computeMetadata/v1/instance/attributes/google-dockercfg-url\n",
"\u001b[36mINFO\u001b[0m[0005] Retrieving image manifest registry.hub.docker.com/library/python:3.6.9\n",
"\u001b[36mINFO\u001b[0m[0007] Built cross stage deps: map[]\n",
"\u001b[36mINFO\u001b[0m[0007] Retrieving image manifest registry.hub.docker.com/library/python:3.6.9\n",
"\u001b[36mINFO\u001b[0m[0008] Retrieving image manifest registry.hub.docker.com/library/python:3.6.9\n",
"\u001b[36mINFO\u001b[0m[0010] Executing 0 build triggers\n",
"\u001b[36mINFO\u001b[0m[0010] Checking for cached layer gcr.io/gsoc-kf-example/nmt-fairing-job/fairing-job/cache:21f2f9550745de797add3a9e1b5b3ddf71ddd3db3d45ec6387b76a458b0f356f...\n",
"\u001b[36mINFO\u001b[0m[0011] Using caching version of cmd: COPY /app//requirements.txt /app/\n",
"\u001b[36mINFO\u001b[0m[0011] Checking for cached layer gcr.io/gsoc-kf-example/nmt-fairing-job/fairing-job/cache:05fdc6b91ef364d422b2f4dab987aa7c762cb5788ff7e9e3fc334a31c3b40e10...\n",
"\u001b[36mINFO\u001b[0m[0012] Using caching version of cmd: RUN if [ -e requirements.txt ];then pip install --no-cache -r requirements.txt; fi\n",
"\u001b[36mINFO\u001b[0m[0012] Checking for cached layer gcr.io/gsoc-kf-example/nmt-fairing-job/fairing-job/cache:fa480504c701aac01eecbb61d1bdc64162474937f788f9d59c1502f3e255be8f...\n",
"\u001b[36mINFO\u001b[0m[0012] No cached layer found for cmd COPY /app/ /app/\n",
"\u001b[36mINFO\u001b[0m[0012] Unpacking rootfs as cmd COPY /app/ /app/ requires it.\n",
"\u001b[36mINFO\u001b[0m[0032] WORKDIR /app/\n",
"\u001b[36mINFO\u001b[0m[0032] cmd: workdir\n",
"\u001b[36mINFO\u001b[0m[0032] Changed working directory to /app/\n",
"\u001b[36mINFO\u001b[0m[0032] Creating directory /app/\n",
"\u001b[36mINFO\u001b[0m[0032] Resolving 1 paths\n",
"\u001b[36mINFO\u001b[0m[0032] Taking snapshot of files...\n",
"\u001b[36mINFO\u001b[0m[0032] ENV FAIRING_RUNTIME 1\n",
"\u001b[36mINFO\u001b[0m[0032] No files changed in this command, skipping snapshotting.\n",
"\u001b[36mINFO\u001b[0m[0032] COPY /app//requirements.txt /app/\n",
"\u001b[36mINFO\u001b[0m[0032] Found cached layer, extracting to filesystem\n",
"\u001b[36mINFO\u001b[0m[0033] RUN if [ -e requirements.txt ];then pip install --no-cache -r requirements.txt; fi\n",
"\u001b[36mINFO\u001b[0m[0033] Found cached layer, extracting to filesystem\n",
"\u001b[36mINFO\u001b[0m[0068] COPY /app/ /app/\n",
"\u001b[36mINFO\u001b[0m[0068] Resolving 15 paths\n",
"\u001b[36mINFO\u001b[0m[0068] Taking snapshot of files...\n",
"\u001b[36mINFO\u001b[0m[0068] Pushing layer gcr.io/gsoc-kf-example/nmt-fairing-job/fairing-job/cache:fa480504c701aac01eecbb61d1bdc64162474937f788f9d59c1502f3e255be8f to cache now\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"[W 200726 15:51:09 job:101] The job fairing-job-tk4tr launched.\n",
"[W 200726 15:51:09 manager:298] Waiting for fairing-job-tk4tr-t8hrx to start...\n",
"[W 200726 15:51:09 manager:298] Waiting for fairing-job-tk4tr-t8hrx to start...\n",
"[W 200726 15:51:09 manager:298] Waiting for fairing-job-tk4tr-t8hrx to start...\n",
"[I 200726 15:51:50 manager:304] Pod started running True\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Downloading data from http://storage.googleapis.com/download.tensorflow.org/data/spa-eng.zip\n",
"2646016/2638744 [==============================] - 0s 0us/step\n",
"2020-07-26 15:51:53.576850: W tensorflow/stream_executor/platform/default/dso_loader.cc:55] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory\n",
"2020-07-26 15:51:53.576953: E tensorflow/stream_executor/cuda/cuda_driver.cc:313] failed call to cuInit: UNKNOWN ERROR (303)\n",
"2020-07-26 15:51:53.577012: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (fairing-job-tk4tr-t8hrx): /proc/driver/nvidia/version does not exist\n",
"2020-07-26 15:51:53.577458: I tensorflow/core/platform/cpu_feature_guard.cc:143] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX2 FMA\n",
"2020-07-26 15:51:53.611445: I tensorflow/core/platform/profile_utils/cpu_utils.cc:102] CPU Frequency: 2200000000 Hz\n",
"2020-07-26 15:51:53.612836: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x7f85b0000b20 initialized for platform Host (this does not guarantee that XLA will be used). Devices:\n",
"2020-07-26 15:51:53.612878: I tensorflow/compiler/xla/service/service.cc:176] StreamExecutor device (0): Host, Default Version\n",
"Path to stored file: /app/dataset/spa-eng/spa.txt\n",
"epoch 1:\n",
"loss=2.63\n",
"\n",
"epoch 2:\n",
"loss=1.91\n",
"\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"[W 200726 15:52:49 job:173] Cleaning up job fairing-job-tk4tr...\n"
]
},
{
"data": {
"text/plain": [
"'fairing-job-tk4tr'"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"\"\"\"\n",
"All files given in `input_files` are added to the built Docker image.\n",
"If the input files include a `requirements.txt` file,\n",
"all Python packages listed in it are installed in the image.\n",
"\"\"\"\n",
"input_files = ['dataset/spa-eng/spa.txt',\n",
"               'nmt_with_attention.py',\n",
"               'requirements.txt']\n",
"train_job = TrainJob(NeuralMachineTranslation,\n",
"                     input_files=input_files,\n",
"                     docker_registry=DOCKER_REGISTRY,\n",
"                     backend=BackendClass(build_context_source=BuildContext))\n",
"train_job.submit()"
]
},
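{
"cell_type": "markdown",
"metadata": {},
"source": [
"Optional helper (not in the original notebook): if you adapt this example with your own files, the check below fails fast when any path listed in `input_files` is missing locally, with a clearer message than a failed image build. Run it before the submission cell above."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Optional pre-flight check (assumption): every path in input_files should exist\n",
"# locally, since Fairing packages these files into the Docker build context.\n",
"import os\n",
"missing = [path for path in input_files if not os.path.exists(path)]\n",
"assert not missing, 'Missing input files: {}'.format(missing)"
]
}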
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.9"
}
},
"nbformat": 4,
"nbformat_minor": 4
}