examples/code_search/docker/t2t/Dockerfile.dataflow

# Dockerfile suitable for submitting Dataflow jobs and for running the nmslib index creator.
#
# We don't use the Docker image used for running the training jobs
# because we have different versioning requirements.
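# Note: Dataflow's Python SDK (Apache Beam) only supported Python 2 when
# this was written, hence the python:2.7 base image.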
FROM python:2.7-jessie
# These dependencies are primarily needed for Dataflow,
# so we need to install them for Python 2.
# We do this before copying the code so that we don't have to
# reinstall the requirements every time the code changes.
COPY src/requirements.dataflow.txt /tmp/requirements.dataflow.txt
RUN pip install -r /tmp/requirements.dataflow.txt
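# kubeflow/batch-predict provides the batch prediction pipeline; it is
# installed from the master tarball, presumably because no PyPI release
# is available.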
RUN pip install https://github.com/kubeflow/batch-predict/tarball/master
# Install nmslib requirements so that we can create the index
COPY src/requirements.nmslib.txt /tmp/requirements.nmslib.txt
RUN pip install -r /tmp/requirements.nmslib.txt
# Install the spaCy English model ('en' is a shortcut link for en_core_web_sm).
RUN python -m spacy download en
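# Copy the pipeline code into the image. /app/code_search holds the
# package itself; /src holds setup.py and the requirements files.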
COPY src/code_search /app/code_search
COPY src /src
# See: https://github.com/kubeflow/examples/issues/390
# Dataflow will try to build a source package locally and we need
# the path to match what we have in setup.py.
RUN ln -sf /src/requirements.dataflow.txt /src/requirements.txt
WORKDIR /src
ENV PYTHONIOENCODING=utf-8 T2T_USR_DIR=/app/code_search/t2t
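# PYTHONIOENCODING forces UTF-8 for stdin/stdout under Python 2.
# T2T_USR_DIR points at the custom tensor2tensor problem definitions;
# presumably it is read by the pipeline and passed as --t2t_usr_dir.
#
# A typical build, run from examples/code_search (image tag is illustrative):
#   docker build -t code-search-dataflow -f docker/t2t/Dockerfile.dataflow .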