# Mirror of https://github.com/kubeflow/examples.git
# Base image: NVIDIA CUDA 9.0 with cuDNN 7 (devel flavour) on Ubuntu 16.04,
# as named by the image tag.
FROM nvidia/cuda:9.0-cudnn7-devel-ubuntu16.04

# Python version requested from conda further down; override at build time
# with --build-arg PYTHON_VERSION=<x.y>.
ARG PYTHON_VERSION=3.6

# System packages: compiler toolchain, download / version-control tools, an
# SSH client and the JPEG/PNG development headers. The apt package lists are
# deleted in the same layer so they are not stored in the image.
RUN apt-get update \
 && apt-get install -y --no-install-recommends \
        build-essential \
        ca-certificates \
        cmake \
        curl \
        git \
        libjpeg-dev \
        libpng-dev \
        openssh-client \
        vim \
        wget \
 && rm -rf /var/lib/apt/lists/*

# Build Open MPI 3.0.0 from source with CUDA support and install it under
# /home/.openmpi.
#  - `tar -xzf` replaces `gunzip -c | tar xf -`: without pipefail the pipeline
#    only reports tar's exit status, hiding a gunzip failure.
#  - The library is compiled in parallel, then installed in a separate make
#    invocation.
#  - The tarball and the build tree are removed in this same layer; deleting
#    them in a later layer would not shrink the image.
RUN wget https://www.open-mpi.org/software/ompi/v3.0/downloads/openmpi-3.0.0.tar.gz && \
    tar -xzf openmpi-3.0.0.tar.gz && \
    cd openmpi-3.0.0 && \
    ./configure --prefix=/home/.openmpi --with-cuda && \
    make -j"$(nproc)" all && \
    make install && \
    cd .. && \
    rm -rf openmpi-3.0.0 openmpi-3.0.0.tar.gz

# Make the Open MPI binaries and shared libraries installed under
# /home/.openmpi visible to later build steps and to the running container.
ENV PATH="${PATH}:/home/.openmpi/bin" \
    LD_LIBRARY_PATH="${LD_LIBRARY_PATH}:/home/.openmpi/lib/"

# Fail the build unless Open MPI reports that it was compiled with CUDA
# support. Matching only "...:value" would also succeed on "value:false",
# so the expected value is part of the pattern.
RUN ompi_info --parsable --all | grep "mpi_built_with_cuda_support:value:true"
# Install Miniconda into /opt/conda and create the Python environment used to
# build PyTorch.
#  - curl: a single output option (-o) instead of the conflicting -o/-O pair;
#    -f turns HTTP errors into a build failure and -L follows redirects, so an
#    error or redirect page is never saved and executed as the installer.
#  - `conda update` gets -y so it cannot stop at a confirmation prompt during
#    a non-interactive build.
#  - The installer script and conda's package caches are removed in the same
#    layer.
RUN curl -fsSL -o ~/miniconda.sh https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh && \
    chmod +x ~/miniconda.sh && \
    ~/miniconda.sh -b -p /opt/conda && \
    rm ~/miniconda.sh && \
    /opt/conda/bin/conda update -y conda && \
    /opt/conda/bin/conda install -y python=$PYTHON_VERSION numpy pyyaml scipy ipython mkl mkl-include cython typing && \
    /opt/conda/bin/conda clean -ya

# Put the conda-managed python / pip / conda ahead of the system binaries.
ENV PATH=/opt/conda/bin:$PATH

# Fetch the PyTorch sources, including every submodule, into
# /opt/pytorch/pytorch. The checkout has to exist before the pip-based build
# that follows.
WORKDIR /opt/pytorch

RUN git clone --recurse-submodules https://github.com/pytorch/pytorch

# Check out the 1.0rc1 release (latest master seems to have MPI backend
# detection broken), sync the submodules to that tag, then build and install
# PyTorch with pip.
#
# The build settings are written as a prefix of `pip install` itself. When
# they were placed in front of `cd pytorch/`, the shell applied them to the
# `cd` command only, so they never reached the build.
# NOTE(review): BUILD_ATEN_MOBILE=ON and USE_OPENMP=OFF now actually take
# effect if the v1.0rc1 build scripts read them - confirm both are intended.
RUN cd pytorch/ && \
    git checkout tags/v1.0rc1 && \
    git submodule update --init --recursive && \
    TORCH_CUDA_ARCH_LIST="3.5 5.2 6.0 6.1 7.0+PTX" \
    TORCH_NVCC_FLAGS="-Xfatbin -compress-all" \
    CMAKE_PREFIX_PATH="$(dirname "$(which conda)")/../" \
    BUILD_ATEN_MOBILE=ON \
    USE_OPENMP=OFF \
    pip install -v .

# NOTE(review): TLS verification is disabled for conda and the PyPI hosts are
# marked as trusted for pip. Presumably a workaround for a TLS-intercepting
# proxy - confirm it is still needed, since it weakens download integrity.
RUN /opt/conda/bin/conda config --set ssl_verify False

RUN pip install --no-cache-dir --upgrade pip --trusted-host pypi.org --trusted-host files.pythonhosted.org

# torchvision is pinned to a release contemporary with PyTorch 1.0. An
# unpinned install resolves to a newer torchvision that depends on a specific
# newer torch wheel, which pip would then install over the MPI-enabled build
# compiled from source above.
RUN pip install --no-cache-dir --trusted-host pypi.org --trusted-host files.pythonhosted.org torchvision==0.2.1

# Default working directory of the container, made writable for all users.
WORKDIR /workspace
RUN chmod --recursive a+w /workspace

# Copy the training script into the image. COPY is used instead of ADD because
# this is a plain local file and none of ADD's extra behaviours (archive
# extraction, remote URLs) are wanted.
COPY ./mnist_DDP.py /opt/pytorch_dist_mnist/

# Launch the training script under mpirun with 4 processes (exec form, so no
# shell is involved). Arguments passed at `docker run` are appended to this
# command line.
ENTRYPOINT ["mpirun", "-n", "4", "--allow-run-as-root", \
            "python", "-u", "/opt/pytorch_dist_mnist/mnist_DDP.py", \
            "--gpu", "--modelpath", "/mnt/kubeflow-gcfs/pytorch/model"]