Updated Kops GPU Setup Hook

* Changed Dockerfile base image to debian for systemctl and bash. * Added autodetection of the AWS EC2 instance classes p2, p3, and g3. * For each detected instance class, added the installation of the proper driver according to the specific NVIDIA hardware. - G3 instance types require Nvidia Grid Series/Grid K520 drivers - P2 instance types require Nvidia Tesla K-Series drivers - P3 instance types require Nvidia Tesla V-Series drivers * Set custom nvidia-smi configurations for the NVIDIA hardware of each EC2 instance class, following the AWS GPU optimization document. * Added the installation and patches of the latest cuda 9.1 libraries. * Added a restart of kubelet on the kube node at the end of a successful hook run, thereby fixing a race condition where kubelet would start before the Nvidia drivers were loaded, thus not allowing kubernetes to detect GPUs on the kube node. * Ensured the build of the nvidia drivers uses the same gcc version as the one that built the default kops kernel. * Fixed issue where *every* run of this container would download all the NVIDIA drivers + cuda libs (1GB+), by caching the files on the kube node. * Fixed issue where after reboot, subsequent runs of this script would fail because mknod would try to create a previously-created device node and fail. This previously caused a download loop as systemd perpetually restarted the unit upon failure. * Tested with p2.xlarge, p3.2xlarge, and g3.4xlarge.
2018-04-11 19:42:27 +00:00 · 2018-04-11 19:42:27 +00:00 · 69ab306eac
parent 3781d63ca3
commit 69ab306eac
2 changed files with 208 additions and 36 deletions
--- a/hooks/nvidia-bootstrap/image/Dockerfile
+++ b/hooks/nvidia-bootstrap/image/Dockerfile
@@ -12,9 +12,12 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

-FROM alpine:3.6
+FROM debian:jessie
+# ^ Cannot be Alpine since it does not support systemctl
+# ^ Systemctl is used to restart kubelet upon successful run of run.sh
+
+RUN apt-get update && apt-get -yq install curl jq

-RUN apk --no-cache add ca-certificates wget && update-ca-certificates
 ADD run.sh /run.sh

-CMD /run.sh
+CMD [ "/bin/bash", "/run.sh" ]
--- a/hooks/nvidia-bootstrap/image/run.sh
+++ b/hooks/nvidia-bootstrap/image/run.sh
@@ -12,50 +12,219 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

-#!/bin/sh
+#!/bin/bash
+set -euo pipefail

-# Simple early detection of nvidia card
-grep 10de102d /proc/bus/pci/devices || exit 0
+#################################################
+# Settings

-# p2.xlarge
-# 00f0	10de102d	4b	        84000000	      100000000c	               0	        8200000c	               0	               0	               0	         1000000	       400000000	               0	         2000000	               0	               0	               0	nvidia
+# A place on the host machine to cache these huge 1.6GB+ downloads in between reboots.
+ROOTFS_DIR=/rootfs
+CACHE_DIR_HOST=/nvidia-bootstrap-cache
+CACHE_DIR_CONTAINER="${ROOTFS_DIR}${CACHE_DIR_HOST}"
+
+# AWS Instance Types to Nvidia Card Mapping (cut and pasted from AWS docs)
+# Load the correct driver for the correct instance type
+#   Instances  Product Type  Product Series  Product
+#   G2         GRID          GRID Series     GRID K520   <-- I think they meant G3
+#   P2         Tesla         K-Series        K-80
+#   P3         Tesla         V-Series        V100
+# Both P2 and P3 are set for Cuda Toolkit 9.1
+# http://www.nvidia.com/Download/index.aspx
+declare -A class_to_driver_file
+class_to_driver_file=( \
+    ["g3"]="http://us.download.nvidia.com/XFree86/Linux-x86_64/367.124/NVIDIA-Linux-x86_64-367.124.run" \
+    ["p2"]="http://us.download.nvidia.com/tesla/390.46/NVIDIA-Linux-x86_64-390.46.run" \
+    ["p3"]="http://us.download.nvidia.com/tesla/390.46/NVIDIA-Linux-x86_64-390.46.run" \
+)
+declare -A class_to_driver_checksum
+class_to_driver_checksum=( \
+    ["g3"]="77f37939efeea4b6505842bed50445971992e303" \
+    ["p2"]="57569ecb6f6d839ecc77fa10a2c573cc069990cc" \
+    ["p3"]="57569ecb6f6d839ecc77fa10a2c573cc069990cc" \
+)
+
+# CUDA Files that need to be installed ~1.4GB
+#   First one is main installation
+#   Subsequent files are patches which need to be applied in order
+#   Order in the arrays below matters
+# https://developer.nvidia.com/cuda-downloads
+cuda_files=( \
+  "https://developer.nvidia.com/compute/cuda/9.1/Prod/local_installers/cuda_9.1.85_387.26_linux" \
+  "https://developer.nvidia.com/compute/cuda/9.1/Prod/patches/1/cuda_9.1.85.1_linux" \
+  "https://developer.nvidia.com/compute/cuda/9.1/Prod/patches/2/cuda_9.1.85.2_linux" \
+  "https://developer.nvidia.com/compute/cuda/9.1/Prod/patches/3/cuda_9.1.85.3_linux" \
+)
+cuda_files_checksums=( \
+  "1540658f4fe657dddd8b0899555b7468727d4aa8" \
+  "7ec6970ecd81163b0d02ef30d35599e7fd6e97d8" \
+  "cfa3b029b58fc117d8ce510a70efc848924dd565" \
+  "6269a2c5784b08997edb97ea0020fb4e6c8769ed" \
+)
+
+containsElement () { for e in "${@:2}"; do [[ "$e" = "$1" ]] && return 0; done; return 1; }
+
+#################################################
+# Ensure that we are on a proper AWS GPU Instance
+
+AWS_INSTANCE_TYPE=$(curl -m 2 -fsSL http://169.254.169.254/latest/dynamic/instance-identity/document | jq -r ".instanceType" || true) # eg: p2.micro
+AWS_INSTANCE_CLASS=$(echo $AWS_INSTANCE_TYPE | cut -d . -f 1 || true) # eg: p2
+
+if [[ -z $AWS_INSTANCE_TYPE ]] || [[ -z $AWS_INSTANCE_CLASS ]]; then
+  echo "This machine is not an AWS instance"
+  echo "  Exiting without installing GPU drivers"
+  exit 0
+fi
+
+classnames=${!class_to_driver_file[@]} # e.g. [ "g3", "p2", "p3" ]
+if ! containsElement $AWS_INSTANCE_CLASS $classnames; then
+  echo "This machine is an AWS instance, but not a GPU instance"
+  echo "  Exiting without installing GPU drivers"
+  exit 0
+fi
+
+echo "Identified machine as AWS_INSTANCE_TYPE[$AWS_INSTANCE_TYPE] AWS_INSTANCE_CLASS[$AWS_INSTANCE_CLASS]"
+
+#################################################
+# Install dependencies
+
+# Install GCC and linux headers on the host machine
+# This is unfortunate but necessary.  The NVIDIA driver build must be
+#   compiled with the same version of GCC as the kernel.  In addition,
+#   linux-headers are machine image specific.
+
+if [[ ! -f ${ROOTFS_DIR}/usr/bin/gcc ]]; then
+  # Cuda requires regular stock gcc and host headers
+  chroot ${ROOTFS_DIR} apt-get update
+  # use --no-upgrade so that the c-libs are not upgraded, possibly breaking programs and requiring a restart
+  chroot ${ROOTFS_DIR} /bin/bash -c 'apt-get --no-upgrade -y install gcc linux-headers-$(uname -r)'
+fi
+
+if [[ ! -f ${ROOTFS_DIR}/usr/bin/gcc-7 ]]; then
+  echo "Installing gcc-7 on host machine"
+
+  # Temporarily add the debian "buster" repo where gcc-7 lives
+  #   But first clear it out if it already exists
+  sed -n '/buster/q;p' -i ${ROOTFS_DIR}/etc/apt/sources.list
+  echo "deb http://deb.debian.org/debian buster main" >> ${ROOTFS_DIR}/etc/apt/sources.list
+
+  # Install gcc-7
+  chroot ${ROOTFS_DIR} apt-get update
+  chroot ${ROOTFS_DIR} /bin/bash -c 'apt-get -y install linux-headers-$(uname -r)'
+  chroot ${ROOTFS_DIR} /bin/bash -c 'DEBIAN_FRONTEND=noninteractive apt-get -t buster --no-upgrade -y install gcc-7'
+
+  # Remove the debian "buster" repo line that was added above
+  sed -n '/buster/q;p' -i ${ROOTFS_DIR}/etc/apt/sources.list
+  chroot ${ROOTFS_DIR} apt-get update
+fi
+
+# Unload open-source nouveau driver if it exists
+#   The nvidia drivers won't install otherwise
+#   "g3" instances in particular have this module auto-loaded
+chroot ${ROOTFS_DIR} modprobe -r nouveau || true


-# This is pretty annoying.... note this is installed onto the host
-chroot /rootfs apt-get update
-chroot /rootfs apt-get install --yes gcc
+#################################################
+# Download and install the Nvidia drivers and cuda libraries

-mkdir -p /rootfs/tmp
-cd /rootfs/tmp
-# TODO: We can't download over SSL - presents an akamai cert
-wget http://us.download.nvidia.com/XFree86/Linux-x86_64/375.39/NVIDIA-Linux-x86_64-375.39.run
-echo '5e5b9fbf12f4f926ed70c1fe39f71d9d9f154abea0268b1cf035982b34bd7c94baef7667e4f647cc19a62702b46f63b3c3df9f1589261f7138ed2ff151af63cc  NVIDIA-Linux-x86_64-375.39.run' | sha3sum -c - || exit 1
-chmod +x NVIDIA-Linux-x86_64-375.39.run
-chroot /rootfs /tmp/NVIDIA-Linux-x86_64-375.39.run --accept-license --ui=none
+# Create list of URLs and Checksums by merging driver item with array of cuda files
+downloads=(${class_to_driver_file[$AWS_INSTANCE_CLASS]} ${cuda_files[@]})
+checksums=(${class_to_driver_checksum[$AWS_INSTANCE_CLASS]} ${cuda_files_checksums[@]})

-cd /rootfs/tmp
-wget https://developer.nvidia.com/compute/cuda/8.0/Prod2/local_installers/cuda_8.0.61_375.26_linux-run
-chmod +x cuda_8.0.61_375.26_linux-run
-# If we want to install samples as well, add: --samples
-chroot /rootfs /tmp/cuda_8.0.61_375.26_linux-run --toolkit --silent
+# Ensure that the cache directory exists
+mkdir -p $CACHE_DIR_CONTAINER

-chroot /rootfs nvidia-smi -pm 1
-chroot /rootfs nvidia-smi -acp 0
-chroot /rootfs nvidia-smi --auto-boost-default=0
-chroot /rootfs nvidia-smi --auto-boost-permission=0
-chroot /rootfs nvidia-smi -ac 2505,875
+# Download, verify, and execute each file
+length=${#downloads[@]}
+for (( i=0; i<${length}; i++ )); do
+  download=${downloads[$i]}
+  checksum=${checksums[$i]}
+  filename=$(basename $download)
+  filepath_host="${CACHE_DIR_HOST}/${filename}"
+  filepath_container="${CACHE_DIR_CONTAINER}/${filename}"
+  filepath_installed="${CACHE_DIR_CONTAINER}/${filename}.installed"

+  echo "Checking for file at $filepath_container"
+  if [[ ! -f $filepath_container ]] || ! (echo "$checksum  $filepath_container" | sha1sum -c - 2>&1 >/dev/null); then
+    echo "Downloading $download"
+    curl -L $download > $filepath_container
+    chmod a+x $filepath_container
+  fi

-# TODO: Problem ... why is this needed - why didn't this happen when we installed nvidia-uvm?
-# TODO: Problem ... we need to restart kubelet
+  echo "Verifying sha1sum of file at $filepath_container"
+  if ! (echo "$checksum  $filepath_container" | sha1sum -c -); then
+    echo "Failed to verify sha1sum for file at $filepath_container"
+    exit 1
+  fi

-chroot /rootfs /sbin/modprobe nvidia-uvm
+  # Install the Nvidia driver and cuda libs
+  if [[ -f $filepath_installed ]]; then
+    echo "Detected prior install of file $filename on host"
+  else
+    echo "Installing file $filename on host"
+    if [[ $download =~ .*NVIDIA.* ]]; then
+      # Install the nvidia package (using gcc-7)
+      chroot ${ROOTFS_DIR} /bin/bash -c "CC=/usr/bin/gcc-7 $filepath_host --accept-license --silent"
+      touch $filepath_installed # Mark successful installation
+    elif [[ $download =~ .*local_installers.*cuda.* ]]; then
+      # Install the primary cuda library (using gcc)
+      chroot ${ROOTFS_DIR} $filepath_host --toolkit --silent --verbose
+      touch $filepath_installed # Mark successful installation
+    elif [[ $download =~ .*patches.*cuda.* ]]; then
+      # Install an update to the primary cuda library (using gcc)
+      chroot ${ROOTFS_DIR} $filepath_host --accept-eula --silent
+      touch $filepath_installed # Mark successful installation
+    else
+      echo "Unable to handle file $filepath_host"
+    fi
+  fi
+done

-if [ "$?" -eq 0 ]; then
+#################################################
+# Now that things are installed, let's output GPU info for debugging
+chroot ${ROOTFS_DIR} nvidia-smi --list-gpus
+
+# Configure and Optimize Nvidia cards now that things are installed
+#   AWS Optimization Doc
+#     https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/optimize_gpu.html
+#   Nvidia Doc
+#     http://developer.download.nvidia.com/compute/DCGM/docs/nvidia-smi-367.38.pdf
+
+# Common configurations
+chroot ${ROOTFS_DIR} nvidia-smi -pm 1
+chroot ${ROOTFS_DIR} nvidia-smi --auto-boost-default=0
+chroot ${ROOTFS_DIR} nvidia-smi --auto-boost-permission=0
+
+# Custom configurations per class of nvidia video card
+case "$AWS_INSTANCE_CLASS" in
+"g2" | "g3")
+  chroot ${ROOTFS_DIR} nvidia-smi -ac 2505,1177
+  ;;
+"p2")
+  chroot ${ROOTFS_DIR} nvidia-smi -ac 2505,875
+  chroot ${ROOTFS_DIR} nvidia-smi -acp 0
+  ;;
+"p3")
+  chroot ${ROOTFS_DIR} nvidia-smi -ac 877,1530
+  chroot ${ROOTFS_DIR} nvidia-smi -acp 0
+  ;;
+*)
+  ;;
+esac
+
+# Load the Kernel Module
+if ! chroot ${ROOTFS_DIR} /sbin/modprobe nvidia-uvm; then
+  echo "Unable to modprobe nvidia-uvm"
+  exit 1
+fi
+
+# Ensure that the device node exists
+if ! chroot ${ROOTFS_DIR} test -e /dev/nvidia-uvm; then
  # Find out the major device number used by the nvidia-uvm driver
  D=`grep nvidia-uvm /proc/devices | awk '{print $1}'`
-
-  chroot /rootfs mknod -m 666 /dev/nvidia-uvm c $D 0
-else
-  echo "Unable to modprobe nvidia-uvm"
+  chroot ${ROOTFS_DIR} mknod -m 666 /dev/nvidia-uvm c $D 0
 fi
+
+# Restart Kubelet
+echo "Restarting Kubelet"
+systemctl restart kubelet.service