mirror of https://github.com/kubernetes/kops.git
92 lines
3.5 KiB
Bash
Executable File
92 lines
3.5 KiB
Bash
Executable File
# Copyright 2017 The Kubernetes Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

#!/bin/bash
# NOTE(review): a shebang is only honored on the very first line of a file;
# confirm it is not preceded by the license header in the deployed script.

# Strict mode: abort on any failing command (-e), on use of an unset
# variable (-u), and when any stage of a pipeline fails (pipefail).
set -euo pipefail
# Trace each command as it is executed.
set -x

#################################################
# Install nvidia-docker2

# This section is somewhat adapted from README at:
# https://github.com/NVIDIA/nvidia-docker

#######################################
# Cleanup old nvidia-docker

# If you have nvidia-docker 1.0 installed: we need to remove it and all
# existing GPU containers.
# Pipeline: list the volumes that use the nvidia-docker driver, look up every
# container (running or stopped) that references each volume, then force-remove
# those containers. `xargs -r` runs nothing when its input is empty.
# `-I{}` already runs one command per input line, so the mutually exclusive
# `-n1` is not passed.
docker volume ls -q -f driver=nvidia-docker \
  | xargs -r -I{} docker ps -q -a -f volume={} \
  | xargs -r docker rm -f

# Remove the old nvidia-docker if it exists.
# `|| true` is deliberate: a failed purge must not abort the script under
# `set -e`.
apt-get purge -y nvidia-docker || true

#######################################
# Add package repositories

# Add the package repository for docker-ce:
# import the repository signing key, then write the apt source entry.
# curl's -f turns an HTTP error into a non-zero exit, which (with pipefail)
# aborts the script.
curl -fsSL https://download.docker.com/linux/debian/gpg \
  | apt-key add -
# tee writes the source list and also echoes the entry to stdout.
echo 'deb [arch=amd64] https://download.docker.com/linux/debian stretch stable' \
  | tee /etc/apt/sources.list.d/docker-ce.list

# Add the package repository for nvidia-docker.
# -f makes curl exit non-zero on an HTTP error, so (with pipefail) a failed
# download aborts the script instead of an error page being imported as a key
# or written as an apt source list. -sS stays quiet but still prints errors.
curl -fsSL https://nvidia.github.io/nvidia-docker/gpgkey \
  | apt-key add -
# Distribution tag is ID followed by VERSION_ID from /etc/os-release. The file
# is sourced inside the command substitution, so its variables do not leak
# into this script.
distribution=$(. /etc/os-release; echo "${ID}${VERSION_ID}")
curl -fsSL "https://nvidia.github.io/nvidia-docker/${distribution}/nvidia-docker.list" \
  | tee /etc/apt/sources.list.d/nvidia-docker.list

# Override the default runtime with the one from nvidia
# Also explicitly set the storage-driver to the prior 'overlay'
# Make sure the target directory exists so the redirect below cannot fail.
mkdir -p /etc/docker
# The quoted 'EOF' delimiter writes the JSON literally, with no shell expansion.
cat << 'EOF' > /etc/docker/daemon.json
{
    "default-runtime": "nvidia",
    "runtimes": {
        "nvidia": {
            "path": "/usr/bin/nvidia-container-runtime",
            "runtimeArgs": []
        }
    },
    "storage-driver": "overlay"
}
EOF

# Install nvidia-docker2 and reload the Docker daemon configuration
# Note that the nvidia-docker version must match the docker-ce version
# --force-confold prevents prompt for replacement of daemon.json
# Refresh the package index so the repositories added above are visible.
apt-get -y update
# Stop protokube first so that it does not bring kubelet up again
systemctl stop protokube
# Stop kubelet so that it does not bring stopped containers up again and leak
# them as orphan containers
systemctl stop kubelet
# Pin versions, see:
# https://github.com/NVIDIA/nvidia-docker/wiki/Frequently-Asked-Questions#how-do-i-install-20-if-im-not-using-the-latest-docker-version
# --allow-downgrades permits moving to the pinned versions even if a newer
# one is installed; --force-confold keeps the existing configuration files
# without prompting.
apt-get install -y --allow-downgrades -o Dpkg::Options::="--force-confold" \
  nvidia-docker2=2.0.3+docker18.09.4-1 \
  nvidia-container-runtime=2.0.0+docker18.09.4-1 \
  docker-ce=5:18.09.4~3-0~debian-stretch

# Disable a few things that break docker-ce/gpu support upon reboot:
# Upon boot, the kops-configuration.service systemd unit sets up and starts
# the cloud-init.service which runs nodeup which forces docker-ce to a
# specific version that is a downgrade and incompatible with nvidia-docker2.
# Permanently disable these systemd units via masking.
systemctl mask cloud-init.service
systemctl mask kops-configuration.service

# Restore protokube, which was stopped before the package install
systemctl start protokube
# protokube does not seem to bring kubelet back up on its own, so start
# kubelet separately
systemctl start kubelet