From 9284cf7ce3e5f952390dc4c47787b492076f754d Mon Sep 17 00:00:00 2001 From: David Oppenheimer Date: Thu, 24 Nov 2016 15:22:19 -0800 Subject: [PATCH] Add task documentation for safely draining nodes. Also update PDB to v1beta1. --- _data/tasks.yml | 2 + docs/admin/disruptions.md | 4 +- .../administer-cluster/safely-drain-node.md | 88 +++++++++++++++++++ docs/tasks/index.md | 2 + 4 files changed, 94 insertions(+), 2 deletions(-) create mode 100644 docs/tasks/administer-cluster/safely-drain-node.md diff --git a/_data/tasks.yml b/_data/tasks.yml index 6d2cdbf4de..544b9b04bc 100644 --- a/_data/tasks.yml +++ b/_data/tasks.yml @@ -43,3 +43,5 @@ toc: path: /docs/tasks/manage-stateful-set/deleting-a-statefulset/ - title: Debugging a StatefulSet path: /docs/tasks/manage-stateful-set/debugging-a-statefulset/ + - title: Safely Draining a Node while Respecting Application SLOs + path: /docs/tasks/administer-cluster/safely-drain-node/ diff --git a/docs/admin/disruptions.md b/docs/admin/disruptions.md index 0743b565e6..df94c58cf2 100644 --- a/docs/admin/disruptions.md +++ b/docs/admin/disruptions.md @@ -1,6 +1,6 @@ --- assignees: -- mml +- davidopp --- This guide is for anyone wishing to specify safety constraints on pods or anyone @@ -59,7 +59,7 @@ itself. To attempt an eviction (perhaps more REST-precisely, to attempt to ```json { - "apiVersion": "policy/v1alpha1", + "apiVersion": "policy/v1beta1", "kind": "Eviction", "metadata": { "name": "quux", diff --git a/docs/tasks/administer-cluster/safely-drain-node.md b/docs/tasks/administer-cluster/safely-drain-node.md new file mode 100644 index 0000000000..b1fd572419 --- /dev/null +++ b/docs/tasks/administer-cluster/safely-drain-node.md @@ -0,0 +1,88 @@ +--- +assignees: +- davidopp + +--- +

{% capture overview %}
This page shows how to safely drain a machine, respecting the application-level
disruption SLOs you have specified using PodDisruptionBudget.
{% endcapture %}

{% capture prerequisites %}

This task assumes that you have met the following prerequisites:

* You are using Kubernetes release >= 1.5.
* You have created [PodDisruptionBudget(s)](/docs/admin/disruptions.md) to express the
application-level disruption SLOs you want the system to enforce.

{% endcapture %}

{% capture steps %}

### Use `kubectl drain` to remove a node from service

You can use `kubectl drain` to safely evict all of your pods from a
node before you perform maintenance on the node (e.g. kernel upgrade,
hardware maintenance, etc.). Safe evictions allow the pod's containers
to [gracefully terminate](/docs/user-guide/production-pods.md#lifecycle-hooks-and-termination-notice) and
will respect the `PodDisruptionBudgets` you have specified.

**Note:** By default `kubectl drain` will ignore certain system pods on the node
that cannot be killed; see the [kubectl drain](/docs/user-guide/kubectl/kubectl_drain.md)
documentation for more details.

When `kubectl drain` returns successfully, that indicates that all of
the pods (except the ones excluded as described in the previous paragraph)
have been safely evicted (respecting the desired graceful
termination period, and without violating any application-level
disruption SLOs). It is then safe to bring down the node by powering
down its physical machine or, if running on a cloud platform, deleting its
virtual machine.
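Before you start a drain, you may want to confirm that the `PodDisruptionBudgets` covering your applications currently allow an eviction. The commands below are a read-only sketch of that check; `my-namespace` and `my-pdb` are placeholder names for your own namespace and budget, not objects created by this task.

```shell
# List the PodDisruptionBudgets in a namespace ("my-namespace" is a placeholder).
kubectl get poddisruptionbudgets --namespace=my-namespace

# Describe one budget ("my-pdb" is a placeholder) to see how many voluntary
# disruptions it currently allows; if it allows none, evictions issued by a
# drain will be refused until enough pods are ready again.
kubectl describe poddisruptionbudget my-pdb --namespace=my-namespace
```

If a budget does not currently allow any disruptions, `kubectl drain` does not override it; the drain will not evict the protected pods until the budget allows an eviction, as described below.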
First, identify the name of the node you wish to drain. You can list all of the nodes in your cluster with

```shell
kubectl get nodes
```

Next, tell Kubernetes to drain the node:

```shell
kubectl drain <node name>
```

Once it returns (without giving an error), you can power down the node
(or equivalently, if on a cloud platform, delete the virtual machine backing the node).
If you leave the node in the cluster during the maintenance operation, you need to run

```shell
kubectl uncordon <node name>
```

afterwards to tell Kubernetes that it can resume scheduling new pods onto the node.

### Draining multiple nodes in parallel

The `kubectl drain` command should only be issued to a single node at a
time. However, you can run multiple `kubectl drain` commands for
different nodes in parallel, in different terminals or in the
background. Multiple drain commands running concurrently will still
respect the `PodDisruptionBudget` you specify.

For example, suppose you have a StatefulSet with three replicas and have
set a `PodDisruptionBudget` for that set specifying `minAvailable: 2`.
`kubectl drain` will only evict a pod from the StatefulSet if all
three pods are ready, and if you issue multiple drain commands in
parallel, Kubernetes will respect the PodDisruptionBudget and ensure
that only one pod is unavailable at any given time. Any drains that
would cause the number of ready replicas to fall below the specified
budget are blocked.


{% endcapture %}

{% capture whatsnext %}
*TODO: link to other docs about StatefulSet?*
{% endcapture %}

{% include templates/task.md %} diff --git a/docs/tasks/index.md b/docs/tasks/index.md index 947e45e968..819eb8d213 100644 --- a/docs/tasks/index.md +++ b/docs/tasks/index.md @@ -34,6 +34,8 @@ single thing, typically by giving a short sequence of steps. * [Scaling a StatefulSet](/docs/tasks/manage-stateful-set/scale-stateful-set/) * [Deleting a StatefulSet](/docs/tasks/manage-stateful-set/deleting-a-statefulset/) * [Debugging a StatefulSet](/docs/tasks/manage-stateful-set/debugging-a-statefulset/) +* [Safely Draining a Node while Respecting Application SLOs](/docs/tasks/administer-cluster/safely-drain-node/) +
### What's next