Use PriorityClassName instead of namespace in cluster validation

This commit is contained in:
John Gardiner Myers 2020-01-20 15:25:29 -08:00
parent 79bbc3ef19
commit c557289c4b
3 changed files with 127 additions and 69 deletions

View File

@ -15,8 +15,10 @@ go_library(
"//upup/pkg/fi:go_default_library",
"//vendor/k8s.io/api/core/v1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/runtime:go_default_library",
"//vendor/k8s.io/client-go/kubernetes:go_default_library",
"//vendor/k8s.io/client-go/tools/clientcmd:go_default_library",
"//vendor/k8s.io/client-go/tools/pager:go_default_library",
"//vendor/k8s.io/klog:go_default_library",
],
)

View File

@ -17,11 +17,14 @@ limitations under the License.
package validation
import (
"context"
"fmt"
"net"
"net/url"
"strings"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/client-go/tools/pager"
"k8s.io/kops/upup/pkg/fi"
v1 "k8s.io/api/core/v1"
@ -195,11 +198,6 @@ func (v *ValidationCluster) collectComponentFailures(client kubernetes.Interface
}
func (v *ValidationCluster) collectPodFailures(client kubernetes.Interface, nodes []v1.Node) error {
pods, err := client.CoreV1().Pods("kube-system").List(metav1.ListOptions{})
if err != nil {
return fmt.Errorf("error listing Pods: %v", err)
}
masterWithoutManager := map[string]bool{}
nodeByAddress := map[string]string{}
for _, node := range nodes {
@ -212,17 +210,24 @@ func (v *ValidationCluster) collectPodFailures(client kubernetes.Interface, node
}
}
for _, pod := range pods.Items {
err := pager.New(pager.SimplePageFunc(func(opts metav1.ListOptions) (runtime.Object, error) {
return client.CoreV1().Pods(metav1.NamespaceAll).List(opts)
})).EachListItem(context.TODO(), metav1.ListOptions{}, func(obj runtime.Object) error {
pod := obj.(*v1.Pod)
priority := pod.Spec.PriorityClassName
if priority != "system-cluster-critical" && priority != "system-node-critical" {
return nil
}
if pod.Status.Phase == v1.PodSucceeded {
continue
return nil
}
if pod.Status.Phase == v1.PodPending {
v.addError(&ValidationError{
Kind: "Pod",
Name: "kube-system/" + pod.Name,
Message: fmt.Sprintf("kube-system pod %q is pending", pod.Name),
Name: pod.Namespace + "/" + pod.Name,
Message: fmt.Sprintf("%s pod %q is pending", priority, pod.Name),
})
continue
return nil
}
var notready []string
for _, container := range pod.Status.ContainerStatuses {
@ -233,8 +238,8 @@ func (v *ValidationCluster) collectPodFailures(client kubernetes.Interface, node
if len(notready) != 0 {
v.addError(&ValidationError{
Kind: "Pod",
Name: "kube-system/" + pod.Name,
Message: fmt.Sprintf("kube-system pod %q is not ready (%s)", pod.Name, strings.Join(notready, ",")),
Name: pod.Namespace + "/" + pod.Name,
Message: fmt.Sprintf("%s pod %q is not ready (%s)", priority, pod.Name, strings.Join(notready, ",")),
})
}
@ -243,6 +248,10 @@ func (v *ValidationCluster) collectPodFailures(client kubernetes.Interface, node
if pod.Namespace == "kube-system" && labels != nil && labels["k8s-app"] == "kube-controller-manager" {
delete(masterWithoutManager, nodeByAddress[pod.Status.HostIP])
}
return nil
})
if err != nil {
return fmt.Errorf("error listing Pods: %v", err)
}
for node := range masterWithoutManager {

View File

@ -17,6 +17,7 @@ limitations under the License.
package validation
import (
"fmt"
"testing"
"github.com/stretchr/testify/assert"
@ -379,19 +380,21 @@ func Test_ValidateMasterNoKubeControllerManager(t *testing.T) {
v, err := testValidate(t, groups, makePodList(
[]map[string]string{
{
"name": "pod1",
"ready": "true",
"k8s-app": "kube-controller-manager",
"phase": string(v1.PodRunning),
"hostip": "1.2.3.4",
"name": "pod1",
"ready": "true",
"k8s-app": "kube-controller-manager",
"phase": string(v1.PodRunning),
"priorityClassName": "system-cluster-critical",
"hostip": "1.2.3.4",
},
{
"name": "pod2",
"namespace": "other",
"ready": "true",
"k8s-app": "kube-controller-manager",
"phase": string(v1.PodRunning),
"hostip": "5.6.7.8",
"name": "pod2",
"namespace": "other",
"ready": "true",
"k8s-app": "kube-controller-manager",
"phase": string(v1.PodRunning),
"priorityClassName": "system-cluster-critical",
"hostip": "5.6.7.8",
},
},
))
@ -457,67 +460,109 @@ func Test_ValidateComponentFailure(t *testing.T) {
}
// Test_ValidateNoPodFailures checks that validating a pod list made up only of
// pods that should not be flagged produces an empty v.Failures.
//
// NOTE(review): this span reads like diff output whose +/- markers were
// stripped, so lines from the old and the new version of the test appear back
// to back (for example two `v, err := testValidate(...)` calls and two
// assertions on v.Failures) — confirm against the real file before editing.
func Test_ValidateNoPodFailures(t *testing.T) {
v, err := testValidate(t, nil, makePodList(
[]map[string]string{
{
"name": "pod1",
"ready": "true",
"phase": string(v1.PodRunning),
},
{
"name": "job1",
"ready": "false",
"phase": string(v1.PodSucceeded),
},
},
))
// kube-system pods whose priority class is empty or "otherPriority": one ready
// and one not-ready pod for every listed phase.
testpods := []map[string]string{}
for _, phase := range []v1.PodPhase{
v1.PodPending,
v1.PodRunning,
v1.PodSucceeded,
v1.PodFailed,
v1.PodUnknown,
} {
for _, priority := range []string{"", "otherPriority"} {
testpods = append(testpods, []map[string]string{
{
"name": fmt.Sprintf("ready-%s-%s", priority, string(phase)),
"namespace": "kube-system",
"priorityClassName": priority,
"ready": "true",
"phase": string(phase),
},
{
"name": fmt.Sprintf("notready-%s-%s", priority, string(phase)),
"namespace": "kube-system",
"priorityClassName": priority,
"ready": "false",
"phase": string(phase),
},
}...)
}
}
// Pods with a system-node-critical or system-cluster-critical priority class,
// in kube-system and in another namespace: a ready Running pod and a
// not-ready pod in the Succeeded phase.
for _, namespace := range []string{"kube-system", "otherNamespace"} {
for _, priority := range []string{"node", "cluster"} {
testpods = append(testpods, []map[string]string{
{
"name": fmt.Sprintf("ready-%s-%s", priority, namespace),
"namespace": namespace,
"priorityClassName": fmt.Sprintf("system-%s-critical", priority),
"ready": "true",
"phase": string(v1.PodRunning),
},
{
"name": fmt.Sprintf("notready-%s-%s", priority, namespace),
"namespace": namespace,
"priorityClassName": fmt.Sprintf("system-%s-critical", priority),
"ready": "false",
"phase": string(v1.PodSucceeded),
},
}...)
}
}
v, err := testValidate(t, nil, makePodList(testpods))
require.NoError(t, err)
assert.Empty(t, v.Failures)
// printDebug is only called when the emptiness assertion fails.
if !assert.Empty(t, v.Failures) {
printDebug(t, v)
}
}
// Test_ValidatePodFailure checks that a single not-ready pod ("pod1") in the
// Pending or Running phase yields exactly one entry in v.Failures, and that
// the entry equals the expected ValidationError of Kind "Pod".
//
// NOTE(review): this span reads like diff output whose +/- markers were
// stripped, so lines from the old and the new version of the test appear back
// to back (for example two `expected` fields in the case struct and two
// t.Run bodies) — confirm against the real file before editing.
func Test_ValidatePodFailure(t *testing.T) {
for _, tc := range []struct {
name string
phase v1.PodPhase
expected ValidationError
expected string
}{
{
name: "pending",
phase: v1.PodPending,
expected: ValidationError{
Kind: "Pod",
Name: "kube-system/pod1",
Message: "kube-system pod \"pod1\" is pending",
},
name: "pending",
phase: v1.PodPending,
expected: "pending",
},
{
name: "notready",
phase: v1.PodRunning,
expected: ValidationError{
Kind: "Pod",
Name: "kube-system/pod1",
Message: "kube-system pod \"pod1\" is not ready (container1,container2)",
},
name: "notready",
phase: v1.PodRunning,
expected: "not ready (container1,container2)",
},
} {
t.Run(tc.name, func(t *testing.T) {
v, err := testValidate(t, nil, makePodList(
[]map[string]string{
{
"name": "pod1",
"ready": "false",
"phase": string(tc.phase),
},
},
))
// Each case is run as a subtest for both system-critical priority classes and
// for a pod inside and outside the kube-system namespace.
for _, priority := range []string{"node", "cluster"} {
for _, namespace := range []string{"kube-system", "otherNamespace"} {
t.Run(fmt.Sprintf("%s-%s-%s", tc.name, priority, namespace), func(t *testing.T) {
v, err := testValidate(t, nil, makePodList(
[]map[string]string{
{
"name": "pod1",
"namespace": namespace,
"priorityClassName": fmt.Sprintf("system-%s-critical", priority),
"ready": "false",
"phase": string(tc.phase),
},
},
))
// The expected error is named "<namespace>/pod1" and its message starts with
// the pod's priority class name, followed by the case's expected suffix.
expected := ValidationError{
Kind: "Pod",
Name: fmt.Sprintf("%s/pod1", namespace),
Message: fmt.Sprintf("system-%s-critical pod \"pod1\" is %s", priority, tc.expected),
}
require.NoError(t, err)
if !assert.Len(t, v.Failures, 1) ||
!assert.Equal(t, &tc.expected, v.Failures[0]) {
printDebug(t, v)
require.NoError(t, err)
// printDebug is only called when either assertion fails.
if !assert.Len(t, v.Failures, 1) ||
!assert.Equal(t, &expected, v.Failures[0]) {
printDebug(t, v)
}
})
}
})
}
}
}
@ -543,7 +588,9 @@ func dummyPod(podMap map[string]string) v1.Pod {
Namespace: namespace,
Labels: labels,
},
Spec: v1.PodSpec{},
Spec: v1.PodSpec{
PriorityClassName: podMap["priorityClassName"],
},
Status: v1.PodStatus{
Phase: v1.PodPhase(podMap["phase"]),
ContainerStatuses: []v1.ContainerStatus{