diff --git a/pkg/validation/validate_cluster.go b/pkg/validation/validate_cluster.go index 17ad4bd5ef..58ac90df61 100644 --- a/pkg/validation/validate_cluster.go +++ b/pkg/validation/validate_cluster.go @@ -166,13 +166,13 @@ func (v *clusterValidatorImpl) Validate() (*ValidationCluster, error) { if err != nil { return nil, err } - validation.validateNodes(cloudGroups) + readyNodes := validation.validateNodes(cloudGroups) if err := validation.collectComponentFailures(ctx, v.k8sClient); err != nil { return nil, fmt.Errorf("cannot get component status for %q: %v", clusterName, err) } - if err := validation.collectPodFailures(ctx, v.k8sClient, nodeList.Items); err != nil { + if err := validation.collectPodFailures(ctx, v.k8sClient, readyNodes); err != nil { return nil, fmt.Errorf("cannot get pod health for %q: %v", clusterName, err) } @@ -199,13 +199,22 @@ func (v *ValidationCluster) collectComponentFailures(ctx context.Context, client return nil } +var masterStaticPods = []string{ + "kube-apiserver", + "kube-controller-manager", + "kube-scheduler", +} + func (v *ValidationCluster) collectPodFailures(ctx context.Context, client kubernetes.Interface, nodes []v1.Node) error { - masterWithoutManager := map[string]bool{} + masterWithoutPod := map[string]map[string]bool{} nodeByAddress := map[string]string{} for _, node := range nodes { labels := node.GetLabels() if labels != nil && labels["kubernetes.io/role"] == "master" { - masterWithoutManager[node.Name] = true + masterWithoutPod[node.Name] = map[string]bool{} + for _, pod := range masterStaticPods { + masterWithoutPod[node.Name][pod] = true + } } for _, nodeAddress := range node.Status.Addresses { nodeByAddress[nodeAddress.Address] = node.Name @@ -254,9 +263,9 @@ func (v *ValidationCluster) collectPodFailures(ctx context.Context, client kuber } - labels := pod.GetLabels() - if pod.Namespace == "kube-system" && labels != nil && labels["k8s-app"] == "kube-controller-manager" { - delete(masterWithoutManager, nodeByAddress[pod.Status.HostIP]) + app := pod.GetLabels()["k8s-app"] + if pod.Namespace == "kube-system" && masterWithoutPod[nodeByAddress[pod.Status.HostIP]][app] { + delete(masterWithoutPod[nodeByAddress[pod.Status.HostIP]], app) } return nil }) @@ -264,18 +273,21 @@ func (v *ValidationCluster) collectPodFailures(ctx context.Context, client kuber return fmt.Errorf("error listing Pods: %v", err) } - for node := range masterWithoutManager { - v.addError(&ValidationError{ - Kind: "Node", - Name: node, - Message: fmt.Sprintf("master %q is missing kube-controller-manager pod", node), - }) + for node, nodeMap := range masterWithoutPod { + for app := range nodeMap { + v.addError(&ValidationError{ + Kind: "Node", + Name: node, + Message: fmt.Sprintf("master %q is missing %s pod", node, app), + }) + } } return nil } -func (v *ValidationCluster) validateNodes(cloudGroups map[string]*cloudinstances.CloudInstanceGroup) { +func (v *ValidationCluster) validateNodes(cloudGroups map[string]*cloudinstances.CloudInstanceGroup) []v1.Node { + var readyNodes []v1.Node for _, cloudGroup := range cloudGroups { var allMembers []*cloudinstances.CloudInstanceGroupMember allMembers = append(allMembers, cloudGroup.Ready...) @@ -332,6 +344,9 @@ func (v *ValidationCluster) validateNodes(cloudGroups map[string]*cloudinstances } ready := isNodeReady(node) + if ready { + readyNodes = append(readyNodes, *node) + } if n.Role == "master" { if !ready { @@ -358,4 +373,6 @@ func (v *ValidationCluster) validateNodes(cloudGroups map[string]*cloudinstances } } } + + return readyNodes } diff --git a/pkg/validation/validate_cluster_test.go b/pkg/validation/validate_cluster_test.go index fb32e76b21..8829148876 100644 --- a/pkg/validation/validate_cluster_test.go +++ b/pkg/validation/validate_cluster_test.go @@ -374,7 +374,7 @@ func Test_ValidateMasterNotReady(t *testing.T) { } } -func Test_ValidateMasterNoKubeControllerManager(t *testing.T) { +func Test_ValidateMasterStaticPods(t *testing.T) { groups := make(map[string]*cloudinstances.CloudInstanceGroup) groups["node-1"] = &cloudinstances.CloudInstanceGroup{ InstanceGroup: &kopsapi.InstanceGroup{ @@ -427,37 +427,71 @@ func Test_ValidateMasterNoKubeControllerManager(t *testing.T) { }, }, }, + { + ID: "i-00003", + Node: &v1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: "master-1c", + Labels: map[string]string{"kubernetes.io/role": "master"}, + }, + Status: v1.NodeStatus{ + Conditions: []v1.NodeCondition{ + {Type: "Ready", Status: v1.ConditionFalse}, + }, + Addresses: []v1.NodeAddress{ + { + Address: "9.10.11.12", + }, + }, + }, + }, + }, }, } - v, err := testValidate(t, groups, makePodList( - []map[string]string{ + var podList []map[string]string + expectedFailures := []*ValidationError{ + { + Kind: "Node", + Name: "master-1c", + Message: "master \"master-1c\" is not ready", + }, + } + + for i, pod := range []string{ + "kube-apiserver", + "kube-controller-manager", + "kube-scheduler", + } { + podList = append(podList, []map[string]string{ { - "name": "pod1", + "name": fmt.Sprintf("pod-a-%d", i), "ready": "true", - "k8s-app": "kube-controller-manager", + "k8s-app": pod, "phase": string(v1.PodRunning), "priorityClassName": "system-cluster-critical", "hostip": "1.2.3.4", }, { - "name": "pod2", + "name": fmt.Sprintf("pod-b-%d", i), "namespace": "other", "ready": "true", - "k8s-app": "kube-controller-manager", + "k8s-app": pod, "phase": string(v1.PodRunning), "priorityClassName": "system-cluster-critical", "hostip": "5.6.7.8", }, - }, - )) - require.NoError(t, err) - if !assert.Len(t, v.Failures, 1) || - !assert.Equal(t, &ValidationError{ + }...) + expectedFailures = append(expectedFailures, &ValidationError{ Kind: "Node", Name: "master-1b", - Message: "master \"master-1b\" is missing kube-controller-manager pod", - }, v.Failures[0]) { + Message: "master \"master-1b\" is missing " + pod + " pod", + }) + } + + v, err := testValidate(t, groups, makePodList(podList)) + require.NoError(t, err) + if !assert.ElementsMatch(t, v.Failures, expectedFailures) { printDebug(t, v) } }