Merge pull request #9049 from johngmyers/validate-more-static

Validate more static pods on masters
This commit is contained in:
Kubernetes Prow Robot 2020-05-04 10:56:26 -07:00 committed by GitHub
commit 7da181e7a4
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 79 additions and 28 deletions

View File

@@ -166,13 +166,13 @@ func (v *clusterValidatorImpl) Validate() (*ValidationCluster, error) {
if err != nil {
return nil, err
}
validation.validateNodes(cloudGroups)
readyNodes := validation.validateNodes(cloudGroups)
if err := validation.collectComponentFailures(ctx, v.k8sClient); err != nil {
return nil, fmt.Errorf("cannot get component status for %q: %v", clusterName, err)
}
if err := validation.collectPodFailures(ctx, v.k8sClient, nodeList.Items); err != nil {
if err := validation.collectPodFailures(ctx, v.k8sClient, readyNodes); err != nil {
return nil, fmt.Errorf("cannot get pod health for %q: %v", clusterName, err)
}
@@ -199,13 +199,22 @@ func (v *ValidationCluster) collectComponentFailures(ctx context.Context, client
return nil
}
var masterStaticPods = []string{
"kube-apiserver",
"kube-controller-manager",
"kube-scheduler",
}
func (v *ValidationCluster) collectPodFailures(ctx context.Context, client kubernetes.Interface, nodes []v1.Node) error {
masterWithoutManager := map[string]bool{}
masterWithoutPod := map[string]map[string]bool{}
nodeByAddress := map[string]string{}
for _, node := range nodes {
labels := node.GetLabels()
if labels != nil && labels["kubernetes.io/role"] == "master" {
masterWithoutManager[node.Name] = true
masterWithoutPod[node.Name] = map[string]bool{}
for _, pod := range masterStaticPods {
masterWithoutPod[node.Name][pod] = true
}
}
for _, nodeAddress := range node.Status.Addresses {
nodeByAddress[nodeAddress.Address] = node.Name
@@ -254,9 +263,9 @@ func (v *ValidationCluster) collectPodFailures(ctx context.Context, client kuber
}
labels := pod.GetLabels()
if pod.Namespace == "kube-system" && labels != nil && labels["k8s-app"] == "kube-controller-manager" {
delete(masterWithoutManager, nodeByAddress[pod.Status.HostIP])
app := pod.GetLabels()["k8s-app"]
if pod.Namespace == "kube-system" && masterWithoutPod[nodeByAddress[pod.Status.HostIP]][app] {
delete(masterWithoutPod[nodeByAddress[pod.Status.HostIP]], app)
}
return nil
})
@@ -264,18 +273,21 @@ func (v *ValidationCluster) collectPodFailures(ctx context.Context, client kuber
return fmt.Errorf("error listing Pods: %v", err)
}
for node := range masterWithoutManager {
v.addError(&ValidationError{
Kind: "Node",
Name: node,
Message: fmt.Sprintf("master %q is missing kube-controller-manager pod", node),
})
for node, nodeMap := range masterWithoutPod {
for app := range nodeMap {
v.addError(&ValidationError{
Kind: "Node",
Name: node,
Message: fmt.Sprintf("master %q is missing %s pod", node, app),
})
}
}
return nil
}
func (v *ValidationCluster) validateNodes(cloudGroups map[string]*cloudinstances.CloudInstanceGroup) {
func (v *ValidationCluster) validateNodes(cloudGroups map[string]*cloudinstances.CloudInstanceGroup) []v1.Node {
var readyNodes []v1.Node
for _, cloudGroup := range cloudGroups {
var allMembers []*cloudinstances.CloudInstanceGroupMember
allMembers = append(allMembers, cloudGroup.Ready...)
@@ -332,6 +344,9 @@ func (v *ValidationCluster) validateNodes(cloudGroups map[string]*cloudinstances
}
ready := isNodeReady(node)
if ready {
readyNodes = append(readyNodes, *node)
}
if n.Role == "master" {
if !ready {
@@ -358,4 +373,6 @@ func (v *ValidationCluster) validateNodes(cloudGroups map[string]*cloudinstances
}
}
}
return readyNodes
}

View File

@@ -374,7 +374,7 @@ func Test_ValidateMasterNotReady(t *testing.T) {
}
}
func Test_ValidateMasterNoKubeControllerManager(t *testing.T) {
func Test_ValidateMasterStaticPods(t *testing.T) {
groups := make(map[string]*cloudinstances.CloudInstanceGroup)
groups["node-1"] = &cloudinstances.CloudInstanceGroup{
InstanceGroup: &kopsapi.InstanceGroup{
@@ -427,37 +427,71 @@ func Test_ValidateMasterNoKubeControllerManager(t *testing.T) {
},
},
},
{
ID: "i-00003",
Node: &v1.Node{
ObjectMeta: metav1.ObjectMeta{
Name: "master-1c",
Labels: map[string]string{"kubernetes.io/role": "master"},
},
Status: v1.NodeStatus{
Conditions: []v1.NodeCondition{
{Type: "Ready", Status: v1.ConditionFalse},
},
Addresses: []v1.NodeAddress{
{
Address: "9.10.11.12",
},
},
},
},
},
},
}
v, err := testValidate(t, groups, makePodList(
[]map[string]string{
var podList []map[string]string
expectedFailures := []*ValidationError{
{
Kind: "Node",
Name: "master-1c",
Message: "master \"master-1c\" is not ready",
},
}
for i, pod := range []string{
"kube-apiserver",
"kube-controller-manager",
"kube-scheduler",
} {
podList = append(podList, []map[string]string{
{
"name": "pod1",
"name": fmt.Sprintf("pod-a-%d", i),
"ready": "true",
"k8s-app": "kube-controller-manager",
"k8s-app": pod,
"phase": string(v1.PodRunning),
"priorityClassName": "system-cluster-critical",
"hostip": "1.2.3.4",
},
{
"name": "pod2",
"name": fmt.Sprintf("pod-b-%d", i),
"namespace": "other",
"ready": "true",
"k8s-app": "kube-controller-manager",
"k8s-app": pod,
"phase": string(v1.PodRunning),
"priorityClassName": "system-cluster-critical",
"hostip": "5.6.7.8",
},
},
))
require.NoError(t, err)
if !assert.Len(t, v.Failures, 1) ||
!assert.Equal(t, &ValidationError{
}...)
expectedFailures = append(expectedFailures, &ValidationError{
Kind: "Node",
Name: "master-1b",
Message: "master \"master-1b\" is missing kube-controller-manager pod",
}, v.Failures[0]) {
Message: "master \"master-1b\" is missing " + pod + " pod",
})
}
v, err := testValidate(t, groups, makePodList(podList))
require.NoError(t, err)
if !assert.ElementsMatch(t, v.Failures, expectedFailures) {
printDebug(t, v)
}
}