/*
Copyright 2016 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package validation

import (
	"fmt"
	"net"
	"net/url"

	"k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/client-go/kubernetes"
	"k8s.io/client-go/tools/clientcmd"
	"k8s.io/kops/pkg/apis/kops"
	"k8s.io/kops/pkg/apis/kops/util"
	"k8s.io/kops/pkg/dns"
)

// ValidationCluster holds the outcome of validating a cluster: per-role
// readiness, the raw node list, failing components and pods, and the overall
// status. It is serialized with the JSON field names given in the tags.
type ValidationCluster struct {
	// MastersReady reports whether the masters passed the readiness check.
	MastersReady bool `json:"mastersReady,omitempty"`
	// MastersReadyArray lists the masters that were found ready.
	MastersReadyArray []*ValidationNode `json:"mastersReadyArray,omitempty"`
	// MastersNotReadyArray lists the masters that were found not ready.
	MastersNotReadyArray []*ValidationNode `json:"mastersNotReadyArray,omitempty"`
	// MastersCount is the number of masters. When it is zero on entry to node
	// validation, it is counted from the node list.
	MastersCount int `json:"mastersCount,omitempty"`

	// NodesReady reports whether the non-master nodes passed the readiness check.
	NodesReady bool `json:"nodesReady,omitempty"`
	// NodesReadyArray lists the nodes that were found ready.
	NodesReadyArray []*ValidationNode `json:"nodesReadyArray,omitempty"`
	// NodesNotReadyArray lists the nodes that were found not ready.
	NodesNotReadyArray []*ValidationNode `json:"nodesNotReadyArray,omitempty"`
	// NodesCount is the number of nodes. When it is zero on entry to node
	// validation, it is counted from the node list.
	NodesCount int `json:"nodesCount,omitempty"`

	// NodeList is the node list the validation was computed from.
	NodeList *v1.NodeList `json:"nodeList,omitempty"`

	// ComponentFailures holds the names of components that have a condition
	// whose status is not ConditionTrue.
	ComponentFailures []string `json:"componentFailures,omitempty"`
	// PodFailures holds the names of kube-system pods that have a container
	// which is not ready.
	PodFailures []string `json:"podFailures,omitempty"`
	// ErrorMessage is a human-readable message describing the validation result.
	ErrorMessage string `json:"errorMessage,omitempty"`
	// Status is the overall result; see the ClusterValidation* constants.
	Status string `json:"status"`
	// ClusterName is the name of the validated cluster.
	ClusterName string `json:"clusterName"`
	// InstanceGroups are the instance groups supplied for validation.
	InstanceGroups []*kops.InstanceGroup `json:"instanceGroups,omitempty"`
}

// ValidationNode is a K8s node to be validated.
type ValidationNode struct { Zone string `json:"zone,omitempty"` Role string `json:"role,omitempty"` Hostname string `json:"hostname,omitempty"` Status v1.ConditionStatus `json:"status,omitempty"` } const ( ClusterValidationFailed = "FAILED" ClusterValidationPassed = "PASSED" ) // HasPlaceHolderIP checks if the API DNS has been updated. func HasPlaceHolderIP(clusterName string) (bool, error) { config, err := clientcmd.NewNonInteractiveDeferredLoadingClientConfig( clientcmd.NewDefaultClientConfigLoadingRules(), &clientcmd.ConfigOverrides{CurrentContext: clusterName}).ClientConfig() apiAddr, err := url.Parse(config.Host) if err != nil { return true, fmt.Errorf("unable to parse Kubernetes cluster API URL: %v", err) } hostAddrs, err := net.LookupHost(apiAddr.Host) if err != nil { return true, fmt.Errorf("unable to resolve Kubernetes cluster API URL dns: %v", err) } for _, h := range hostAddrs { if h == "203.0.113.123" { return true, nil } } return false, nil } // ValidateCluster validate a k8s cluster with a provided instance group list func ValidateCluster(cluster *kops.Cluster, instanceGroupList *kops.InstanceGroupList, clusterKubernetesClient kubernetes.Interface) (*ValidationCluster, error) { clusterName := cluster.Name // Do not use if we are running gossip if !dns.IsGossipHostname(clusterName) { contextName := clusterName hasPlaceHolderIPAddress, err := HasPlaceHolderIP(contextName) if err != nil { return nil, err } if hasPlaceHolderIPAddress { message := "Validation Failed\n\n" + "The dns-controller Kubernetes deployment has not updated the Kubernetes cluster's API DNS entry to the correct IP address." + " The API DNS IP address is the placeholder address that kops creates: 203.0.113.123." + " Please wait about 5-10 minutes for a master to start, dns-controller to launch, and DNS to propagate." + " The protokube container and dns-controller deployment logs may contain more diagnostic information." 
+ " Etcd and the API DNS entries must be updated for a kops Kubernetes cluster to start." validationCluster := &ValidationCluster{ ClusterName: clusterName, ErrorMessage: message, Status: ClusterValidationFailed, } validationFailed := fmt.Errorf("\nCannot reach cluster's API server: unable to Validate Cluster: %s", clusterName) return validationCluster, validationFailed } } var instanceGroups []*kops.InstanceGroup for i := range instanceGroupList.Items { ig := &instanceGroupList.Items[i] instanceGroups = append(instanceGroups, ig) } if len(instanceGroups) == 0 { return nil, fmt.Errorf("no InstanceGroup objects found") } validationCluster := &ValidationCluster{ ClusterName: clusterName, ErrorMessage: ClusterValidationPassed, InstanceGroups: instanceGroups, } nodes, err := clusterKubernetesClient.CoreV1().Nodes().List(metav1.ListOptions{}) if err != nil { return nil, fmt.Errorf("error querying nodes: %v", err) } validationCluster.NodeList = nodes validationCluster.ComponentFailures, err = collectComponentFailures(clusterKubernetesClient) if err != nil { return nil, fmt.Errorf("cannot get component status for %q: %v", clusterName, err) } validationCluster.PodFailures, err = collectPodFailures(clusterKubernetesClient) if err != nil { return nil, fmt.Errorf("cannot get pod health for %q: %v", clusterName, err) } return validateTheNodes(clusterName, validationCluster) } func collectComponentFailures(client kubernetes.Interface) (failures []string, err error) { componentList, err := client.CoreV1().ComponentStatuses().List(metav1.ListOptions{}) if err == nil { for _, component := range componentList.Items { for _, condition := range component.Conditions { if condition.Status != v1.ConditionTrue { failures = append(failures, component.Name) } } } } return } func collectPodFailures(client kubernetes.Interface) (failures []string, err error) { pods, err := client.CoreV1().Pods("kube-system").List(metav1.ListOptions{}) if err == nil { for _, pod := range pods.Items { if 
pod.Status.Phase == v1.PodSucceeded { continue } for _, status := range pod.Status.ContainerStatuses { if !status.Ready { failures = append(failures, pod.Name) } } } } return } func validateTheNodes(clusterName string, validationCluster *ValidationCluster) (*ValidationCluster, error) { nodes := validationCluster.NodeList if nodes == nil || len(nodes.Items) == 0 { return nil, fmt.Errorf("No nodes found in validationCluster") } // Needed for when NodesCount and MastersCounts are predefined, i.e tests presetNodeCount := validationCluster.NodesCount == 0 presetMasterCount := validationCluster.MastersCount == 0 for i := range nodes.Items { node := &nodes.Items[i] role := util.GetNodeRole(node) if role == "" { role = "node" } n := &ValidationNode{ Zone: node.ObjectMeta.Labels["failure-domain.beta.kubernetes.io/zone"], Hostname: node.ObjectMeta.Labels["kubernetes.io/hostname"], Role: role, Status: GetNodeReadyStatus(node), } ready := isNodeReady(node) // TODO: Use instance group role instead... 
if n.Role == "master" { if presetMasterCount { validationCluster.MastersCount++ } if ready { validationCluster.MastersReadyArray = append(validationCluster.MastersReadyArray, n) } else { validationCluster.MastersNotReadyArray = append(validationCluster.MastersNotReadyArray, n) } } else if n.Role == "node" { if presetNodeCount { validationCluster.NodesCount++ } if ready { validationCluster.NodesReadyArray = append(validationCluster.NodesReadyArray, n) } else { validationCluster.NodesNotReadyArray = append(validationCluster.NodesNotReadyArray, n) } } } validationCluster.MastersReady = true if len(validationCluster.MastersNotReadyArray) != 0 || validationCluster.MastersCount != len(validationCluster.MastersReadyArray) { validationCluster.MastersReady = false } validationCluster.NodesReady = true if len(validationCluster.NodesNotReadyArray) != 0 || validationCluster.NodesCount > len(validationCluster.NodesReadyArray) { validationCluster.NodesReady = false } if !validationCluster.MastersReady { validationCluster.Status = ClusterValidationFailed validationCluster.ErrorMessage = fmt.Sprintf("your masters are NOT ready %s", clusterName) return validationCluster, fmt.Errorf(validationCluster.ErrorMessage) } if !validationCluster.NodesReady { validationCluster.Status = ClusterValidationFailed validationCluster.ErrorMessage = fmt.Sprintf("your nodes are NOT ready %s", clusterName) return validationCluster, fmt.Errorf(validationCluster.ErrorMessage) } if len(validationCluster.ComponentFailures) != 0 { validationCluster.Status = ClusterValidationFailed validationCluster.ErrorMessage = fmt.Sprintf("your components are NOT healthy %s", clusterName) return validationCluster, fmt.Errorf(validationCluster.ErrorMessage) } if len(validationCluster.PodFailures) != 0 { validationCluster.Status = ClusterValidationFailed validationCluster.ErrorMessage = fmt.Sprintf("your kube-system pods are NOT healthy %s", clusterName) return validationCluster, fmt.Errorf(validationCluster.ErrorMessage) 
} return validationCluster, nil }