From 73f164e229b16610768aa48221a65eb889891488 Mon Sep 17 00:00:00 2001 From: John Gardiner Myers Date: Tue, 30 Nov 2021 17:00:02 -0800 Subject: [PATCH 1/2] Use instance ID as node name when AWS CCM supports it --- cmd/kops-controller/pkg/config/options.go | 3 +++ cmd/kops-controller/pkg/server/server.go | 2 +- nodeup/pkg/model/kubelet.go | 3 ++- pkg/apis/nodeup/config.go | 6 ++++++ pkg/bootstrap/authenticate.go | 2 +- upup/pkg/fi/cloudup/awsup/aws_cloud.go | 10 ++++++---- upup/pkg/fi/cloudup/awsup/aws_verifier.go | 4 ++-- .../gce/tpm/gcetpmverifier/tpmverifier.go | 2 +- upup/pkg/fi/cloudup/template_functions.go | 6 +++++- upup/pkg/fi/nodeup/command.go | 17 +++++++++++------ 10 files changed, 38 insertions(+), 17 deletions(-) diff --git a/cmd/kops-controller/pkg/config/options.go b/cmd/kops-controller/pkg/config/options.go index 72b7a3a783..5c91b2fdef 100644 --- a/cmd/kops-controller/pkg/config/options.go +++ b/cmd/kops-controller/pkg/config/options.go @@ -55,6 +55,9 @@ type ServerOptions struct { SigningCAs []string `json:"signingCAs"` // CertNames is the list of active certificate names. CertNames []string `json:"certNames"` + + // UseInstanceIDForNodeName uses the instance ID instead of the hostname for the node name. + UseInstanceIDForNodeName bool `json:"useInstanceIDForNodeName,omitempty"` } type ServerProviderOptions struct { diff --git a/cmd/kops-controller/pkg/server/server.go b/cmd/kops-controller/pkg/server/server.go index 03e405f5ab..bd44d26906 100644 --- a/cmd/kops-controller/pkg/server/server.go +++ b/cmd/kops-controller/pkg/server/server.go @@ -109,7 +109,7 @@ func (s *Server) bootstrap(w http.ResponseWriter, r *http.Request) { ctx := r.Context() - id, err := s.verifier.VerifyToken(ctx, r.Header.Get("Authorization"), body) + id, err := s.verifier.VerifyToken(ctx, r.Header.Get("Authorization"), body, s.opt.Server.UseInstanceIDForNodeName) if err != nil { klog.Infof("bootstrap %s verify err: %v", r.RemoteAddr, err) w.WriteHeader(http.StatusForbidden) diff --git a/nodeup/pkg/model/kubelet.go b/nodeup/pkg/model/kubelet.go index d706f57aeb..cfbee3b08e 100644 --- a/nodeup/pkg/model/kubelet.go +++ b/nodeup/pkg/model/kubelet.go @@ -623,5 +623,6 @@ func (b *KubeletBuilder) kubeletNames() ([]string, error) { return nil, fmt.Errorf("error describing instances: %v", err) } - return awsup.GetInstanceCertificateNames(result) + useInstanceIDForNodeName := b.Cluster.Spec.ExternalCloudControllerManager != nil && b.IsKubernetesGTE("1.23") + return awsup.GetInstanceCertificateNames(result, useInstanceIDForNodeName) } diff --git a/pkg/apis/nodeup/config.go b/pkg/apis/nodeup/config.go index 6a3dde88fd..466491c0bb 100644 --- a/pkg/apis/nodeup/config.go +++ b/pkg/apis/nodeup/config.go @@ -78,6 +78,8 @@ type Config struct { APIServerConfig *APIServerConfig `json:",omitempty"` // NvidiaGPU contains the configuration for nvidia NvidiaGPU *kops.NvidiaGPUConfig `json:",omitempty"` + // UseInstanceIDForNodeName uses the instance ID instead of the hostname for the node name. + UseInstanceIDForNodeName bool `json:"useInstanceIDForNodeName,omitempty"` } // BootConfig is the configuration for the nodeup binary that might be too big to fit in userdata. @@ -206,6 +208,10 @@ func NewConfig(cluster *kops.Cluster, instanceGroup *kops.InstanceGroup) (*Confi config.DefaultMachineType = fi.String(strings.Split(instanceGroup.Spec.MachineType, ",")[0]) } + if cluster.Spec.ExternalCloudControllerManager != nil && cluster.IsKubernetesGTE("1.23") && cluster.Spec.CloudProvider == string(kops.CloudProviderAWS) { + config.UseInstanceIDForNodeName = true + } + return &config, &bootConfig } diff --git a/pkg/bootstrap/authenticate.go b/pkg/bootstrap/authenticate.go index a1078a84e5..90a2aad0cd 100644 --- a/pkg/bootstrap/authenticate.go +++ b/pkg/bootstrap/authenticate.go @@ -39,5 +39,5 @@ type VerifyResult struct { // Verifier verifies authentication credentials for requests. type Verifier interface { - VerifyToken(ctx context.Context, token string, body []byte) (*VerifyResult, error) + VerifyToken(ctx context.Context, token string, body []byte, useInstanceIDForNodeName bool) (*VerifyResult, error) } diff --git a/upup/pkg/fi/cloudup/awsup/aws_cloud.go b/upup/pkg/fi/cloudup/awsup/aws_cloud.go index 9544abdff8..3d5717af23 100644 --- a/upup/pkg/fi/cloudup/awsup/aws_cloud.go +++ b/upup/pkg/fi/cloudup/awsup/aws_cloud.go @@ -2071,8 +2071,8 @@ func GetRolesInInstanceProfile(c AWSCloud, profileName string) ([]string, error) } // GetInstanceCertificateNames returns the instance hostname and addresses that should go into certificates. -// The first value is the node name and any additional values are IP addresses. -func GetInstanceCertificateNames(instances *ec2.DescribeInstancesOutput) (addrs []string, err error) { +// The first value is the node name and any additional values are the DNS name and IP addresses. +func GetInstanceCertificateNames(instances *ec2.DescribeInstancesOutput, useInstanceIDForNodeName bool) (addrs []string, err error) { if len(instances.Reservations) != 1 { return nil, fmt.Errorf("too many reservations returned for the single instance-id") } @@ -2083,9 +2083,11 @@ func GetInstanceCertificateNames(instances *ec2.DescribeInstancesOutput) (addrs instance := instances.Reservations[0].Instances[0] - name := *instance.PrivateDnsName + if useInstanceIDForNodeName { + addrs = append(addrs, *instance.InstanceId) + } - addrs = append(addrs, name) + addrs = append(addrs, *instance.PrivateDnsName) // We only use data for the first interface, and only the first IP for _, iface := range instance.NetworkInterfaces { diff --git a/upup/pkg/fi/cloudup/awsup/aws_verifier.go b/upup/pkg/fi/cloudup/awsup/aws_verifier.go index 24df1ec32e..3374964774 100644 --- a/upup/pkg/fi/cloudup/awsup/aws_verifier.go +++ b/upup/pkg/fi/cloudup/awsup/aws_verifier.go @@ -120,7 +120,7 @@ type ResponseMetadata struct { RequestId string `xml:"RequestId"` } -func (a awsVerifier) VerifyToken(ctx context.Context, token string, body []byte) (*bootstrap.VerifyResult, error) { +func (a awsVerifier) VerifyToken(ctx context.Context, token string, body []byte, useInstanceIDForNodeName bool) (*bootstrap.VerifyResult, error) { if !strings.HasPrefix(token, AWSAuthenticationTokenPrefix) { return nil, fmt.Errorf("incorrect authorization type") } @@ -232,7 +232,7 @@ func (a awsVerifier) VerifyToken(ctx context.Context, token string, body []byte) instance := instances.Reservations[0].Instances[0] - addrs, err := GetInstanceCertificateNames(instances) + addrs, err := GetInstanceCertificateNames(instances, useInstanceIDForNodeName) if err != nil { return nil, err } diff --git a/upup/pkg/fi/cloudup/gce/tpm/gcetpmverifier/tpmverifier.go b/upup/pkg/fi/cloudup/gce/tpm/gcetpmverifier/tpmverifier.go index 000b171f2c..a319564dfd 100644 --- a/upup/pkg/fi/cloudup/gce/tpm/gcetpmverifier/tpmverifier.go +++ b/upup/pkg/fi/cloudup/gce/tpm/gcetpmverifier/tpmverifier.go @@ -63,7 +63,7 @@ func NewTPMVerifier(opt *gcetpm.TPMVerifierOptions) (bootstrap.Verifier, error) var _ bootstrap.Verifier = &tpmVerifier{} -func (v *tpmVerifier) VerifyToken(ctx context.Context, authToken string, body []byte) (*bootstrap.VerifyResult, error) { +func (v *tpmVerifier) VerifyToken(ctx context.Context, authToken string, body []byte, useInstanceIDForNodeName bool) (*bootstrap.VerifyResult, error) { // Reminder: we shouldn't trust any data we get from the client until we've checked the signature (and even then...) // Thankfully the GCE SDK does seem to escape the parameters correctly, for example. diff --git a/upup/pkg/fi/cloudup/template_functions.go b/upup/pkg/fi/cloudup/template_functions.go index 966818c5dd..3ec3c1db4a 100644 --- a/upup/pkg/fi/cloudup/template_functions.go +++ b/upup/pkg/fi/cloudup/template_functions.go @@ -585,6 +585,10 @@ func (tf *TemplateFunctions) KopsControllerConfig() (string, error) { Region: tf.Region, } + if cluster.Spec.ExternalCloudControllerManager != nil && cluster.IsKubernetesGTE("1.23") { + config.Server.UseInstanceIDForNodeName = true + } + case kops.CloudProviderGCE: c := tf.cloud.(gce.GCECloud) @@ -599,7 +603,7 @@ func (tf *TemplateFunctions) KopsControllerConfig() (string, error) { } } - if tf.Cluster.Spec.IsKopsControllerIPAM() { + if cluster.Spec.IsKopsControllerIPAM() { config.EnableCloudIPAM = true } diff --git a/upup/pkg/fi/nodeup/command.go b/upup/pkg/fi/nodeup/command.go index 1c7d01aeb3..d5af971dcf 100644 --- a/upup/pkg/fi/nodeup/command.go +++ b/upup/pkg/fi/nodeup/command.go @@ -447,7 +447,7 @@ func completeWarmingLifecycleAction(cloud awsup.AWSCloud, modelContext *model.No } func evaluateSpec(c *NodeUpCommand, nodeupConfig *nodeup.Config) error { - hostnameOverride, err := evaluateHostnameOverride(api.CloudProviderID(c.cluster.Spec.CloudProvider)) + hostnameOverride, err := evaluateHostnameOverride(api.CloudProviderID(c.cluster.Spec.CloudProvider), nodeupConfig.UseInstanceIDForNodeName) if err != nil { return err } @@ -477,14 +477,19 @@ func evaluateSpec(c *NodeUpCommand, nodeupConfig *nodeup.Config) error { return nil } -func evaluateHostnameOverride(cloudProvider api.CloudProviderID) (string, error) { +func evaluateHostnameOverride(cloudProvider api.CloudProviderID, useInstanceIDForNodeName bool) (string, error) { switch cloudProvider { case api.CloudProviderAWS: - hostnameBytes, err := vfs.Context.ReadFile("metadata://aws/meta-data/local-hostname") - if err != nil { - return "", fmt.Errorf("error reading local-hostname from AWS metadata: %v", err) + source := "local-hostname" + if useInstanceIDForNodeName { + source = "instance-id" } - return string(hostnameBytes), nil + nodeNameBytes, err := vfs.Context.ReadFile("metadata://aws/meta-data/" + source) + if err != nil { + return "", fmt.Errorf("error reading %s from AWS metadata: %v", source, err) + } + return string(nodeNameBytes), nil + case api.CloudProviderGCE: // This lets us tolerate broken hostnames (i.e. systemd) b, err := vfs.Context.ReadFile("metadata://gce/instance/hostname") From 36a26560aab8a99a6a0ae580af0df8ac55cba46b Mon Sep 17 00:00:00 2001 From: John Gardiner Myers Date: Fri, 3 Dec 2021 18:59:47 -0800 Subject: [PATCH 2/2] Add release note about AWS node naming --- docs/releases/1.23-NOTES.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/docs/releases/1.23-NOTES.md b/docs/releases/1.23-NOTES.md index 08013de9a9..3baadbbf1b 100644 --- a/docs/releases/1.23-NOTES.md +++ b/docs/releases/1.23-NOTES.md @@ -8,6 +8,9 @@ This is a document to gather the release notes prior to the release. ## Other significant changes +* If the Kubernetes version is 1.23 or later and the external AWS Cloud Controller Manager is +being used, then Kubernetes Node resources will be named after their AWS instance ID instead of their domain name. + # Breaking changes * Support for Kubernetes version 1.17 has been removed.