From 4cf52d0e51b04bf746bdb5658c3454680f823629 Mon Sep 17 00:00:00 2001 From: justinsb Date: Sun, 14 Nov 2021 11:35:10 -0500 Subject: [PATCH] GCE: Support kops-controller, including in gossip mode We discover the kops-controller in gossip mode using seeding code that calls into the GCE API, just like gossip itself does. We refactor the gossip code into a shared gcediscovery library with minimal dependencies. --- nodeup/pkg/model/bootstrap_client.go | 6 +- pkg/model/components/context.go | 1 + pkg/model/components/kubelet.go | 3 +- pkg/model/gcemodel/BUILD.bazel | 2 +- pkg/model/gcemodel/autoscalinggroup.go | 3 +- pkg/model/gcemodel/context.go | 4 +- pkg/resolver/BUILD.bazel | 8 + pkg/resolver/interface.go | 25 ++ protokube/pkg/gossip/gce/seeds.go | 115 -------- protokube/pkg/protokube/BUILD.bazel | 2 +- protokube/pkg/protokube/gce_volume.go | 53 +--- upup/pkg/fi/cloudup/gce/BUILD.bazel | 1 + upup/pkg/fi/cloudup/gce/gce_cloud.go | 16 +- .../fi/cloudup/gce/gcediscovery/BUILD.bazel | 18 ++ .../fi/cloudup/gce/gcediscovery/resolver.go | 266 ++++++++++++++++++ .../fi/cloudup/gce/gcemetadata}/BUILD.bazel | 7 +- .../fi/cloudup/gce/gcemetadata/clustername.go | 50 ++++ upup/pkg/fi/cloudup/gce/labels.go | 7 + .../gce/tpm/gcetpmverifier/BUILD.bazel | 1 + .../gce/tpm/gcetpmverifier/tpmverifier.go | 3 +- upup/pkg/fi/nodeup/BUILD.bazel | 3 + upup/pkg/fi/nodeup/command.go | 21 +- upup/pkg/fi/nodeup/nodetasks/BUILD.bazel | 1 + .../fi/nodeup/nodetasks/bootstrap_client.go | 67 ++++- 24 files changed, 491 insertions(+), 192 deletions(-) create mode 100644 pkg/resolver/BUILD.bazel create mode 100644 pkg/resolver/interface.go delete mode 100644 protokube/pkg/gossip/gce/seeds.go create mode 100644 upup/pkg/fi/cloudup/gce/gcediscovery/BUILD.bazel create mode 100644 upup/pkg/fi/cloudup/gce/gcediscovery/resolver.go rename {protokube/pkg/gossip/gce => upup/pkg/fi/cloudup/gce/gcemetadata}/BUILD.bazel (55%) create mode 100644 upup/pkg/fi/cloudup/gce/gcemetadata/clustername.go diff --git a/nodeup/pkg/model/bootstrap_client.go b/nodeup/pkg/model/bootstrap_client.go index bd3667edde..a118c85a35 100644 --- a/nodeup/pkg/model/bootstrap_client.go +++ b/nodeup/pkg/model/bootstrap_client.go @@ -48,9 +48,13 @@ func (b BootstrapClientBuilder) Build(c *fi.ModelBuilderContext) error { authenticator, err = awsup.NewAWSAuthenticator(b.Cloud.Region()) case kops.CloudProviderGCE: authenticator, err = gcetpmsigner.NewTPMAuthenticator() + // We don't use the custom resolver here in gossip mode (though we could); + // instead we use this as a check that protokube has now started. + default: - return fmt.Errorf("unsupported cloud provider %s", b.Cluster.Spec.CloudProvider) + return fmt.Errorf("unsupported cloud provider for authenticator %q", b.Cluster.Spec.CloudProvider) } + if err != nil { return err } diff --git a/pkg/model/components/context.go b/pkg/model/components/context.go index 1496796d66..66a37b75d5 100644 --- a/pkg/model/components/context.go +++ b/pkg/model/components/context.go @@ -168,6 +168,7 @@ func Image(component string, clusterSpec *kops.ClusterSpec, assetsBuilder *asset return image, nil } +// GCETagForRole returns the (network) tag for GCE instances in the given instance group role. func GCETagForRole(clusterName string, role kops.InstanceGroupRole) string { return gce.SafeClusterName(clusterName) + "-" + gce.GceLabelNameRolePrefix + strings.ToLower(string(role)) } diff --git a/pkg/model/components/kubelet.go b/pkg/model/components/kubelet.go index b9852fe124..63c8b0d40e 100644 --- a/pkg/model/components/kubelet.go +++ b/pkg/model/components/kubelet.go @@ -24,6 +24,7 @@ import ( "k8s.io/kops/pkg/apis/kops" "k8s.io/kops/upup/pkg/fi" + "k8s.io/kops/upup/pkg/fi/cloudup/gce" "k8s.io/kops/upup/pkg/fi/loader" ) @@ -141,7 +142,7 @@ func (b *KubeletOptionsBuilder) BuildOptions(o interface{}) error { clusterSpec.CloudConfig = &kops.CloudConfiguration{} } clusterSpec.CloudConfig.Multizone = fi.Bool(true) - clusterSpec.CloudConfig.NodeTags = fi.String(GCETagForRole(b.ClusterName, kops.InstanceGroupRoleNode)) + clusterSpec.CloudConfig.NodeTags = fi.String(gce.TagForRole(b.ClusterName, kops.InstanceGroupRoleNode)) } diff --git a/pkg/model/gcemodel/BUILD.bazel b/pkg/model/gcemodel/BUILD.bazel index 0abc042515..ed8a843b7a 100644 --- a/pkg/model/gcemodel/BUILD.bazel +++ b/pkg/model/gcemodel/BUILD.bazel @@ -18,13 +18,13 @@ go_library( "//pkg/apis/kops:go_default_library", "//pkg/featureflag:go_default_library", "//pkg/model:go_default_library", - "//pkg/model/components:go_default_library", "//pkg/model/defaults:go_default_library", "//pkg/model/iam:go_default_library", "//pkg/nodeidentity/gce:go_default_library", "//pkg/wellknownports:go_default_library", "//upup/pkg/fi:go_default_library", "//upup/pkg/fi/cloudup/gce:go_default_library", + "//upup/pkg/fi/cloudup/gce/gcemetadata:go_default_library", "//upup/pkg/fi/cloudup/gcetasks:go_default_library", "//util/pkg/vfs:go_default_library", "//vendor/k8s.io/apimachinery/pkg/util/sets:go_default_library", diff --git a/pkg/model/gcemodel/autoscalinggroup.go b/pkg/model/gcemodel/autoscalinggroup.go index 89bf5a5de0..71967850d6 100644 --- a/pkg/model/gcemodel/autoscalinggroup.go +++ b/pkg/model/gcemodel/autoscalinggroup.go @@ -28,6 +28,7 @@ import ( nodeidentitygce "k8s.io/kops/pkg/nodeidentity/gce" "k8s.io/kops/upup/pkg/fi" "k8s.io/kops/upup/pkg/fi/cloudup/gce" + "k8s.io/kops/upup/pkg/fi/cloudup/gce/gcemetadata" "k8s.io/kops/upup/pkg/fi/cloudup/gcetasks" ) @@ -97,7 +98,7 @@ func (b *AutoscalingGroupModelBuilder) buildInstanceTemplate(c *fi.ModelBuilderC Metadata: map[string]fi.Resource{ "startup-script": startupScript, //"config": resources/config.yaml $nodeset.Name - "cluster-name": fi.NewStringResource(b.ClusterName()), + gcemetadata.MetadataKeyClusterName: fi.NewStringResource(b.ClusterName()), nodeidentitygce.MetadataKeyInstanceGroupName: fi.NewStringResource(ig.Name), }, } diff --git a/pkg/model/gcemodel/context.go b/pkg/model/gcemodel/context.go index 33b705e9e1..bc6f0123fd 100644 --- a/pkg/model/gcemodel/context.go +++ b/pkg/model/gcemodel/context.go @@ -19,7 +19,6 @@ package gcemodel import ( "k8s.io/kops/pkg/apis/kops" "k8s.io/kops/pkg/model" - "k8s.io/kops/pkg/model/components" "k8s.io/kops/upup/pkg/fi/cloudup/gce" "k8s.io/kops/upup/pkg/fi/cloudup/gcetasks" ) @@ -67,8 +66,9 @@ func (c *GCEModelContext) SafeClusterName() string { return gce.SafeClusterName(c.Cluster.ObjectMeta.Name) } +// GCETagForRole returns the (network) tag for GCE instances in the given instance group role. func (c *GCEModelContext) GCETagForRole(role kops.InstanceGroupRole) string { - return components.GCETagForRole(c.Cluster.ObjectMeta.Name, role) + return gce.TagForRole(c.Cluster.ObjectMeta.Name, role) } func (c *GCEModelContext) LinkToTargetPool(id string) *gcetasks.TargetPool { diff --git a/pkg/resolver/BUILD.bazel b/pkg/resolver/BUILD.bazel new file mode 100644 index 0000000000..0f373f1a89 --- /dev/null +++ b/pkg/resolver/BUILD.bazel @@ -0,0 +1,8 @@ +load("@io_bazel_rules_go//go:def.bzl", "go_library") + +go_library( + name = "go_default_library", + srcs = ["interface.go"], + importpath = "k8s.io/kops/pkg/resolver", + visibility = ["//visibility:public"], +) diff --git a/pkg/resolver/interface.go b/pkg/resolver/interface.go new file mode 100644 index 0000000000..c6a40f75b6 --- /dev/null +++ b/pkg/resolver/interface.go @@ -0,0 +1,25 @@ +/* +Copyright 2021 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package resolver + +import "context" + +// Resolver is implemented by alternatives resolvers (such as gossip-mode) +type Resolver interface { + // Resolve resolves the host to IP addresses or alternative hostnames. + Resolve(ctx context.Context, host string) ([]string, error) +} diff --git a/protokube/pkg/gossip/gce/seeds.go b/protokube/pkg/gossip/gce/seeds.go deleted file mode 100644 index 05209c9366..0000000000 --- a/protokube/pkg/gossip/gce/seeds.go +++ /dev/null @@ -1,115 +0,0 @@ -/* -Copyright 2017 The Kubernetes Authors. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -package gce - -import ( - "fmt" - "strings" - - compute "google.golang.org/api/compute/v1" - "k8s.io/klog/v2" - "k8s.io/kops/protokube/pkg/gossip" -) - -type SeedProvider struct { - compute *compute.Service - projectID string - region string -} - -var _ gossip.SeedProvider = &SeedProvider{} - -// Each page can have 500 results, but we cap how many pages -// are iterated through to prevent infinite loops if the API -// were to continuously return a nextPageToken. -const maxPages = 100 - -func (p *SeedProvider) GetSeeds() ([]string, error) { - zones, err := p.compute.Zones.List(p.projectID).Do() - if err != nil { - return nil, fmt.Errorf("error querying for GCE zones: %v", err) - } - - var zoneNames []string - for _, zone := range zones.Items { - regionName := lastComponent(zone.Region) - if regionName != p.region { - continue - } - zoneNames = append(zoneNames, zone.Name) - } - - var seeds []string - // TODO: Does it suffice to just query one zone (as long as we sort so it is always the first)? - // Or does that introduce edges cases where we have partitions / cliques - - for _, zoneName := range zoneNames { - pageToken := "" - page := 0 - for ; page == 0 || (pageToken != "" && page < maxPages); page++ { - listCall := p.compute.Instances.List(p.projectID, zoneName) - - // TODO: Filter by fields (but ask about google issue 29524655) - - // TODO: Match clusterid? - - if pageToken != "" { - listCall.PageToken(pageToken) - } - - res, err := listCall.Do() - if err != nil { - return nil, err - } - pageToken = res.NextPageToken - for _, i := range res.Items { - // TODO: Expose multiple IPs topologies? - - for _, ni := range i.NetworkInterfaces { - // TODO: Check e.g. Network - - if ni.NetworkIP != "" { - seeds = append(seeds, ni.NetworkIP) - } - } - } - } - if page >= maxPages { - klog.Errorf("GetSeeds exceeded maxPages=%d for Instances.List: truncating.", maxPages) - } - } - - return seeds, nil -} - -func NewSeedProvider(compute *compute.Service, region string, projectID string) (*SeedProvider, error) { - return &SeedProvider{ - compute: compute, - region: region, - projectID: projectID, - }, nil -} - -// Returns the last component of a URL, i.e. anything after the last slash -// If there is no slash, returns the whole string -func lastComponent(s string) string { - lastSlash := strings.LastIndex(s, "/") - if lastSlash != -1 { - s = s[lastSlash+1:] - } - return s -} diff --git a/protokube/pkg/protokube/BUILD.bazel b/protokube/pkg/protokube/BUILD.bazel index a7bf0b00f3..69ac770dac 100644 --- a/protokube/pkg/protokube/BUILD.bazel +++ b/protokube/pkg/protokube/BUILD.bazel @@ -32,12 +32,12 @@ go_library( "//protokube/pkg/gossip/azure:go_default_library", "//protokube/pkg/gossip/dns:go_default_library", "//protokube/pkg/gossip/do:go_default_library", - "//protokube/pkg/gossip/gce:go_default_library", "//protokube/pkg/gossip/openstack:go_default_library", "//upup/pkg/fi/cloudup/aliup:go_default_library", "//upup/pkg/fi/cloudup/awsup:go_default_library", "//upup/pkg/fi/cloudup/azure:go_default_library", "//upup/pkg/fi/cloudup/gce:go_default_library", + "//upup/pkg/fi/cloudup/gce/gcediscovery:go_default_library", "//upup/pkg/fi/cloudup/openstack:go_default_library", "//vendor/cloud.google.com/go/compute/metadata:go_default_library", "//vendor/github.com/Azure/azure-sdk-for-go/services/compute/mgmt/2020-06-01/compute:go_default_library", diff --git a/protokube/pkg/protokube/gce_volume.go b/protokube/pkg/protokube/gce_volume.go index cd044d3f92..55f81d7984 100644 --- a/protokube/pkg/protokube/gce_volume.go +++ b/protokube/pkg/protokube/gce_volume.go @@ -28,13 +28,14 @@ import ( "k8s.io/klog/v2" "k8s.io/kops/protokube/pkg/etcd" "k8s.io/kops/protokube/pkg/gossip" - gossipgce "k8s.io/kops/protokube/pkg/gossip/gce" "k8s.io/kops/upup/pkg/fi/cloudup/gce" + "k8s.io/kops/upup/pkg/fi/cloudup/gce/gcediscovery" ) // GCEVolumes is the Volumes implementation for GCE type GCEVolumes struct { - compute *compute.Service + compute *compute.Service + discovery *gcediscovery.Discovery project string zone string @@ -48,15 +49,14 @@ var _ Volumes = &GCEVolumes{} // NewGCEVolumes builds a GCEVolumes func NewGCEVolumes() (*GCEVolumes, error) { - ctx := context.Background() - - computeService, err := compute.NewService(ctx) + discovery, err := gcediscovery.New() if err != nil { - return nil, fmt.Errorf("error building compute API client: %v", err) + return nil, err } a := &GCEVolumes{ - compute: computeService, + discovery: discovery, + compute: discovery.Compute(), } err = a.discoverTags() @@ -85,24 +85,15 @@ func (a *GCEVolumes) InternalIP() net.IP { func (a *GCEVolumes) discoverTags() error { // Cluster Name { - clusterName, err := metadata.InstanceAttributeValue("cluster-name") - if err != nil { - return fmt.Errorf("error reading cluster-name attribute from GCE: %v", err) - } - a.clusterName = strings.TrimSpace(string(clusterName)) + a.clusterName = a.discovery.ClusterName() if a.clusterName == "" { return fmt.Errorf("cluster-name metadata was empty") } - klog.Infof("Found cluster-name=%q", a.clusterName) } // Project ID { - project, err := metadata.ProjectID() - if err != nil { - return fmt.Errorf("error reading project from GCE: %v", err) - } - a.project = strings.TrimSpace(project) + a.project = a.discovery.ProjectID() if a.project == "" { return fmt.Errorf("project metadata was empty") } @@ -111,21 +102,13 @@ func (a *GCEVolumes) discoverTags() error { // Zone { - zone, err := metadata.Zone() - if err != nil { - return fmt.Errorf("error reading zone from GCE: %v", err) - } - a.zone = strings.TrimSpace(zone) + a.zone = a.discovery.Zone() if a.zone == "" { return fmt.Errorf("zone metadata was empty") } klog.Infof("Found zone=%q", a.zone) - region, err := regionFromZone(zone) - if err != nil { - return fmt.Errorf("error determining region from zone %q: %v", zone, err) - } - a.region = region + a.region = a.discovery.Region() klog.Infof("Found region=%q", a.region) } @@ -359,21 +342,9 @@ func (v *GCEVolumes) AttachVolume(volume *Volume) error { } func (g *GCEVolumes) GossipSeeds() (gossip.SeedProvider, error) { - return gossipgce.NewSeedProvider(g.compute, g.region, g.project) + return g.discovery, nil } func (g *GCEVolumes) InstanceName() string { return g.instanceName } - -// regionFromZone returns region of the gce zone. Zone names -// are of the form: ${region-name}-${ix}. -// For example, "us-central1-b" has a region of "us-central1". -// So we look for the last '-' and trim to just before that. -func regionFromZone(zone string) (string, error) { - ix := strings.LastIndex(zone, "-") - if ix == -1 { - return "", fmt.Errorf("unexpected zone: %s", zone) - } - return zone[:ix], nil -} diff --git a/upup/pkg/fi/cloudup/gce/BUILD.bazel b/upup/pkg/fi/cloudup/gce/BUILD.bazel index 0a03608994..69beaf2e34 100644 --- a/upup/pkg/fi/cloudup/gce/BUILD.bazel +++ b/upup/pkg/fi/cloudup/gce/BUILD.bazel @@ -26,6 +26,7 @@ go_library( "//pkg/util/subnet:go_default_library", "//protokube/pkg/etcd:go_default_library", "//upup/pkg/fi:go_default_library", + "//upup/pkg/fi/cloudup/gce/gcemetadata:go_default_library", "//vendor/golang.org/x/oauth2/google:go_default_library", "//vendor/google.golang.org/api/compute/v1:go_default_library", "//vendor/google.golang.org/api/dns/v1:go_default_library", diff --git a/upup/pkg/fi/cloudup/gce/gce_cloud.go b/upup/pkg/fi/cloudup/gce/gce_cloud.go index 41bbc2bb98..74ae1872b7 100644 --- a/upup/pkg/fi/cloudup/gce/gce_cloud.go +++ b/upup/pkg/fi/cloudup/gce/gce_cloud.go @@ -34,6 +34,7 @@ import ( "k8s.io/kops/dnsprovider/pkg/dnsprovider/providers/google/clouddns" "k8s.io/kops/pkg/apis/kops" "k8s.io/kops/upup/pkg/fi" + "k8s.io/kops/upup/pkg/fi/cloudup/gce/gcemetadata" ) type GCECloud interface { @@ -323,20 +324,7 @@ func FindInstanceTemplates(c GCECloud, clusterName string) ([]*compute.InstanceT var matches []*compute.InstanceTemplate for _, t := range ts { - match := false - for _, item := range t.Properties.Metadata.Items { - if item.Key == "cluster-name" { - value := fi.StringValue(item.Value) - if strings.TrimSpace(value) == findClusterName { - match = true - } else { - match = false - break - } - } - } - - if !match { + if !gcemetadata.MetadataMatchesClusterName(findClusterName, t.Properties.Metadata) { continue } diff --git a/upup/pkg/fi/cloudup/gce/gcediscovery/BUILD.bazel b/upup/pkg/fi/cloudup/gce/gcediscovery/BUILD.bazel new file mode 100644 index 0000000000..fa33dd6bf7 --- /dev/null +++ b/upup/pkg/fi/cloudup/gce/gcediscovery/BUILD.bazel @@ -0,0 +1,18 @@ +load("@io_bazel_rules_go//go:def.bzl", "go_library") + +go_library( + name = "go_default_library", + srcs = ["resolver.go"], + importpath = "k8s.io/kops/upup/pkg/fi/cloudup/gce/gcediscovery", + visibility = ["//visibility:public"], + deps = [ + "//pkg/apis/kops:go_default_library", + "//pkg/resolver:go_default_library", + "//protokube/pkg/gossip:go_default_library", + "//upup/pkg/fi/cloudup/gce:go_default_library", + "//upup/pkg/fi/cloudup/gce/gcemetadata:go_default_library", + "//vendor/cloud.google.com/go/compute/metadata:go_default_library", + "//vendor/google.golang.org/api/compute/v1:go_default_library", + "//vendor/k8s.io/klog/v2:go_default_library", + ], +) diff --git a/upup/pkg/fi/cloudup/gce/gcediscovery/resolver.go b/upup/pkg/fi/cloudup/gce/gcediscovery/resolver.go new file mode 100644 index 0000000000..5bc21417a5 --- /dev/null +++ b/upup/pkg/fi/cloudup/gce/gcediscovery/resolver.go @@ -0,0 +1,266 @@ +/* +Copyright 2017 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package gcediscovery + +import ( + "context" + "fmt" + "strings" + + "cloud.google.com/go/compute/metadata" + compute "google.golang.org/api/compute/v1" + "k8s.io/klog/v2" + "k8s.io/kops/pkg/apis/kops" + "k8s.io/kops/pkg/resolver" + "k8s.io/kops/protokube/pkg/gossip" + "k8s.io/kops/upup/pkg/fi/cloudup/gce" + "k8s.io/kops/upup/pkg/fi/cloudup/gce/gcemetadata" +) + +type Discovery struct { + compute *compute.Service + projectID string + region string + zone string + clusterName string + allZonesInRegion []string +} + +var _ gossip.SeedProvider = &Discovery{} + +func (r *Discovery) GetSeeds() ([]string, error) { + var seeds []string + + // We are only finding seeds here; we don't need every result. + const maxResults = 100 + + ctx := context.TODO() + + if err := r.findInstances(ctx, func(i *compute.Instance) (bool, error) { + // TODO: Expose multiple IPs topologies? + + for _, ni := range i.NetworkInterfaces { + // TODO: Check e.g. Network + + if ni.NetworkIP != "" { + seeds = append(seeds, ni.NetworkIP) + } + } + + if len(seeds) >= maxResults { + return false, nil + } + return true, nil + }); err != nil { + return nil, err + } + + return seeds, nil +} + +func (r *Discovery) findInstances(ctx context.Context, callback func(*compute.Instance) (bool, error)) error { + // TODO: Does it suffice to just query one zone (as long as we sort so it is always the first)? + // Or does that introduce edges cases where we have partitions / cliques + + for _, zoneName := range r.allZonesInRegion { + pageToken := "" + for { + listCall := r.compute.Instances.List(r.projectID, zoneName).Context(ctx) + + // TODO: Filter by tags (but doesn't seem to be possible) + // TODO: Restrict the fields returned (but be sure to include nextPageToken!) + + if pageToken != "" { + listCall.PageToken(pageToken) + } + + res, err := listCall.Do() + if err != nil { + return err + } + pageToken = res.NextPageToken + for _, i := range res.Items { + if !gcemetadata.InstanceMatchesClusterName(r.clusterName, i) { + continue + } + + keepGoing, err := callback(i) + if err != nil { + return err + } + if !keepGoing { + // We immediately stop (even if there are still zones to visit) + return nil + } + } + + if pageToken == "" { + break + } + } + } + + return nil +} + +// ProjectID returns the GCP project ID we are running in. +func (r *Discovery) ProjectID() string { + return r.projectID +} + +// Zone returns the GCP zone we are running in (e.g. us-central-1a). +func (r *Discovery) Zone() string { + return r.zone +} + +// Region returns the GCP region we are running in (e.g. us-central-1). +func (r *Discovery) Region() string { + return r.region +} + +// ClusterName returns the kOps cluster-name we are part of. +func (r *Discovery) ClusterName() string { + return r.clusterName +} + +// Compute returns the GCP compute service we built. +func (r *Discovery) Compute() *compute.Service { + return r.compute +} + +// New builds a Discovery. +func New() (*Discovery, error) { + ctx := context.Background() + + computeService, err := compute.NewService(ctx) + if err != nil { + return nil, fmt.Errorf("error building compute API client: %v", err) + } + + myZoneName, err := metadata.Zone() + if err != nil { + return nil, fmt.Errorf("failed to get zone from metadata: %w", err) + } + + projectID, err := metadata.ProjectID() + if err != nil { + return nil, fmt.Errorf("failed to get project id from metadata: %w", err) + } + projectID = strings.TrimSpace(projectID) + + zones, err := computeService.Zones.List(projectID).Do() + if err != nil { + return nil, fmt.Errorf("error querying for GCE zones: %w", err) + } + + // Find our zone + var myZone *compute.Zone + for _, zone := range zones.Items { + if myZoneName == zone.Name { + myZone = zone + } + } + if myZone == nil { + return nil, fmt.Errorf("failed to find zone %q", myZoneName) + } + + region := lastComponent(myZone.Region) + + // Find all the zones in our region + var zoneNames []string + for _, zone := range zones.Items { + regionName := lastComponent(zone.Region) + if regionName != region { + continue + } + zoneNames = append(zoneNames, zone.Name) + } + + clusterName, err := metadata.InstanceAttributeValue(gcemetadata.MetadataKeyClusterName) + if err != nil { + return nil, fmt.Errorf("error reading cluster-name attribute from GCE: %w", err) + } + clusterName = strings.TrimSpace(clusterName) + if clusterName == "" { + return nil, fmt.Errorf("cluster-name metadata was empty") + } + klog.Infof("Found cluster-name=%q", clusterName) + + return &Discovery{ + compute: computeService, + region: region, + projectID: projectID, + zone: myZoneName, + clusterName: clusterName, + allZonesInRegion: zoneNames, + }, nil +} + +// Returns the last component of a URL, i.e. anything after the last slash +// If there is no slash, returns the whole string +func lastComponent(s string) string { + lastSlash := strings.LastIndex(s, "/") + if lastSlash != -1 { + s = s[lastSlash+1:] + } + return s +} + +var _ resolver.Resolver = &Discovery{} + +// Resolve implements resolver.Resolve, providing name -> address resolution using GCE discovery. +func (r *Discovery) Resolve(ctx context.Context, name string) ([]string, error) { + var records []string + klog.Infof("trying to resolve %q using GCEResolver", name) + + var requiredTags []string + + // We assume we are trying to resolve a component that runs on the control plane + requiredTags = append(requiredTags, gce.TagForRole(r.clusterName, kops.InstanceGroupRoleMaster)) + + if err := r.findInstances(ctx, func(i *compute.Instance) (bool, error) { + // Make sure the instance has any required tags + for _, requiredTag := range requiredTags { + hasTag := false + if i.Tags != nil { + for _, tag := range i.Tags.Items { + if requiredTag == tag { + hasTag = true + } + } + } + if !hasTag { + return true, nil + } + } + + // TODO: Expose multiple IPs topologies? + for _, ni := range i.NetworkInterfaces { + // TODO: Check e.g. Network + + if ni.NetworkIP != "" { + records = append(records, ni.NetworkIP) + } + } + + return true, nil + }); err != nil { + return nil, err + } + + return records, nil +} diff --git a/protokube/pkg/gossip/gce/BUILD.bazel b/upup/pkg/fi/cloudup/gce/gcemetadata/BUILD.bazel similarity index 55% rename from protokube/pkg/gossip/gce/BUILD.bazel rename to upup/pkg/fi/cloudup/gce/gcemetadata/BUILD.bazel index e64b9352f2..504380380a 100644 --- a/protokube/pkg/gossip/gce/BUILD.bazel +++ b/upup/pkg/fi/cloudup/gce/gcemetadata/BUILD.bazel @@ -2,12 +2,11 @@ load("@io_bazel_rules_go//go:def.bzl", "go_library") go_library( name = "go_default_library", - srcs = ["seeds.go"], - importpath = "k8s.io/kops/protokube/pkg/gossip/gce", + srcs = ["clustername.go"], + importpath = "k8s.io/kops/upup/pkg/fi/cloudup/gce/gcemetadata", visibility = ["//visibility:public"], deps = [ - "//protokube/pkg/gossip:go_default_library", + "//upup/pkg/fi:go_default_library", "//vendor/google.golang.org/api/compute/v1:go_default_library", - "//vendor/k8s.io/klog/v2:go_default_library", ], ) diff --git a/upup/pkg/fi/cloudup/gce/gcemetadata/clustername.go b/upup/pkg/fi/cloudup/gce/gcemetadata/clustername.go new file mode 100644 index 0000000000..67c5fb291a --- /dev/null +++ b/upup/pkg/fi/cloudup/gce/gcemetadata/clustername.go @@ -0,0 +1,50 @@ +/* +Copyright 2021 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package gcemetadata + +import ( + "strings" + + "google.golang.org/api/compute/v1" + "k8s.io/kops/upup/pkg/fi" +) + +// MetadataKeyClusterName is the key used for the metadata that specifies the cluster name. +const MetadataKeyClusterName = "cluster-name" + +// MetadataMatchesClusterName checks if the metadata has the specified cluster-name included. +func MetadataMatchesClusterName(findClusterName string, metadata *compute.Metadata) bool { + if metadata == nil { + return false + } + for _, item := range metadata.Items { + if item.Key == MetadataKeyClusterName { + value := fi.StringValue(item.Value) + if strings.TrimSpace(value) == findClusterName { + return true + } else { + return false + } + } + } + return false +} + +// InstanceMatchesClusterName checks if the instances has the specified cluster-name included. +func InstanceMatchesClusterName(findClusterName string, instance *compute.Instance) bool { + return MetadataMatchesClusterName(findClusterName, instance.Metadata) +} diff --git a/upup/pkg/fi/cloudup/gce/labels.go b/upup/pkg/fi/cloudup/gce/labels.go index e85c9476cf..bda00f1adb 100644 --- a/upup/pkg/fi/cloudup/gce/labels.go +++ b/upup/pkg/fi/cloudup/gce/labels.go @@ -21,6 +21,8 @@ import ( "fmt" "net/url" "strings" + + "k8s.io/kops/pkg/apis/kops" ) const ( @@ -59,3 +61,8 @@ func DecodeGCELabel(s string) (string, error) { } return v, nil } + +// TagForRole return the instance (network) tag used for instances with the given role. +func TagForRole(clusterName string, role kops.InstanceGroupRole) string { + return SafeClusterName(clusterName) + "-" + GceLabelNameRolePrefix + strings.ToLower(string(role)) +} diff --git a/upup/pkg/fi/cloudup/gce/tpm/gcetpmverifier/BUILD.bazel b/upup/pkg/fi/cloudup/gce/tpm/gcetpmverifier/BUILD.bazel index 5cd9adfded..2fb8f8bb22 100644 --- a/upup/pkg/fi/cloudup/gce/tpm/gcetpmverifier/BUILD.bazel +++ b/upup/pkg/fi/cloudup/gce/tpm/gcetpmverifier/BUILD.bazel @@ -9,6 +9,7 @@ go_library( "//pkg/bootstrap:go_default_library", "//pkg/nodeidentity/gce:go_default_library", "//upup/pkg/fi:go_default_library", + "//upup/pkg/fi/cloudup/gce/gcemetadata:go_default_library", "//upup/pkg/fi/cloudup/gce/tpm:go_default_library", "//vendor/google.golang.org/api/compute/v1:go_default_library", "//vendor/google.golang.org/api/googleapi:go_default_library", diff --git a/upup/pkg/fi/cloudup/gce/tpm/gcetpmverifier/tpmverifier.go b/upup/pkg/fi/cloudup/gce/tpm/gcetpmverifier/tpmverifier.go index 000b171f2c..4c41036c08 100644 --- a/upup/pkg/fi/cloudup/gce/tpm/gcetpmverifier/tpmverifier.go +++ b/upup/pkg/fi/cloudup/gce/tpm/gcetpmverifier/tpmverifier.go @@ -37,6 +37,7 @@ import ( "k8s.io/kops/pkg/bootstrap" "k8s.io/kops/pkg/nodeidentity/gce" "k8s.io/kops/upup/pkg/fi" + "k8s.io/kops/upup/pkg/fi/cloudup/gce/gcemetadata" gcetpm "k8s.io/kops/upup/pkg/fi/cloudup/gce/tpm" ) @@ -136,7 +137,7 @@ func (v *tpmVerifier) VerifyToken(ctx context.Context, authToken string, body [] switch item.Key { case gce.MetadataKeyInstanceGroupName: instanceGroupName = fi.StringValue(item.Value) - case "cluster-name": + case gcemetadata.MetadataKeyClusterName: clusterName = fi.StringValue(item.Value) } } diff --git a/upup/pkg/fi/nodeup/BUILD.bazel b/upup/pkg/fi/nodeup/BUILD.bazel index df095a1db0..f74182a87f 100644 --- a/upup/pkg/fi/nodeup/BUILD.bazel +++ b/upup/pkg/fi/nodeup/BUILD.bazel @@ -19,8 +19,11 @@ go_library( "//pkg/bootstrap:go_default_library", "//pkg/configserver:go_default_library", "//pkg/kopscodecs:go_default_library", + "//pkg/resolver:go_default_library", "//upup/pkg/fi:go_default_library", "//upup/pkg/fi/cloudup/awsup:go_default_library", + "//upup/pkg/fi/cloudup/gce/gcediscovery:go_default_library", + "//upup/pkg/fi/cloudup/gce/tpm/gcetpmsigner:go_default_library", "//upup/pkg/fi/nodeup/cloudinit:go_default_library", "//upup/pkg/fi/nodeup/local:go_default_library", "//upup/pkg/fi/nodeup/nodetasks:go_default_library", diff --git a/upup/pkg/fi/nodeup/command.go b/upup/pkg/fi/nodeup/command.go index 1c7d01aeb3..27dfa34b11 100644 --- a/upup/pkg/fi/nodeup/command.go +++ b/upup/pkg/fi/nodeup/command.go @@ -43,8 +43,11 @@ import ( "k8s.io/kops/pkg/bootstrap" "k8s.io/kops/pkg/configserver" "k8s.io/kops/pkg/kopscodecs" + "k8s.io/kops/pkg/resolver" "k8s.io/kops/upup/pkg/fi" "k8s.io/kops/upup/pkg/fi/cloudup/awsup" + "k8s.io/kops/upup/pkg/fi/cloudup/gce/gcediscovery" + "k8s.io/kops/upup/pkg/fi/cloudup/gce/tpm/gcetpmsigner" "k8s.io/kops/upup/pkg/fi/nodeup/cloudinit" "k8s.io/kops/upup/pkg/fi/nodeup/local" "k8s.io/kops/upup/pkg/fi/nodeup/nodetasks" @@ -111,7 +114,7 @@ func (c *NodeUpCommand) Run(out io.Writer) error { if bootConfig.ConfigServer != nil { response, err := getNodeConfigFromServer(ctx, &bootConfig, region) if err != nil { - return err + return fmt.Errorf("failed to get node config from server: %w", err) } nodeConfig = response.NodeConfig } else if fi.StringValue(bootConfig.ConfigBase) != "" { @@ -705,6 +708,7 @@ func seedRNG(ctx context.Context, bootConfig *nodeup.BootConfig, region string) // getNodeConfigFromServer queries kops-controller for our node's configuration. func getNodeConfigFromServer(ctx context.Context, bootConfig *nodeup.BootConfig, region string) (*nodeup.BootstrapResponse, error) { var authenticator bootstrap.Authenticator + var resolver resolver.Resolver switch api.CloudProviderID(bootConfig.CloudProvider) { case api.CloudProviderAWS: @@ -713,12 +717,25 @@ func getNodeConfigFromServer(ctx context.Context, bootConfig *nodeup.BootConfig, return nil, err } authenticator = a + case api.CloudProviderGCE: + a, err := gcetpmsigner.NewTPMAuthenticator() + if err != nil { + return nil, err + } + authenticator = a + + discovery, err := gcediscovery.New() + if err != nil { + return nil, err + } + resolver = discovery default: - return nil, fmt.Errorf("unsupported cloud provider %s", bootConfig.CloudProvider) + return nil, fmt.Errorf("unsupported cloud provider for node configuration %s", bootConfig.CloudProvider) } client := &nodetasks.KopsBootstrapClient{ Authenticator: authenticator, + Resolver: resolver, } u, err := url.Parse(bootConfig.ConfigServer.Server) diff --git a/upup/pkg/fi/nodeup/nodetasks/BUILD.bazel b/upup/pkg/fi/nodeup/nodetasks/BUILD.bazel index 3ac75762ac..a25044d9f7 100644 --- a/upup/pkg/fi/nodeup/nodetasks/BUILD.bazel +++ b/upup/pkg/fi/nodeup/nodetasks/BUILD.bazel @@ -30,6 +30,7 @@ go_library( "//pkg/bootstrap:go_default_library", "//pkg/kubeconfig:go_default_library", "//pkg/pki:go_default_library", + "//pkg/resolver:go_default_library", "//upup/pkg/fi:go_default_library", "//upup/pkg/fi/cloudup:go_default_library", "//upup/pkg/fi/cloudup/awsup:go_default_library", diff --git a/upup/pkg/fi/nodeup/nodetasks/bootstrap_client.go b/upup/pkg/fi/nodeup/nodetasks/bootstrap_client.go index 319598c932..daf0204264 100644 --- a/upup/pkg/fi/nodeup/nodetasks/bootstrap_client.go +++ b/upup/pkg/fi/nodeup/nodetasks/bootstrap_client.go @@ -32,9 +32,11 @@ import ( "path" "time" + "k8s.io/klog/v2" "k8s.io/kops/pkg/apis/nodeup" "k8s.io/kops/pkg/bootstrap" "k8s.io/kops/pkg/pki" + "k8s.io/kops/pkg/resolver" "k8s.io/kops/upup/pkg/fi" "k8s.io/kops/upup/pkg/fi/cloudup" ) @@ -145,26 +147,75 @@ type KopsBootstrapClient struct { // BaseURL is the base URL for the server BaseURL url.URL + // Resolver is a custom resolver that supports resolution of hostnames without requiring DNS. + // In particular, this supports gossip mode. + Resolver resolver.Resolver + httpClient *http.Client } +// dial implements a DialContext resolver function, for when a custom resolver is in use +func (b *KopsBootstrapClient) dial(ctx context.Context, network, addr string) (net.Conn, error) { + var errors []error + + host, port, err := net.SplitHostPort(addr) + if err != nil { + return nil, fmt.Errorf("cannot split host and port from %q: %w", addr, err) + } + + // TODO: cache? + addresses, err := b.Resolver.Resolve(ctx, host) + if err != nil { + return nil, err + } + + klog.Infof("resolved %q to %v", host, addresses) + + for _, addr := range addresses { + timeout := 5 * time.Second + conn, err := net.DialTimeout(network, addr+":"+port, timeout) + if err == nil { + return conn, nil + } + if err != nil { + klog.Warningf("failed to dial %q: %v", addr, err) + errors = append(errors, err) + } + } + if len(errors) == 0 { + return nil, fmt.Errorf("no addresses for %q", addr) + } + return nil, errors[0] +} + func (b *KopsBootstrapClient) QueryBootstrap(ctx context.Context, req *nodeup.BootstrapRequest) (*nodeup.BootstrapResponse, error) { if b.httpClient == nil { certPool := x509.NewCertPool() certPool.AppendCertsFromPEM(b.CAs) - b.httpClient = &http.Client{ - Timeout: time.Duration(15) * time.Second, - Transport: &http.Transport{ - TLSClientConfig: &tls.Config{ - RootCAs: certPool, - MinVersion: tls.VersionTLS12, - }, + transport := &http.Transport{ + TLSClientConfig: &tls.Config{ + RootCAs: certPool, + MinVersion: tls.VersionTLS12, }, } + + if b.Resolver != nil { + transport.DialContext = b.dial + } + + httpClient := &http.Client{ + Timeout: time.Duration(15) * time.Second, + Transport: transport, + } + + b.httpClient = httpClient } - if ips, err := net.LookupIP(b.BaseURL.Hostname()); err != nil { + // Sanity-check DNS to provide clearer diagnostic messages. + if b.Resolver != nil { + // Don't check DNS when there's a custom resolver. + } else if ips, err := net.LookupIP(b.BaseURL.Hostname()); err != nil { if dnsErr, ok := err.(*net.DNSError); ok && dnsErr.IsNotFound { return nil, fi.NewTryAgainLaterError(fmt.Sprintf("kops-controller DNS not setup yet (not found: %v)", dnsErr)) }