From 746f886718b9bba0201f99a4a70cb227b64a63de Mon Sep 17 00:00:00 2001 From: justinsb Date: Fri, 17 Dec 2021 10:17:27 -0500 Subject: [PATCH] gce: use per instancegroup serviceaccounts We no longer set the cloudconfig serviceaccount on new clusters, and instead use a per-IG setting if this is not set. --- pkg/model/gcemodel/autoscalinggroup.go | 8 -- pkg/model/gcemodel/context.go | 40 ++++++- pkg/model/gcemodel/service_accounts.go | 81 +++++++++++++ pkg/model/gcemodel/storageacl.go | 152 +++++++++++++++++++++---- upup/pkg/fi/cloudup/apply_cluster.go | 1 + upup/pkg/fi/cloudup/new_cluster.go | 4 - 6 files changed, 243 insertions(+), 43 deletions(-) diff --git a/pkg/model/gcemodel/autoscalinggroup.go b/pkg/model/gcemodel/autoscalinggroup.go index 7696ab8650..d303292a84 100644 --- a/pkg/model/gcemodel/autoscalinggroup.go +++ b/pkg/model/gcemodel/autoscalinggroup.go @@ -156,14 +156,6 @@ func (b *AutoscalingGroupModelBuilder) buildInstanceTemplate(c *fi.ModelBuilderC } t.Subnet = b.LinkToSubnet(subnet) - if b.Cluster.Spec.CloudConfig.GCEServiceAccount != "" { - klog.Infof("VMs using Service Account: %v", b.Cluster.Spec.CloudConfig.GCEServiceAccount) - // b.Cluster.Spec.GCEServiceAccount = c.GCEServiceAccount - } else { - klog.Warning("VMs will be configured to use the GCE default compute Service Account! This is an anti-pattern") - klog.Warning("Use a pre-created Service Account with the flag: --gce-service-account=account@projectname.iam.gserviceaccount.com") - b.Cluster.Spec.CloudConfig.GCEServiceAccount = "default" - } t.ServiceAccounts = append(t.ServiceAccounts, b.LinkToServiceAccount(ig)) //labels, err := b.CloudTagsForInstanceGroup(ig) diff --git a/pkg/model/gcemodel/context.go b/pkg/model/gcemodel/context.go index d2f9b9374f..d836280ec7 100644 --- a/pkg/model/gcemodel/context.go +++ b/pkg/model/gcemodel/context.go @@ -26,6 +26,8 @@ import ( ) type GCEModelContext struct { + ProjectID string + *model.KopsModelContext } @@ -103,11 +105,37 @@ func (c *GCEModelContext) NetworkingIsGCERoutes() bool { // LinkToServiceAccount returns a link to the GCE ServiceAccount object for VMs in the given role func (c *GCEModelContext) LinkToServiceAccount(ig *kops.InstanceGroup) *gcetasks.ServiceAccount { - // This is a legacy setting because the nodes & control-plane run under the same serviceaccount - klog.Warningf("using legacy spec.cloudConfig.gceServiceAccount=%q setting", c.Cluster.Spec.CloudConfig.GCEServiceAccount) - return &gcetasks.ServiceAccount{ - Name: s("shared"), - Email: &c.Cluster.Spec.CloudConfig.GCEServiceAccount, - Shared: fi.Bool(true), + if c.Cluster.Spec.CloudConfig.GCEServiceAccount != "" { + // This is a legacy setting because the nodes & control-plane run under the same serviceaccount + klog.Warningf("using legacy spec.cloudConfig.gceServiceAccount=%q setting", c.Cluster.Spec.CloudConfig.GCEServiceAccount) + return &gcetasks.ServiceAccount{ + Name: s("shared"), + Email: &c.Cluster.Spec.CloudConfig.GCEServiceAccount, + Shared: fi.Bool(true), + } } + + role := ig.Spec.Role + + name := "" + switch role { + case kops.InstanceGroupRoleAPIServer, kops.InstanceGroupRoleMaster: + name = "control-plane" + + case kops.InstanceGroupRoleBastion: + name = "bastion" + + case kops.InstanceGroupRoleNode: + name = "node" + + default: + klog.Fatalf("unknown role %q", role) + } + + accountID := c.SafeObjectName(name) + projectID := c.ProjectID + + email := accountID + "@" + projectID + ".iam.gserviceaccount.com" + + return &gcetasks.ServiceAccount{Name: s(name), Email: s(email)} } diff --git a/pkg/model/gcemodel/service_accounts.go b/pkg/model/gcemodel/service_accounts.go index 29933c8e25..fdb0f0f0ed 100644 --- a/pkg/model/gcemodel/service_accounts.go +++ b/pkg/model/gcemodel/service_accounts.go @@ -17,6 +17,8 @@ limitations under the License. package gcemodel import ( + "k8s.io/klog/v2" + "k8s.io/kops/pkg/apis/kops" "k8s.io/kops/upup/pkg/fi" "k8s.io/kops/upup/pkg/fi/cloudup/gcetasks" ) @@ -43,5 +45,84 @@ func (b *ServiceAccountsBuilder) Build(c *fi.ModelBuilderContext) error { return nil } + doneEmails := make(map[string]bool) + for _, ig := range b.InstanceGroups { + link := b.LinkToServiceAccount(ig) + if fi.BoolValue(link.Shared) { + c.EnsureTask(link) + continue + } + + if doneEmails[*link.Email] { + continue + } + doneEmails[*link.Email] = true + + serviceAccount := &gcetasks.ServiceAccount{ + Name: link.Name, + Email: link.Email, + Lifecycle: b.Lifecycle, + } + switch ig.Spec.Role { + case kops.InstanceGroupRoleAPIServer, kops.InstanceGroupRoleMaster: + serviceAccount.Description = fi.String("kubernetes control-plane instances") + case kops.InstanceGroupRoleNode: + serviceAccount.Description = fi.String("kubernetes worker nodes") + case kops.InstanceGroupRoleBastion: + serviceAccount.Description = fi.String("bastion nodes") + default: + klog.Warningf("unknown instance role %q", ig.Spec.Role) + } + c.AddTask(serviceAccount) + + role := ig.Spec.Role + if role == kops.InstanceGroupRoleAPIServer { + // Because these share a serviceaccount, we share a role + role = kops.InstanceGroupRoleMaster + } + + if err := b.addInstanceGroupServiceAccountPermissions(c, *serviceAccount.Email, role); err != nil { + return err + } + } + + return nil +} + +func (b *ServiceAccountsBuilder) addInstanceGroupServiceAccountPermissions(c *fi.ModelBuilderContext, serviceAccountEmail string, role kops.InstanceGroupRole) error { + member := "serviceAccount:" + serviceAccountEmail + + // Ideally we would use a custom role here, but the deletion of a custom role takes 7 days, + // which means we can't easily recycle cluster names. + // If we can find a solution, we can easily switch to a custom role. + + switch role { + case kops.InstanceGroupRoleMaster: + // We reuse the GKE role + c.AddTask(&gcetasks.ProjectIAMBinding{ + Name: s("serviceaccount-control-plane"), + Lifecycle: b.Lifecycle, + + Project: s(b.ProjectID), + Member: s(member), + Role: s("roles/container.serviceAgent"), + }) + + case kops.InstanceGroupRoleNode: + // Known permissions: + // * compute.zones.list (to find out region; we could replace this with string manipulation) + // * compute.instances.list (for discovery; we don't need in the case of a load balancer or DNS) + + // We use the GCE viewer role + + c.AddTask(&gcetasks.ProjectIAMBinding{ + Name: s("serviceaccount-nodes"), + Lifecycle: b.Lifecycle, + + Project: s(b.ProjectID), + Member: s(member), + Role: s("roles/compute.viewer"), + }) + } return nil } diff --git a/pkg/model/gcemodel/storageacl.go b/pkg/model/gcemodel/storageacl.go index 4b369ac393..57d8c5436d 100644 --- a/pkg/model/gcemodel/storageacl.go +++ b/pkg/model/gcemodel/storageacl.go @@ -18,6 +18,7 @@ package gcemodel import ( "fmt" + "strings" "k8s.io/apimachinery/pkg/util/sets" "k8s.io/klog/v2" @@ -40,17 +41,24 @@ type StorageAclBuilder struct { var _ fi.ModelBuilder = &NetworkModelBuilder{} // Build creates the tasks that set up storage acls -func (b *StorageAclBuilder) Build(c *fi.ModelBuilderContext) error { - serviceAccount, err := b.Cloud.ServiceAccount() - if err != nil { - return fmt.Errorf("error fetching ServiceAccount: %v", err) - } +func (b *StorageAclBuilder) Build(c *fi.ModelBuilderContext) error { if featureflag.GoogleCloudBucketACL.Enabled() { + if b.Cluster.Spec.CloudConfig.GCEServiceAccount == "" { + return fmt.Errorf("featureflag GoogleCloudBucketACL not supported with per-instancegroup GCEServiceAccount") + } + + klog.Warningf("featureflag GoogleCloudBucketACL is no longer recommended; use per-instancegroup GCEServiceAccounts instead") + + gceDefaultServiceAccount, err := b.Cloud.ServiceAccount() + if err != nil { + return fmt.Errorf("error fetching default ServiceAccount: %w", err) + } + clusterPath := b.Cluster.Spec.ConfigBase p, err := vfs.Context.BuildVfsPath(clusterPath) if err != nil { - return fmt.Errorf("cannot parse cluster path %q: %v", clusterPath, err) + return fmt.Errorf("cannot parse cluster path %q: %w", clusterPath, err) } switch p := p.(type) { @@ -62,7 +70,7 @@ func (b *StorageAclBuilder) Build(c *fi.ModelBuilderContext) error { Name: s("serviceaccount-statestore-list"), Lifecycle: b.Lifecycle, Bucket: s(p.Bucket()), - Entity: s("user-" + serviceAccount), + Entity: s("user-" + gceDefaultServiceAccount), Role: s("READER"), }) } @@ -80,27 +88,121 @@ func (b *StorageAclBuilder) Build(c *fi.ModelBuilderContext) error { buckets := sets.NewString() for _, p := range writeablePaths { - if gcsPath, ok := p.(*vfs.GSPath); ok { - bucket := gcsPath.Bucket() - if buckets.Has(bucket) { - continue - } - - klog.Warningf("adding bucket level write ACL to gs://%s to support etcd backup", bucket) - - c.AddTask(&gcetasks.StorageBucketAcl{ - Name: s("serviceaccount-backup-readwrite-" + bucket), - Lifecycle: b.Lifecycle, - Bucket: s(bucket), - Entity: s("user-" + serviceAccount), - Role: s("WRITER"), - }) - - buckets.Insert(bucket) - } else { + gcsPath, ok := p.(*vfs.GSPath) + if !ok { klog.Warningf("unknown path, can't apply IAM policy: %q", p) + continue } + + bucket := gcsPath.Bucket() + if buckets.Has(bucket) { + continue + } + buckets.Insert(bucket) + + klog.Warningf("adding bucket level write ACL to gs://%s to support etcd backup", bucket) + + c.AddTask(&gcetasks.StorageBucketAcl{ + Name: s("serviceaccount-backup-readwrite-" + bucket), + Lifecycle: b.Lifecycle, + Bucket: s(bucket), + Entity: s("user-" + gceDefaultServiceAccount), + Role: s("WRITER"), + }) + } + + return nil + } + + type serviceAccountRole struct { + Email string + Role kops.InstanceGroupRole + } + serviceAccountRoles := make(map[serviceAccountRole]bool) + + for _, ig := range b.InstanceGroups { + serviceAccount := b.LinkToServiceAccount(ig) + + email := *serviceAccount.Email + serviceAccountRoles[serviceAccountRole{Email: email, Role: ig.Spec.Role}] = true + } + + for serviceAccountRole := range serviceAccountRoles { + role := serviceAccountRole.Role + + nodeRole, err := iam.BuildNodeRoleSubject(role, false) + if err != nil { + return err + } + + buckets := sets.NewString() + + writeablePaths, err := iam.WriteableVFSPaths(b.Cluster, nodeRole) + if err != nil { + return err + } + for _, p := range writeablePaths { + gcsPath, ok := p.(*vfs.GSPath) + if !ok { + klog.Warningf("unknown path, can't apply IAM policy: %q", p) + continue + } + + bucket := gcsPath.Bucket() + if buckets.Has(bucket) { + continue + } + buckets.Insert(bucket) + + nameForTask := strings.ToLower(string(role)) + + klog.Warningf("adding bucket level write IAM for role %q to gs://%s to support etcd backup", bucket, role) + + c.AddTask(&gcetasks.StorageBucketIAM{ + Name: s("objectadmin-" + bucket + "-serviceaccount-" + nameForTask), + Lifecycle: b.Lifecycle, + Bucket: s(bucket), + Member: s("serviceAccount:" + serviceAccountRole.Email), + Role: s("roles/storage.objectAdmin"), + }) + } + + // Add bucket read permissions if we need to read from the bucket + readablePaths, err := iam.ReadableStatePaths(b.Cluster, nodeRole) + if err != nil { + return err + } + if len(readablePaths) != 0 { + p, err := vfs.Context.BuildVfsPath(b.Cluster.Spec.ConfigStore) + if err != nil { + return fmt.Errorf("cannot parse VFS path %q: %v", b.Cluster.Spec.ConfigStore, err) + } + + gcsPath, ok := p.(*vfs.GSPath) + if !ok { + klog.Warningf("unknown path, can't apply IAM policy: %q", p) + continue + } + bucket := gcsPath.Bucket() + if buckets.Has(bucket) { + // Already marked as writeable; we can skip + continue + } + buckets.Insert(bucket) + + nameForTask := strings.ToLower(string(role)) + + klog.Warningf("adding bucket level read IAM to gs://%s for role %q", bucket, role) + + c.AddTask(&gcetasks.StorageBucketIAM{ + Name: s("objectviewer-" + bucket + "-serviceaccount-" + nameForTask), + Lifecycle: b.Lifecycle, + Bucket: s(bucket), + Member: s("serviceAccount:" + serviceAccountRole.Email), + Role: s("roles/storage.objectViewer"), + }) } } + return nil } diff --git a/upup/pkg/fi/cloudup/apply_cluster.go b/upup/pkg/fi/cloudup/apply_cluster.go index 6cf580c971..f7fc9adb25 100644 --- a/upup/pkg/fi/cloudup/apply_cluster.go +++ b/upup/pkg/fi/cloudup/apply_cluster.go @@ -589,6 +589,7 @@ func (c *ApplyClusterCmd) Run(ctx context.Context) error { ) case kops.CloudProviderGCE: gceModelContext := &gcemodel.GCEModelContext{ + ProjectID: project, KopsModelContext: modelContext, } diff --git a/upup/pkg/fi/cloudup/new_cluster.go b/upup/pkg/fi/cloudup/new_cluster.go index 34c5154745..7984dc30fa 100644 --- a/upup/pkg/fi/cloudup/new_cluster.go +++ b/upup/pkg/fi/cloudup/new_cluster.go @@ -367,10 +367,6 @@ func setupVPC(opt *NewClusterOptions, cluster *api.Cluster) error { // TODO remove this logging? klog.Infof("VMs will be configured to use specified Service Account: %v", opt.GCEServiceAccount) cluster.Spec.CloudConfig.GCEServiceAccount = opt.GCEServiceAccount - } else { - klog.Warning("VMs will be configured to use the GCE default compute Service Account! This is an anti-pattern") - klog.Warning("Use a pre-created Service Account with the flag: --gce-service-account=account@projectname.iam.gserviceaccount.com") - cluster.Spec.CloudConfig.GCEServiceAccount = "default" } case api.CloudProviderOpenstack: