kops/pkg/model/gcemodel/autoscalinggroup.go

333 lines
9.8 KiB
Go

/*
Copyright 2019 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package gcemodel
import (
"fmt"
"strings"
"k8s.io/klog/v2"
"k8s.io/kops/pkg/apis/kops"
"k8s.io/kops/pkg/model"
"k8s.io/kops/pkg/model/defaults"
"k8s.io/kops/pkg/model/iam"
nodeidentitygce "k8s.io/kops/pkg/nodeidentity/gce"
"k8s.io/kops/upup/pkg/fi"
"k8s.io/kops/upup/pkg/fi/cloudup/gce"
"k8s.io/kops/upup/pkg/fi/cloudup/gce/gcemetadata"
"k8s.io/kops/upup/pkg/fi/cloudup/gcetasks"
)
// DefaultVolumeType is the boot disk type applied to an instance template when
// the InstanceGroup does not specify a root volume type.
const DefaultVolumeType = "pd-standard"
// AutoscalingGroupModelBuilder configures AutoscalingGroup objects.
// On GCE this means that, for every InstanceGroup, Build registers one
// gcetasks.InstanceTemplate task and one gcetasks.InstanceGroupManager task
// per zone.
//
// TODO: rework these parts to be more GCE native, i.e. think in terms of
// Managed Instance Groups rather than ASGs.
type AutoscalingGroupModelBuilder struct {
// GCEModelContext is embedded; the builder's methods call its helpers
// directly on the receiver (naming, network/subnet links, zone lookup).
*GCEModelContext
// BootstrapScriptBuilder supplies the nodeup resource (via ResourceNodeUp)
// that is attached to each instance template as "user-data" metadata.
BootstrapScriptBuilder *model.BootstrapScriptBuilder
// Lifecycle is copied onto every task this builder creates.
Lifecycle fi.Lifecycle
}
// Compile-time check that the builder satisfies fi.CloudupModelBuilder.
var _ fi.CloudupModelBuilder = &AutoscalingGroupModelBuilder{}
// buildInstanceTemplate assembles the gcetasks.InstanceTemplate task for one
// InstanceGroup placed in the given subnet. It only constructs and returns the
// task; registering it with the context is left to the caller. Keeping it
// separate lets the template fields be extracted on their own (the original
// note mentions running with the clusterapi).
//
// The template carries:
//   - boot disk size/type (falling back to the role default size and
//     DefaultVolumeType when the InstanceGroup leaves them unset),
//   - metadata: cluster name, instance group name, the nodeup script as
//     "user-data", "kube-env" for Node groups and "ssh-keys" when keys exist,
//   - access scopes, network tags and labels derived from the role,
//   - networking (stack type, IP forwarding / alias ranges, subnet),
//   - the service account and any guest accelerators.
//
// Any error from the bootstrap script builder, the volume-size defaults, the
// network lookup or the IAM helpers is returned unchanged with a nil template.
func (b *AutoscalingGroupModelBuilder) buildInstanceTemplate(c *fi.CloudupModelBuilderContext, ig *kops.InstanceGroup, subnet *kops.ClusterSubnetSpec) (*gcetasks.InstanceTemplate, error) {
	templateName := b.SafeObjectName(ig.ObjectMeta.Name)

	bootScript, err := b.BootstrapScriptBuilder.ResourceNodeUp(c, ig)
	if err != nil {
		return nil, err
	}

	// Boot disk: take what the InstanceGroup asks for, then fill in defaults.
	var (
		diskSizeGB int32
		diskType   string
	)
	if root := ig.Spec.RootVolume; root != nil {
		diskSizeGB = fi.ValueOf(root.Size)
		diskType = fi.ValueOf(root.Type)
	}
	if diskSizeGB == 0 {
		if diskSizeGB, err = defaults.DefaultInstanceGroupVolumeSize(ig.Spec.Role); err != nil {
			return nil, err
		}
	}
	if diskType == "" {
		diskType = DefaultVolumeType
	}

	prefix := gce.LimitedLengthName(templateName, gcetasks.InstanceTemplateNamePrefixMaxLength)

	network, err := b.LinkToNetwork()
	if err != nil {
		return nil, err
	}

	isSpot := fi.ValueOf(ig.Spec.GCPProvisioningModel) == "SPOT"
	// Public and utility subnets, as well as bastions, get an external address.
	wantsExternalIP := subnet.Type == kops.SubnetTypePublic || subnet.Type == kops.SubnetTypeUtility || ig.IsBastion()

	tmpl := &gcetasks.InstanceTemplate{
		Name:       s(templateName),
		NamePrefix: s(prefix),
		Lifecycle:  b.Lifecycle,

		Network: network,

		MachineType:    s(ig.Spec.MachineType),
		BootDiskType:   s(diskType),
		BootDiskSizeGB: i64(int64(diskSizeGB)),
		BootDiskImage:  s(ig.Spec.Image),

		Preemptible:          fi.PtrTo(isSpot),
		GCPProvisioningModel: ig.Spec.GCPProvisioningModel,

		HasExternalIP: fi.PtrTo(wantsExternalIP),

		Scopes: []string{"compute-rw", "monitoring", "logging-write"},

		Metadata: map[string]fi.Resource{
			gcemetadata.MetadataKeyClusterName:           fi.NewStringResource(b.ClusterName()),
			nodeidentitygce.MetadataKeyInstanceGroupName: fi.NewStringResource(ig.Name),
		},
	}

	// "user-data" rather than "startup-script", for compatibility with cloud-init.
	if bootScript != nil {
		tmpl.Metadata["user-data"] = bootScript
	}

	if ig.Spec.Role == kops.InstanceGroupRoleNode {
		envVars := "os_distribution=cos;arch=amd64;os=linux"
		if !strings.HasPrefix(ig.Spec.Image, "cos-cloud/") {
			envVars = "os_distribution=ubuntu;arch=amd64;os=linux"
		}
		tmpl.Metadata["kube-env"] = fi.NewStringResource("AUTOSCALER_ENV_VARS: " + envVars)
	}

	tmpl.StackType = fi.PtrTo("IPV4_ONLY")
	if b.IsIPv6Only() {
		// The subnets are dual-mode; IPV6_ONLY is not yet supported.
		// This means that VMs will get an IPv4 and a /96 IPv6.
		// However, pods will still be IPv6 only.
		//
		// Ipv6AccessType ("EXTERNAL" being the only supported value) is
		// deliberately not set here; it was left disabled in the original.
		tmpl.StackType = fi.PtrTo("IPV4_IPV6")
	}

	// Storage scope: read-only unless this role has writeable VFS paths.
	roleSubject, err := iam.BuildNodeRoleSubject(ig.Spec.Role, false)
	if err != nil {
		return nil, err
	}
	writeablePaths, err := iam.WriteableVFSPaths(b.Cluster, roleSubject)
	if err != nil {
		return nil, err
	}
	storageScope := "storage-ro"
	if len(writeablePaths) > 0 {
		klog.Warningf("enabling storage-rw for etcd backups")
		storageScope = "storage-rw"
	}
	tmpl.Scopes = append(tmpl.Scopes, storageScope)

	if keyCount := len(b.SSHPublicKeys); keyCount > 0 {
		entries := make([]string, 0, keyCount)
		for _, publicKey := range b.SSHPublicKeys {
			entries = append(entries, fmt.Sprintf("%s: %s", fi.SecretNameSSHPrimary, publicKey))
		}
		tmpl.Metadata["ssh-keys"] = fi.NewStringResource(strings.Join(entries, "\n"))
	}

	clusterLabel := gce.LabelForCluster(b.ClusterName())
	tmpl.Labels = map[string]string{
		clusterLabel.Key: clusterLabel.Value,
		gce.GceLabelNameRolePrefix + ig.Spec.Role.ToLowerString(): "",
		gce.GceLabelNameInstanceGroup:                             ig.ObjectMeta.Name,
	}

	// Role-specific scopes, network tags and labels.
	switch ig.Spec.Role {
	case kops.InstanceGroupRoleControlPlane:
		// Grant DNS permissions
		// TODO: migrate to IAM permissions instead of oldschool scopes?
		tmpl.Scopes = append(tmpl.Scopes, "https://www.googleapis.com/auth/ndev.clouddns.readwrite")
		// Control-plane instances carry both the current and the legacy
		// "master" tag and role label.
		tmpl.Tags = append(tmpl.Tags,
			b.GCETagForRole(kops.InstanceGroupRoleControlPlane),
			b.GCETagForRole("master"),
		)
		tmpl.Labels[gce.GceLabelNameRolePrefix+"master"] = ""

	case kops.InstanceGroupRoleNode, kops.InstanceGroupRoleBastion:
		tmpl.Tags = append(tmpl.Tags, b.GCETagForRole(ig.Spec.Role))
	}

	// With IP aliases the pods get a per-VM alias range and forwarding is off;
	// otherwise the VM must be allowed to forward pod traffic itself.
	aliased := gce.UsesIPAliases(b.Cluster)
	tmpl.CanIPForward = fi.PtrTo(!aliased)
	if aliased {
		tmpl.AliasIPRanges = map[string]string{
			b.NameForIPAliasRange("pods"): "/24",
		}
	}

	tmpl.Subnet = b.LinkToSubnet(subnet)
	tmpl.ServiceAccounts = append(tmpl.ServiceAccounts, b.LinkToServiceAccount(ig))

	// NOTE: labels from b.CloudTagsForInstanceGroup(ig) are intentionally not
	// applied; that code path was disabled in the original.

	// Always a non-nil slice, even when no accelerators are requested.
	accelerators := make([]gcetasks.AcceleratorConfig, 0, len(ig.Spec.GuestAccelerators))
	for i := range ig.Spec.GuestAccelerators {
		requested := ig.Spec.GuestAccelerators[i]
		accelerators = append(accelerators, gcetasks.AcceleratorConfig{
			AcceleratorCount: requested.AcceleratorCount,
			AcceleratorType:  requested.AcceleratorType,
		})
	}
	tmpl.GuestAccelerators = accelerators

	return tmpl, nil
}
// splitToZones computes how many instances of ig should be placed in each of
// its zones, keyed by zone name.
//
// The total to distribute is ig.Spec.MinSize when it is set; otherwise it
// defaults to 2 for Node groups and 1 for every other role. The total is spread
// as evenly as possible: every zone receives total/len(zones) instances and the
// remainder is handed out one at a time to the zones that come first in the
// order returned by FindZonesForInstanceGroup.
//
// An error is returned when the zones cannot be determined, or when instances
// are required but there is no zone to place them in (previously this case
// panicked with an index out of range).
func (b *AutoscalingGroupModelBuilder) splitToZones(ig *kops.InstanceGroup) (map[string]int, error) {
	zones, err := b.FindZonesForInstanceGroup(ig)
	if err != nil {
		return nil, err
	}

	// TODO: Duplicated from aws - move to defaults?
	minSize := 1
	if ig.Spec.MinSize != nil {
		minSize = int(fi.ValueOf(ig.Spec.MinSize))
	} else if ig.Spec.Role == kops.InstanceGroupRoleNode {
		minSize = 2
	}

	if len(zones) == 0 {
		if minSize > 0 {
			return nil, fmt.Errorf("instanceGroup %q has no zones to place %d instance(s) in", ig.Name, minSize)
		}
		// Nothing to place and nowhere to place it: an empty assignment.
		return map[string]int{}, nil
	}

	// We have to assign instances to the various zones
	// TODO: Switch to regional managed instance group
	// But we can't yet use RegionInstanceGroups:
	// 1) no support in terraform
	// 2) we can't steer to specific zones AFAICT, only to all zones in the region
	perZone := minSize / len(zones)
	// Go's % takes the sign of the dividend, so for a non-positive minSize the
	// remainder is <= 0 and no zone receives an extra instance.
	remainder := minSize % len(zones)

	instanceCountByZone := make(map[string]int, len(zones))
	for i, zone := range zones {
		count := perZone
		if i < remainder {
			count++
		}
		instanceCountByZone[zone] = count
	}

	return instanceCountByZone, nil
}
// Build registers the GCE tasks for every InstanceGroup known to the builder.
//
// For each InstanceGroup it adds one InstanceTemplate task and, for every zone
// the group is split across, one InstanceGroupManager task that references the
// template and carries that zone's target size. Control-plane managers are
// additionally attached to the "api" target pool when the cluster uses a
// public API load balancer.
//
// It returns the first error encountered while gathering subnets, building the
// template or splitting the group across zones, and an error if the group does
// not resolve to exactly one subnet.
func (b *AutoscalingGroupModelBuilder) Build(c *fi.CloudupModelBuilderContext) error {
	for _, ig := range b.InstanceGroups {
		subnets, err := b.GatherSubnets(ig)
		if err != nil {
			return err
		}

		// On GCE, instance groups cannot have multiple subnets.
		// Because subnets are regional on GCE, this should not be limiting.
		// (IGs can in theory support multiple zones, but in practice we don't recommend this)
		switch len(subnets) {
		case 0:
			// Reported separately so the message matches what actually went wrong.
			return fmt.Errorf("instanceGroup %q has no subnets", ig.Name)
		case 1:
			// Exactly one subnet: the only supported configuration.
		default:
			return fmt.Errorf("instanceGroup %q has multiple subnets", ig.Name)
		}
		subnet := subnets[0]

		instanceTemplate, err := b.buildInstanceTemplate(c, ig, subnet)
		if err != nil {
			return err
		}
		c.AddTask(instanceTemplate)

		instanceCountByZone, err := b.splitToZones(ig)
		if err != nil {
			return err
		}

		for zone, targetSize := range instanceCountByZone {
			name := gce.NameForInstanceGroupManager(b.Cluster, ig, zone)

			t := &gcetasks.InstanceGroupManager{
				Name:             s(name),
				Lifecycle:        b.Lifecycle,
				Zone:             s(zone),
				TargetSize:       fi.PtrTo(int64(targetSize)),
				BaseInstanceName: s(ig.ObjectMeta.Name),
				InstanceTemplate: instanceTemplate,
			}

			// Attach masters to load balancer if we're using one
			if ig.Spec.Role == kops.InstanceGroupRoleControlPlane && b.UseLoadBalancerForAPI() {
				if lbSpec := b.Cluster.Spec.API.LoadBalancer; lbSpec != nil {
					switch lbSpec.Type {
					case kops.LoadBalancerTypePublic:
						t.TargetPools = append(t.TargetPools, b.LinkToTargetPool("api"))

					case kops.LoadBalancerTypeInternal:
						// Nothing is attached here for internal load balancers.
						klog.Warningf("Not hooking the instance group manager up to anything.")
					}
				}
			}

			c.AddTask(t)
		}
	}

	return nil
}