mirror of https://github.com/kubernetes/kops.git
752 lines
24 KiB
Go
752 lines
24 KiB
Go
/*
Copyright 2019 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
|
|
|
|
package nodeup
|
|
|
|
import (
|
|
"context"
|
|
"crypto/sha256"
|
|
"encoding/base64"
|
|
"errors"
|
|
"fmt"
|
|
"io"
|
|
"net"
|
|
"net/url"
|
|
"os"
|
|
"os/exec"
|
|
"strconv"
|
|
"strings"
|
|
"time"
|
|
|
|
"github.com/aws/aws-sdk-go/aws"
|
|
"github.com/aws/aws-sdk-go/aws/awserr"
|
|
"github.com/aws/aws-sdk-go/aws/ec2metadata"
|
|
"github.com/aws/aws-sdk-go/aws/session"
|
|
"github.com/aws/aws-sdk-go/service/autoscaling"
|
|
"github.com/aws/aws-sdk-go/service/kms"
|
|
"go.uber.org/multierr"
|
|
"k8s.io/klog/v2"
|
|
"k8s.io/kops/nodeup/pkg/model"
|
|
"k8s.io/kops/nodeup/pkg/model/networking"
|
|
api "k8s.io/kops/pkg/apis/kops"
|
|
kopsmodel "k8s.io/kops/pkg/apis/kops/model"
|
|
"k8s.io/kops/pkg/apis/nodeup"
|
|
"k8s.io/kops/pkg/assets"
|
|
"k8s.io/kops/pkg/bootstrap"
|
|
"k8s.io/kops/pkg/bootstrap/pkibootstrap"
|
|
"k8s.io/kops/pkg/configserver"
|
|
"k8s.io/kops/pkg/kopscontrollerclient"
|
|
"k8s.io/kops/pkg/wellknownports"
|
|
"k8s.io/kops/upup/pkg/fi"
|
|
"k8s.io/kops/upup/pkg/fi/cloudup/awsup"
|
|
"k8s.io/kops/upup/pkg/fi/cloudup/azure"
|
|
"k8s.io/kops/upup/pkg/fi/cloudup/do"
|
|
"k8s.io/kops/upup/pkg/fi/cloudup/gce/tpm/gcetpmsigner"
|
|
"k8s.io/kops/upup/pkg/fi/cloudup/hetzner"
|
|
"k8s.io/kops/upup/pkg/fi/cloudup/openstack"
|
|
"k8s.io/kops/upup/pkg/fi/cloudup/scaleway"
|
|
"k8s.io/kops/upup/pkg/fi/nodeup/local"
|
|
"k8s.io/kops/upup/pkg/fi/nodeup/nodetasks"
|
|
"k8s.io/kops/upup/pkg/fi/secrets"
|
|
"k8s.io/kops/upup/pkg/fi/utils"
|
|
"k8s.io/kops/util/pkg/architectures"
|
|
"k8s.io/kops/util/pkg/distributions"
|
|
"k8s.io/kops/util/pkg/vfs"
|
|
)
|
|
|
|
// MaxTaskDuration is how long we keep retrying tasks. It is deliberately very
// long (one year) because there is no great fallback if nodeup gives up.
const MaxTaskDuration time.Duration = 365 * 24 * time.Hour
|
|
|
|
// NodeUpCommand holds the settings that drive a single nodeup run.
type NodeUpCommand struct {
	// CacheDir is the directory handed to the asset store and to the
	// "direct" target. It is required.
	CacheDir string

	// ConfigLocation is the VFS location of the YAML boot configuration.
	// It is required.
	ConfigLocation string

	// Target selects how tasks are applied: "direct" or "dryrun".
	Target string
}
|
|
|
|
// Run is responsible for perform the nodeup process
|
|
func (c *NodeUpCommand) Run(out io.Writer) error {
|
|
ctx := context.Background()
|
|
|
|
var bootConfig nodeup.BootConfig
|
|
if c.ConfigLocation != "" {
|
|
b, err := vfs.Context.ReadFile(c.ConfigLocation)
|
|
if err != nil {
|
|
return fmt.Errorf("error loading configuration %q: %v", c.ConfigLocation, err)
|
|
}
|
|
|
|
err = utils.YamlUnmarshal(b, &bootConfig)
|
|
if err != nil {
|
|
return fmt.Errorf("error parsing configuration %q: %v", c.ConfigLocation, err)
|
|
}
|
|
} else {
|
|
return fmt.Errorf("ConfigLocation is required")
|
|
}
|
|
|
|
if c.CacheDir == "" {
|
|
return fmt.Errorf("CacheDir is required")
|
|
}
|
|
|
|
region, err := getRegion(ctx, &bootConfig)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
if err = seedRNG(ctx, &bootConfig, region); err != nil {
|
|
return err
|
|
}
|
|
|
|
var configBase vfs.Path
|
|
|
|
// If we're using a config server instead of vfs, nodeConfig will hold our configuration
|
|
var nodeConfig *nodeup.NodeConfig
|
|
|
|
if bootConfig.ConfigServer != nil && len(bootConfig.ConfigServer.Servers) > 0 {
|
|
response, err := getNodeConfigFromServers(ctx, &bootConfig, region)
|
|
if err != nil {
|
|
return fmt.Errorf("failed to get node config from server: %w", err)
|
|
}
|
|
nodeConfig = response.NodeConfig
|
|
} else if fi.ValueOf(bootConfig.ConfigBase) != "" {
|
|
var err error
|
|
configBase, err = vfs.Context.BuildVfsPath(*bootConfig.ConfigBase)
|
|
if err != nil {
|
|
return fmt.Errorf("cannot parse ConfigBase %q: %v", *bootConfig.ConfigBase, err)
|
|
}
|
|
} else {
|
|
return fmt.Errorf("ConfigBase or ConfigServer is required")
|
|
}
|
|
|
|
var nodeupConfig nodeup.Config
|
|
var nodeupConfigHash [32]byte
|
|
if nodeConfig != nil {
|
|
if err := utils.YamlUnmarshal([]byte(nodeConfig.NodeupConfig), &nodeupConfig); err != nil {
|
|
return fmt.Errorf("error parsing BootConfig config response: %v", err)
|
|
}
|
|
nodeupConfigHash = sha256.Sum256([]byte(nodeConfig.NodeupConfig))
|
|
nodeupConfig.CAs[fi.CertificateIDCA] = bootConfig.ConfigServer.CACertificates
|
|
} else if bootConfig.InstanceGroupName != "" {
|
|
nodeupConfigLocation := configBase.Join("igconfig", bootConfig.InstanceGroupRole.ToLowerString(), bootConfig.InstanceGroupName, "nodeupconfig.yaml")
|
|
|
|
b, err := nodeupConfigLocation.ReadFile(ctx)
|
|
if err != nil {
|
|
return fmt.Errorf("error loading NodeupConfig %q: %v", nodeupConfigLocation, err)
|
|
}
|
|
|
|
if err = utils.YamlUnmarshal(b, &nodeupConfig); err != nil {
|
|
return fmt.Errorf("error parsing NodeupConfig %q: %v", nodeupConfigLocation, err)
|
|
}
|
|
nodeupConfigHash = sha256.Sum256(b)
|
|
} else {
|
|
return fmt.Errorf("no instance group defined in nodeup config")
|
|
}
|
|
|
|
if bootConfig.NodeupConfigHash != "" {
|
|
if want, got := bootConfig.NodeupConfigHash, base64.StdEncoding.EncodeToString(nodeupConfigHash[:]); got != want {
|
|
return fmt.Errorf("nodeup config hash mismatch (was %q, expected %q)", got, want)
|
|
}
|
|
}
|
|
|
|
err = evaluateSpec(&nodeupConfig, bootConfig.CloudProvider)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
architecture, err := architectures.FindArchitecture()
|
|
if err != nil {
|
|
return fmt.Errorf("error determining OS architecture: %v", err)
|
|
}
|
|
|
|
distribution, err := distributions.FindDistribution("/")
|
|
if err != nil {
|
|
return fmt.Errorf("error determining OS distribution: %v", err)
|
|
}
|
|
|
|
configAssets := nodeupConfig.Assets[architecture]
|
|
assetStore := fi.NewAssetStore(c.CacheDir)
|
|
for _, asset := range configAssets {
|
|
err := assetStore.Add(asset)
|
|
if err != nil {
|
|
return fmt.Errorf("error adding asset %q: %v", asset, err)
|
|
}
|
|
}
|
|
|
|
var cloud fi.Cloud
|
|
|
|
if bootConfig.CloudProvider == api.CloudProviderAWS {
|
|
awsCloud, err := awsup.NewAWSCloud(region, nil)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
cloud = awsCloud
|
|
}
|
|
|
|
modelContext := &model.NodeupModelContext{
|
|
Cloud: cloud,
|
|
Architecture: architecture,
|
|
Assets: assetStore,
|
|
ConfigBase: configBase,
|
|
Distribution: distribution,
|
|
BootConfig: &bootConfig,
|
|
NodeupConfig: &nodeupConfig,
|
|
}
|
|
|
|
var secretStore fi.SecretStoreReader
|
|
var keyStore fi.KeystoreReader
|
|
if nodeConfig != nil {
|
|
modelContext.SecretStore = configserver.NewSecretStore(nodeConfig.NodeSecrets)
|
|
} else if nodeupConfig.ConfigStore.Secrets != "" {
|
|
klog.Infof("Building SecretStore at %q", nodeupConfig.ConfigStore.Secrets)
|
|
p, err := vfs.Context.BuildVfsPath(nodeupConfig.ConfigStore.Secrets)
|
|
if err != nil {
|
|
return fmt.Errorf("error building secret store path: %v", err)
|
|
}
|
|
|
|
secretStore = secrets.NewVFSSecretStoreReader(p)
|
|
modelContext.SecretStore = secretStore
|
|
} else {
|
|
return fmt.Errorf("SecretStore not set")
|
|
}
|
|
|
|
if nodeConfig != nil {
|
|
modelContext.KeyStore = configserver.NewKeyStore()
|
|
} else if nodeupConfig.ConfigStore.Keypairs != "" {
|
|
klog.Infof("Building KeyStore at %q", nodeupConfig.ConfigStore.Keypairs)
|
|
p, err := vfs.Context.BuildVfsPath(nodeupConfig.ConfigStore.Keypairs)
|
|
if err != nil {
|
|
return fmt.Errorf("error building key store path: %v", err)
|
|
}
|
|
|
|
modelContext.KeyStore = fi.NewVFSKeystoreReader(p)
|
|
keyStore = modelContext.KeyStore
|
|
} else {
|
|
return fmt.Errorf("KeyStore not set")
|
|
}
|
|
|
|
if err := modelContext.Init(); err != nil {
|
|
return err
|
|
}
|
|
|
|
if bootConfig.CloudProvider == api.CloudProviderAWS {
|
|
instanceIDBytes, err := vfs.Context.ReadFile("metadata://aws/meta-data/instance-id")
|
|
if err != nil {
|
|
return fmt.Errorf("error reading instance-id from AWS metadata: %v", err)
|
|
}
|
|
modelContext.InstanceID = string(instanceIDBytes)
|
|
|
|
// Check if WarmPool is enabled first, to avoid additional API calls
|
|
if len(modelContext.NodeupConfig.WarmPoolImages) > 0 {
|
|
modelContext.ConfigurationMode, err = getAWSConfigurationMode(ctx, modelContext)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
}
|
|
|
|
modelContext.MachineType, err = getMachineType()
|
|
if err != nil {
|
|
return fmt.Errorf("failed to get machine type: %w", err)
|
|
}
|
|
|
|
// If Nvidia is enabled in the cluster, check if this instance has support for it.
|
|
nvidia := modelContext.NodeupConfig.ContainerdConfig.NvidiaGPU
|
|
if nvidia != nil && fi.ValueOf(nvidia.Enabled) {
|
|
awsCloud := cloud.(awsup.AWSCloud)
|
|
// Get the instance type's detailed information.
|
|
instanceType, err := awsup.GetMachineTypeInfo(awsCloud, modelContext.MachineType)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
if instanceType.GPU {
|
|
klog.Info("instance supports GPU acceleration")
|
|
modelContext.GPUVendor = architectures.GPUVendorNvidia
|
|
}
|
|
}
|
|
} else if bootConfig.CloudProvider == api.CloudProviderOpenstack {
|
|
// NvidiaGPU possible to enable only in instance group level in OpenStack. When we assume that GPU is supported
|
|
if nodeupConfig.NvidiaGPU != nil && fi.ValueOf(nodeupConfig.NvidiaGPU.Enabled) {
|
|
klog.Info("instance supports GPU acceleration")
|
|
modelContext.GPUVendor = architectures.GPUVendorNvidia
|
|
}
|
|
}
|
|
|
|
if err := loadKernelModules(modelContext); err != nil {
|
|
return err
|
|
}
|
|
|
|
loader := &Loader{}
|
|
loader.Builders = append(loader.Builders, &model.EtcHostsBuilder{NodeupModelContext: modelContext})
|
|
loader.Builders = append(loader.Builders, &model.NTPBuilder{NodeupModelContext: modelContext})
|
|
loader.Builders = append(loader.Builders, &model.DirectoryBuilder{NodeupModelContext: modelContext})
|
|
loader.Builders = append(loader.Builders, &model.UpdateServiceBuilder{NodeupModelContext: modelContext})
|
|
loader.Builders = append(loader.Builders, &model.VolumesBuilder{NodeupModelContext: modelContext})
|
|
loader.Builders = append(loader.Builders, &model.ContainerdBuilder{NodeupModelContext: modelContext})
|
|
loader.Builders = append(loader.Builders, &model.ProtokubeBuilder{NodeupModelContext: modelContext})
|
|
loader.Builders = append(loader.Builders, &model.CloudConfigBuilder{NodeupModelContext: modelContext})
|
|
loader.Builders = append(loader.Builders, &model.FileAssetsBuilder{NodeupModelContext: modelContext})
|
|
loader.Builders = append(loader.Builders, &model.HookBuilder{NodeupModelContext: modelContext})
|
|
loader.Builders = append(loader.Builders, &model.KubeletBuilder{NodeupModelContext: modelContext})
|
|
loader.Builders = append(loader.Builders, &model.KubectlBuilder{NodeupModelContext: modelContext})
|
|
loader.Builders = append(loader.Builders, &model.LogrotateBuilder{NodeupModelContext: modelContext})
|
|
loader.Builders = append(loader.Builders, &model.ManifestsBuilder{NodeupModelContext: modelContext})
|
|
loader.Builders = append(loader.Builders, &model.PackagesBuilder{NodeupModelContext: modelContext})
|
|
loader.Builders = append(loader.Builders, &model.NvidiaBuilder{NodeupModelContext: modelContext})
|
|
loader.Builders = append(loader.Builders, &model.SecretBuilder{NodeupModelContext: modelContext})
|
|
loader.Builders = append(loader.Builders, &model.FirewallBuilder{NodeupModelContext: modelContext})
|
|
loader.Builders = append(loader.Builders, &model.SysctlBuilder{NodeupModelContext: modelContext})
|
|
loader.Builders = append(loader.Builders, &model.KubeAPIServerBuilder{NodeupModelContext: modelContext})
|
|
loader.Builders = append(loader.Builders, &model.KubeControllerManagerBuilder{NodeupModelContext: modelContext})
|
|
loader.Builders = append(loader.Builders, &model.KubeSchedulerBuilder{NodeupModelContext: modelContext})
|
|
loader.Builders = append(loader.Builders, &model.EtcdManagerTLSBuilder{NodeupModelContext: modelContext})
|
|
loader.Builders = append(loader.Builders, &model.KubeProxyBuilder{NodeupModelContext: modelContext})
|
|
loader.Builders = append(loader.Builders, &model.KopsControllerBuilder{NodeupModelContext: modelContext})
|
|
loader.Builders = append(loader.Builders, &model.WarmPoolBuilder{NodeupModelContext: modelContext})
|
|
loader.Builders = append(loader.Builders, &model.PrefixBuilder{NodeupModelContext: modelContext})
|
|
loader.Builders = append(loader.Builders, &model.NerdctlBuilder{NodeupModelContext: modelContext})
|
|
loader.Builders = append(loader.Builders, &model.CrictlBuilder{NodeupModelContext: modelContext})
|
|
|
|
loader.Builders = append(loader.Builders, &networking.CommonBuilder{NodeupModelContext: modelContext})
|
|
loader.Builders = append(loader.Builders, &networking.CalicoBuilder{NodeupModelContext: modelContext})
|
|
loader.Builders = append(loader.Builders, &networking.CiliumBuilder{NodeupModelContext: modelContext})
|
|
loader.Builders = append(loader.Builders, &networking.AmazonVPCRoutedENIBuilder{NodeupModelContext: modelContext})
|
|
loader.Builders = append(loader.Builders, &networking.KuberouterBuilder{NodeupModelContext: modelContext})
|
|
|
|
loader.Builders = append(loader.Builders, &model.BootstrapClientBuilder{NodeupModelContext: modelContext})
|
|
taskMap, err := loader.Build()
|
|
if err != nil {
|
|
return fmt.Errorf("error building loader: %v", err)
|
|
}
|
|
|
|
for i, image := range nodeupConfig.Images[architecture] {
|
|
taskMap["LoadImage."+strconv.Itoa(i)] = &nodetasks.LoadImageTask{
|
|
Sources: image.Sources,
|
|
Hash: image.Hash,
|
|
}
|
|
}
|
|
// Protokube load image task is in ProtokubeBuilder
|
|
|
|
var target fi.NodeupTarget
|
|
|
|
switch c.Target {
|
|
case "direct":
|
|
target = &local.LocalTarget{
|
|
CacheDir: c.CacheDir,
|
|
Cloud: cloud,
|
|
}
|
|
case "dryrun":
|
|
assetBuilder := assets.NewAssetBuilder(vfs.Context, nil, nodeupConfig.KubernetesVersion, false)
|
|
target = fi.NewNodeupDryRunTarget(assetBuilder, out)
|
|
default:
|
|
return fmt.Errorf("unsupported target type %q", c.Target)
|
|
}
|
|
|
|
context, err := fi.NewNodeupContext(ctx, target, keyStore, &bootConfig, &nodeupConfig, taskMap)
|
|
if err != nil {
|
|
klog.Exitf("error building context: %v", err)
|
|
}
|
|
|
|
var options fi.RunTasksOptions
|
|
options.InitDefaults()
|
|
|
|
err = context.RunTasks(options)
|
|
if err != nil {
|
|
klog.Exitf("error running tasks: %v", err)
|
|
}
|
|
|
|
err = target.Finish(taskMap)
|
|
if err != nil {
|
|
klog.Exitf("error closing target: %v", err)
|
|
}
|
|
|
|
if nodeupConfig.EnableLifecycleHook {
|
|
if bootConfig.CloudProvider == api.CloudProviderAWS {
|
|
err := completeWarmingLifecycleAction(ctx, cloud.(awsup.AWSCloud), modelContext)
|
|
if err != nil {
|
|
return fmt.Errorf("failed to complete lifecylce action: %w", err)
|
|
}
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func getMachineType() (string, error) {
|
|
config := aws.NewConfig()
|
|
config = config.WithCredentialsChainVerboseErrors(true)
|
|
|
|
sess := session.Must(session.NewSession(config))
|
|
metadata := ec2metadata.New(sess)
|
|
|
|
// Get the actual instance type by querying the EC2 instance metadata service.
|
|
instanceTypeName, err := metadata.GetMetadata("instance-type")
|
|
if err != nil {
|
|
return "", fmt.Errorf("failed to get instance metadata type: %w", err)
|
|
}
|
|
return instanceTypeName, err
|
|
}
|
|
|
|
func completeWarmingLifecycleAction(ctx context.Context, cloud awsup.AWSCloud, modelContext *model.NodeupModelContext) error {
|
|
asgName := modelContext.BootConfig.InstanceGroupName + "." + modelContext.NodeupConfig.ClusterName
|
|
hookName := "kops-warmpool"
|
|
svc := cloud.Autoscaling()
|
|
hooks, err := svc.DescribeLifecycleHooksWithContext(ctx, &autoscaling.DescribeLifecycleHooksInput{
|
|
AutoScalingGroupName: &asgName,
|
|
LifecycleHookNames: []*string{&hookName},
|
|
})
|
|
if err != nil {
|
|
return fmt.Errorf("failed to find lifecycle hook %q: %w", hookName, err)
|
|
}
|
|
|
|
if len(hooks.LifecycleHooks) > 0 {
|
|
klog.Info("Found ASG lifecycle hook")
|
|
_, err := svc.CompleteLifecycleActionWithContext(ctx, &autoscaling.CompleteLifecycleActionInput{
|
|
AutoScalingGroupName: &asgName,
|
|
InstanceId: &modelContext.InstanceID,
|
|
LifecycleHookName: &hookName,
|
|
LifecycleActionResult: fi.PtrTo("CONTINUE"),
|
|
})
|
|
if err != nil {
|
|
return fmt.Errorf("failed to complete lifecycle hook %q for %q: %v", hookName, modelContext.InstanceID, err)
|
|
}
|
|
klog.Info("Lifecycle action completed")
|
|
} else {
|
|
klog.Info("No ASG lifecycle hook found")
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func evaluateSpec(nodeupConfig *nodeup.Config, cloudProvider api.CloudProviderID) error {
|
|
hostnameOverride, err := evaluateHostnameOverride(cloudProvider)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
nodeupConfig.KubeletConfig.HostnameOverride = hostnameOverride
|
|
|
|
if nodeupConfig.KubeProxy != nil {
|
|
nodeupConfig.KubeProxy.HostnameOverride = hostnameOverride
|
|
nodeupConfig.KubeProxy.BindAddress, err = evaluateBindAddress(nodeupConfig.KubeProxy.BindAddress)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
func evaluateHostnameOverride(cloudProvider api.CloudProviderID) (string, error) {
|
|
switch cloudProvider {
|
|
case api.CloudProviderAWS:
|
|
instanceIDBytes, err := vfs.Context.ReadFile("metadata://aws/meta-data/instance-id")
|
|
if err != nil {
|
|
return "", fmt.Errorf("error reading instance-id from AWS metadata: %v", err)
|
|
}
|
|
|
|
return string(instanceIDBytes), nil
|
|
|
|
case api.CloudProviderGCE:
|
|
// This lets us tolerate broken hostnames (i.e. systemd)
|
|
b, err := vfs.Context.ReadFile("metadata://gce/instance/hostname")
|
|
if err != nil {
|
|
return "", fmt.Errorf("error reading hostname from GCE metadata: %v", err)
|
|
}
|
|
|
|
// We only want to use the first portion of the fully-qualified name
|
|
// e.g. foo.c.project.internal => foo
|
|
fullyQualified := string(b)
|
|
bareHostname := strings.Split(fullyQualified, ".")[0]
|
|
return bareHostname, nil
|
|
case api.CloudProviderDO:
|
|
vBytes, err := vfs.Context.ReadFile("metadata://digitalocean/interfaces/private/0/ipv4/address")
|
|
if err != nil {
|
|
return "", fmt.Errorf("error reading droplet private IP from DigitalOcean metadata: %v", err)
|
|
}
|
|
|
|
hostname := string(vBytes)
|
|
if hostname == "" {
|
|
return "", errors.New("private IP for digitalocean droplet was empty")
|
|
}
|
|
|
|
return hostname, nil
|
|
}
|
|
|
|
return "", nil
|
|
}
|
|
|
|
func evaluateBindAddress(bindAddress string) (string, error) {
|
|
if bindAddress == "" {
|
|
return "", nil
|
|
}
|
|
if bindAddress == "@aws" {
|
|
vBytes, err := vfs.Context.ReadFile("metadata://aws/meta-data/local-ipv4")
|
|
if err != nil {
|
|
return "", fmt.Errorf("error reading local IP from AWS metadata: %v", err)
|
|
}
|
|
|
|
// The local-ipv4 gets it's IP from the AWS.
|
|
// For now just choose the first one.
|
|
ips := strings.Fields(string(vBytes))
|
|
if len(ips) == 0 {
|
|
klog.Warningf("Local IP from AWS metadata service was empty")
|
|
return "", nil
|
|
}
|
|
|
|
ip := ips[0]
|
|
klog.Infof("Using IP from AWS metadata service: %s", ip)
|
|
|
|
return ip, nil
|
|
}
|
|
|
|
if net.ParseIP(bindAddress) == nil {
|
|
return "", fmt.Errorf("bindAddress is not valid IP address")
|
|
}
|
|
return bindAddress, nil
|
|
}
|
|
|
|
// kernelHasFilesystem reports whether /proc/filesystems lists the filesystem
// named fs. It returns an error if /proc/filesystems cannot be read.
func kernelHasFilesystem(fs string) (bool, error) {
	data, err := os.ReadFile("/proc/filesystems")
	if err != nil {
		return false, fmt.Errorf("error reading /proc/filesystems: %v", err)
	}

	// Every whitespace-separated token is compared, so the "nodev" marker is
	// matched like a filesystem name; technically it should be skipped, but
	// it doesn't matter.
	for _, name := range strings.Fields(string(data)) {
		if name == fs {
			return true, nil
		}
	}
	return false, nil
}
|
|
|
|
// modprobe will exec `modprobe <module>`
|
|
func modprobe(module string) error {
|
|
klog.Infof("Doing modprobe for module %v", module)
|
|
out, err := exec.Command("/sbin/modprobe", module).CombinedOutput()
|
|
outString := string(out)
|
|
if err != nil {
|
|
return fmt.Errorf("modprobe for module %q failed (%v): %s", module, err, outString)
|
|
}
|
|
if outString != "" {
|
|
klog.Infof("Output from modprobe %s:\n%s", module, outString)
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// loadKernelModules is a hack to force br_netfilter to be loaded
|
|
// TODO: Move to tasks architecture
|
|
func loadKernelModules(context *model.NodeupModelContext) error {
|
|
err := modprobe("br_netfilter")
|
|
if err != nil {
|
|
// TODO: Return error in 1.11 (too risky for 1.10)
|
|
klog.Warningf("error loading br_netfilter module: %v", err)
|
|
}
|
|
// TODO: Add to /etc/modules-load.d/ ?
|
|
return nil
|
|
}
|
|
|
|
// getRegion queries the cloud provider for the region.
|
|
func getRegion(ctx context.Context, bootConfig *nodeup.BootConfig) (string, error) {
|
|
switch bootConfig.CloudProvider {
|
|
case api.CloudProviderAWS:
|
|
region, err := awsup.RegionFromMetadata(ctx)
|
|
if err != nil {
|
|
return "", err
|
|
}
|
|
|
|
return region, nil
|
|
}
|
|
|
|
return "", nil
|
|
}
|
|
|
|
// seedRNG adds entropy to the random number generator.
|
|
func seedRNG(ctx context.Context, bootConfig *nodeup.BootConfig, region string) error {
|
|
switch bootConfig.CloudProvider {
|
|
case api.CloudProviderAWS:
|
|
config := aws.NewConfig().WithCredentialsChainVerboseErrors(true).WithRegion(region)
|
|
sess, err := session.NewSession(config)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
random, err := kms.New(sess, config).GenerateRandom(&kms.GenerateRandomInput{
|
|
NumberOfBytes: aws.Int64(64),
|
|
})
|
|
if err != nil {
|
|
return fmt.Errorf("generating random seed: %v", err)
|
|
}
|
|
|
|
f, err := os.OpenFile("/dev/urandom", os.O_WRONLY, 0)
|
|
if err != nil {
|
|
return fmt.Errorf("opening /dev/urandom: %v", err)
|
|
}
|
|
_, err = f.Write(random.Plaintext)
|
|
if err1 := f.Close(); err1 != nil && err == nil {
|
|
err = err1
|
|
}
|
|
if err != nil {
|
|
return fmt.Errorf("writing /dev/urandom: %v", err)
|
|
}
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
// getNodeConfigFromServers queries kops-controllers for our node's configuration.
|
|
func getNodeConfigFromServers(ctx context.Context, bootConfig *nodeup.BootConfig, region string) (*nodeup.BootstrapResponse, error) {
|
|
var authenticator bootstrap.Authenticator
|
|
|
|
switch bootConfig.CloudProvider {
|
|
case api.CloudProviderAWS:
|
|
a, err := awsup.NewAWSAuthenticator(region)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
authenticator = a
|
|
case api.CloudProviderGCE:
|
|
a, err := gcetpmsigner.NewTPMAuthenticator()
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
authenticator = a
|
|
case api.CloudProviderHetzner:
|
|
a, err := hetzner.NewHetznerAuthenticator()
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
authenticator = a
|
|
case api.CloudProviderOpenstack:
|
|
a, err := openstack.NewOpenstackAuthenticator()
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
authenticator = a
|
|
case api.CloudProviderDO:
|
|
a, err := do.NewAuthenticator()
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
authenticator = a
|
|
case api.CloudProviderScaleway:
|
|
a, err := scaleway.NewScalewayAuthenticator()
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
authenticator = a
|
|
case api.CloudProviderAzure:
|
|
a, err := azure.NewAzureAuthenticator()
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
authenticator = a
|
|
|
|
case "metal":
|
|
a, err := pkibootstrap.NewAuthenticatorFromFile("/etc/kubernetes/kops/pki/machine/private.pem")
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
authenticator = a
|
|
|
|
default:
|
|
return nil, fmt.Errorf("unsupported cloud provider for node configuration %s", bootConfig.CloudProvider)
|
|
}
|
|
|
|
var challengeListener *bootstrap.ChallengeListener
|
|
|
|
if kopsmodel.UseChallengeCallback(bootConfig.CloudProvider) {
|
|
challengeServer, err := bootstrap.NewChallengeServer(bootConfig.ClusterName, []byte(bootConfig.ConfigServer.CACertificates))
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
listen := ":" + strconv.Itoa(wellknownports.NodeupChallenge)
|
|
|
|
l, err := challengeServer.NewListener(ctx, listen)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("error starting challenge listener: %w", err)
|
|
}
|
|
challengeListener = l
|
|
defer challengeListener.Stop()
|
|
}
|
|
|
|
client := &kopscontrollerclient.Client{
|
|
Authenticator: authenticator,
|
|
CAs: []byte(bootConfig.ConfigServer.CACertificates),
|
|
}
|
|
|
|
var merr error
|
|
for _, server := range bootConfig.ConfigServer.Servers {
|
|
u, err := url.Parse(server)
|
|
if err != nil {
|
|
merr = multierr.Append(merr, fmt.Errorf("unable to parse configuration server url %q: %w", server, err))
|
|
continue
|
|
}
|
|
client.BaseURL = *u
|
|
|
|
request := nodeup.BootstrapRequest{
|
|
APIVersion: nodeup.BootstrapAPIVersion,
|
|
IncludeNodeConfig: true,
|
|
}
|
|
|
|
if challengeListener != nil {
|
|
request.Challenge = challengeListener.CreateChallenge()
|
|
}
|
|
|
|
var resp nodeup.BootstrapResponse
|
|
err = client.Query(ctx, &request, &resp)
|
|
if err != nil {
|
|
merr = multierr.Append(merr, err)
|
|
continue
|
|
}
|
|
return &resp, nil
|
|
}
|
|
return nil, merr
|
|
}
|
|
|
|
func getAWSConfigurationMode(ctx context.Context, c *model.NodeupModelContext) (string, error) {
|
|
// Check if WarmPool is enabled first, to avoid additional API calls
|
|
if len(c.NodeupConfig.WarmPoolImages) == 0 {
|
|
return "", nil
|
|
}
|
|
|
|
// Only worker nodes and apiservers can actually autoscale.
|
|
// We are not adding describe permissions to the other roles
|
|
role := c.BootConfig.InstanceGroupRole
|
|
if role != api.InstanceGroupRoleNode && role != api.InstanceGroupRoleAPIServer {
|
|
return "", nil
|
|
}
|
|
|
|
targetLifecycleState, err := vfs.Context.ReadFile("metadata://aws/meta-data/autoscaling/target-lifecycle-state")
|
|
if err != nil {
|
|
var awsErr awserr.RequestFailure
|
|
if errors.As(err, &awsErr) && awsErr.StatusCode() == 404 {
|
|
// The instance isn't in an ASG (karpenter, etc.)
|
|
return "", nil
|
|
}
|
|
return "", fmt.Errorf("error reading target-lifecycle-state from instance metadata: %v", err)
|
|
}
|
|
|
|
if strings.HasPrefix(string(targetLifecycleState), "Warmed:") {
|
|
klog.Info("instance is entering warm pool")
|
|
return model.ConfigurationModeWarming, nil
|
|
} else {
|
|
klog.Info("instance is entering the ASG")
|
|
return "", nil
|
|
}
|
|
}
|