[cluster-autoscaler] Support using --cloud-config for clusterapi provider

- Leverage --cloud-config to allow for providing a separate kubeconfig for Cluster API management and workload cluster resources
- Allow for fallback to previous behavior when --cloud-config is not specified for backward compatibility
- Provides a --clusterapi-cloud-config-authoritative flag to disable the above fallback behavior and allow for both the management and workload cluster clients to use the in-cluster config
This commit is contained in:
Jason DeTiberus 2020-09-21 10:38:06 -04:00
parent fde90dee45
commit 150dbdeb64
No known key found for this signature in database
GPG Key ID: CBD7D7A4B41437BC
4 changed files with 112 additions and 66 deletions

View File

@ -10,14 +10,6 @@ cluster.
The cluster-api provider requires Kubernetes v1.16 or greater to run the
v1alpha3 version of the API.
## Cluster API Prerequisites
Please be aware that currently the cluster autoscaler only supports CAPI
clusters that have joined their management and workload clusters into a single
cluster. For more information about this please see the
[Cluster API Concepts documentations](https://cluster-api.sigs.k8s.io/user/concepts.html)
and the [`clusterctl move` command documentation](https://cluster-api.sigs.k8s.io/user/concepts.html).
## Starting the Autoscaler
To enable the Cluster API provider, you must first specify it in the command
@ -62,6 +54,43 @@ in the staging namespace, belonging to the purple cluster, with the label owner=
--node-group-auto-discovery=clusterapi:namespace=staging,clusterName=purple,owner=jim
```
## Connecting cluster-autoscaler to Cluster API management and workload Clusters
You will also need to provide the path to the kubeconfig(s) for the management
and workload cluster you wish cluster-autoscaler to run against. To specify the
kubeconfig path for the workload cluster to monitor, use the `--kubeconfig`
option and supply the path to the kubeconfig. If the `--kubeconfig` option is
not specified, cluster-autoscaler will attempt to use an in-cluster configuration.
To specify the kubeconfig path for the management cluster to monitor, use the
`--cloud-config` option and supply the path to the kubeconfig. If the
`--cloud-config` option is not specified it will fall back to using the kubeconfig
that was provided with the `--kubeconfig` option.
Use in-cluster config for both management and workload cluster:
```
cluster-autoscaler --cloud-provider=clusterapi
```
Use in-cluster config for workload cluster, specify kubeconfig for management cluster:
```
cluster-autoscaler --cloud-provider=clusterapi --cloud-config=/mnt/kubeconfig
```
Use in-cluster config for management cluster, specify kubeconfig for workload cluster:
```
cluster-autoscaler --cloud-provider=clusterapi --kubeconfig=/mnt/kubeconfig --clusterapi-cloud-config-authoritative
```
Use separate kubeconfigs for both management and workload cluster:
```
cluster-autoscaler --cloud-provider=clusterapi --kubeconfig=/mnt/workload.kubeconfig --cloud-config=/mnt/management.kubeconfig
```
Use a single provided kubeconfig for both management and workload cluster:
```
cluster-autoscaler --cloud-provider=clusterapi --kubeconfig=/mnt/workload.kubeconfig
```
## Enabling Autoscaling
To enable the automatic scaling of components in your cluster-api managed

View File

@ -140,30 +140,42 @@ func newProvider(
// BuildClusterAPI builds CloudProvider implementation for machine api.
func BuildClusterAPI(opts config.AutoscalingOptions, do cloudprovider.NodeGroupDiscoveryOptions, rl *cloudprovider.ResourceLimiter) cloudprovider.CloudProvider {
externalConfig, err := clientcmd.BuildConfigFromFlags("", opts.KubeConfigPath)
managementKubeconfig := opts.CloudConfig
if managementKubeconfig == "" && !opts.ClusterAPICloudConfigAuthoritative {
managementKubeconfig = opts.KubeConfigPath
}
managementConfig, err := clientcmd.BuildConfigFromFlags("", managementKubeconfig)
if err != nil {
klog.Fatalf("cannot build config: %v", err)
klog.Fatalf("cannot build management cluster config: %v", err)
}
workloadKubeconfig := opts.KubeConfigPath
workloadConfig, err := clientcmd.BuildConfigFromFlags("", workloadKubeconfig)
if err != nil {
klog.Fatalf("cannot build workload cluster config: %v", err)
}
// Grab a dynamic interface that we can create informers from
managementClient, err := dynamic.NewForConfig(externalConfig)
managementClient, err := dynamic.NewForConfig(managementConfig)
if err != nil {
klog.Fatalf("could not generate dynamic client for config")
}
workloadClient, err := kubernetes.NewForConfig(externalConfig)
workloadClient, err := kubernetes.NewForConfig(workloadConfig)
if err != nil {
klog.Fatalf("create kube clientset failed: %v", err)
}
managementDiscoveryClient, err := discovery.NewDiscoveryClientForConfig(externalConfig)
managementDiscoveryClient, err := discovery.NewDiscoveryClientForConfig(managementConfig)
if err != nil {
klog.Fatalf("create discovery client failed: %v", err)
}
cachedDiscovery := memory.NewMemCacheClient(managementDiscoveryClient)
managementScaleClient, err := scale.NewForConfig(
externalConfig,
managementConfig,
restmapper.NewDeferredDiscoveryRESTMapper(cachedDiscovery),
dynamic.LegacyAPIPathResolverFunc,
scale.NewDiscoveryScaleKindResolver(managementDiscoveryClient))

View File

@ -142,4 +142,7 @@ type AutoscalingOptions struct {
AWSUseStaticInstanceList bool
// Path to kube configuration if available
KubeConfigPath string
// ClusterAPICloudConfigAuthoritative tells the Cluster API provider to treat the CloudConfig option as authoritative and
// not use KubeConfigPath as a fallback when it is not provided.
ClusterAPICloudConfigAuthoritative bool
}

View File

@ -169,10 +169,11 @@ var (
regional = flag.Bool("regional", false, "Cluster is regional.")
newPodScaleUpDelay = flag.Duration("new-pod-scale-up-delay", 0*time.Second, "Pods less than this old will not be considered for scale-up.")
ignoreTaintsFlag = multiStringFlag("ignore-taint", "Specifies a taint to ignore in node templates when considering to scale a node group")
balancingIgnoreLabelsFlag = multiStringFlag("balancing-ignore-label", "Specifies a label to ignore in addition to the basic and cloud-provider set of labels when comparing if two node groups are similar")
awsUseStaticInstanceList = flag.Bool("aws-use-static-instance-list", false, "Should CA fetch instance types in runtime or use a static list. AWS only")
enableProfiling = flag.Bool("profiling", false, "Is debug/pprof endpoint enabled")
ignoreTaintsFlag = multiStringFlag("ignore-taint", "Specifies a taint to ignore in node templates when considering to scale a node group")
balancingIgnoreLabelsFlag = multiStringFlag("balancing-ignore-label", "Specifies a label to ignore in addition to the basic and cloud-provider set of labels when comparing if two node groups are similar")
awsUseStaticInstanceList = flag.Bool("aws-use-static-instance-list", false, "Should CA fetch instance types in runtime or use a static list. AWS only")
enableProfiling = flag.Bool("profiling", false, "Is debug/pprof endpoint enabled")
clusterAPICloudConfigAuthoritative = flag.Bool("clusterapi-cloud-config-authoritative", false, "Treat the cloud-config flag authoritatively (do not fallback to using kubeconfig flag). ClusterAPI only")
)
func createAutoscalingOptions() config.AutoscalingOptions {
@ -193,54 +194,55 @@ func createAutoscalingOptions() config.AutoscalingOptions {
klog.Fatalf("Failed to parse flags: %v", err)
}
return config.AutoscalingOptions{
CloudConfig: *cloudConfig,
CloudProviderName: *cloudProviderFlag,
NodeGroupAutoDiscovery: *nodeGroupAutoDiscoveryFlag,
MaxTotalUnreadyPercentage: *maxTotalUnreadyPercentage,
OkTotalUnreadyCount: *okTotalUnreadyCount,
ScaleUpFromZero: *scaleUpFromZero,
EstimatorName: *estimatorFlag,
ExpanderName: *expanderFlag,
IgnoreDaemonSetsUtilization: *ignoreDaemonSetsUtilization,
IgnoreMirrorPodsUtilization: *ignoreMirrorPodsUtilization,
MaxBulkSoftTaintCount: *maxBulkSoftTaintCount,
MaxBulkSoftTaintTime: *maxBulkSoftTaintTime,
MaxEmptyBulkDelete: *maxEmptyBulkDeleteFlag,
MaxGracefulTerminationSec: *maxGracefulTerminationFlag,
MaxNodeProvisionTime: *maxNodeProvisionTime,
MaxNodesTotal: *maxNodesTotal,
MaxCoresTotal: maxCoresTotal,
MinCoresTotal: minCoresTotal,
MaxMemoryTotal: maxMemoryTotal,
MinMemoryTotal: minMemoryTotal,
GpuTotal: parsedGpuTotal,
NodeGroups: *nodeGroupsFlag,
ScaleDownDelayAfterAdd: *scaleDownDelayAfterAdd,
ScaleDownDelayAfterDelete: *scaleDownDelayAfterDelete,
ScaleDownDelayAfterFailure: *scaleDownDelayAfterFailure,
ScaleDownEnabled: *scaleDownEnabled,
ScaleDownUnneededTime: *scaleDownUnneededTime,
ScaleDownUnreadyTime: *scaleDownUnreadyTime,
ScaleDownUtilizationThreshold: *scaleDownUtilizationThreshold,
ScaleDownGpuUtilizationThreshold: *scaleDownGpuUtilizationThreshold,
ScaleDownNonEmptyCandidatesCount: *scaleDownNonEmptyCandidatesCount,
ScaleDownCandidatesPoolRatio: *scaleDownCandidatesPoolRatio,
ScaleDownCandidatesPoolMinCount: *scaleDownCandidatesPoolMinCount,
WriteStatusConfigMap: *writeStatusConfigMapFlag,
BalanceSimilarNodeGroups: *balanceSimilarNodeGroupsFlag,
ConfigNamespace: *namespace,
ClusterName: *clusterName,
NodeAutoprovisioningEnabled: *nodeAutoprovisioningEnabled,
MaxAutoprovisionedNodeGroupCount: *maxAutoprovisionedNodeGroupCount,
UnremovableNodeRecheckTimeout: *unremovableNodeRecheckTimeout,
ExpendablePodsPriorityCutoff: *expendablePodsPriorityCutoff,
Regional: *regional,
NewPodScaleUpDelay: *newPodScaleUpDelay,
IgnoredTaints: *ignoreTaintsFlag,
BalancingExtraIgnoredLabels: *balancingIgnoreLabelsFlag,
KubeConfigPath: *kubeConfigFile,
NodeDeletionDelayTimeout: *nodeDeletionDelayTimeout,
AWSUseStaticInstanceList: *awsUseStaticInstanceList,
CloudConfig: *cloudConfig,
CloudProviderName: *cloudProviderFlag,
NodeGroupAutoDiscovery: *nodeGroupAutoDiscoveryFlag,
MaxTotalUnreadyPercentage: *maxTotalUnreadyPercentage,
OkTotalUnreadyCount: *okTotalUnreadyCount,
ScaleUpFromZero: *scaleUpFromZero,
EstimatorName: *estimatorFlag,
ExpanderName: *expanderFlag,
IgnoreDaemonSetsUtilization: *ignoreDaemonSetsUtilization,
IgnoreMirrorPodsUtilization: *ignoreMirrorPodsUtilization,
MaxBulkSoftTaintCount: *maxBulkSoftTaintCount,
MaxBulkSoftTaintTime: *maxBulkSoftTaintTime,
MaxEmptyBulkDelete: *maxEmptyBulkDeleteFlag,
MaxGracefulTerminationSec: *maxGracefulTerminationFlag,
MaxNodeProvisionTime: *maxNodeProvisionTime,
MaxNodesTotal: *maxNodesTotal,
MaxCoresTotal: maxCoresTotal,
MinCoresTotal: minCoresTotal,
MaxMemoryTotal: maxMemoryTotal,
MinMemoryTotal: minMemoryTotal,
GpuTotal: parsedGpuTotal,
NodeGroups: *nodeGroupsFlag,
ScaleDownDelayAfterAdd: *scaleDownDelayAfterAdd,
ScaleDownDelayAfterDelete: *scaleDownDelayAfterDelete,
ScaleDownDelayAfterFailure: *scaleDownDelayAfterFailure,
ScaleDownEnabled: *scaleDownEnabled,
ScaleDownUnneededTime: *scaleDownUnneededTime,
ScaleDownUnreadyTime: *scaleDownUnreadyTime,
ScaleDownUtilizationThreshold: *scaleDownUtilizationThreshold,
ScaleDownGpuUtilizationThreshold: *scaleDownGpuUtilizationThreshold,
ScaleDownNonEmptyCandidatesCount: *scaleDownNonEmptyCandidatesCount,
ScaleDownCandidatesPoolRatio: *scaleDownCandidatesPoolRatio,
ScaleDownCandidatesPoolMinCount: *scaleDownCandidatesPoolMinCount,
WriteStatusConfigMap: *writeStatusConfigMapFlag,
BalanceSimilarNodeGroups: *balanceSimilarNodeGroupsFlag,
ConfigNamespace: *namespace,
ClusterName: *clusterName,
NodeAutoprovisioningEnabled: *nodeAutoprovisioningEnabled,
MaxAutoprovisionedNodeGroupCount: *maxAutoprovisionedNodeGroupCount,
UnremovableNodeRecheckTimeout: *unremovableNodeRecheckTimeout,
ExpendablePodsPriorityCutoff: *expendablePodsPriorityCutoff,
Regional: *regional,
NewPodScaleUpDelay: *newPodScaleUpDelay,
IgnoredTaints: *ignoreTaintsFlag,
BalancingExtraIgnoredLabels: *balancingIgnoreLabelsFlag,
KubeConfigPath: *kubeConfigFile,
NodeDeletionDelayTimeout: *nodeDeletionDelayTimeout,
AWSUseStaticInstanceList: *awsUseStaticInstanceList,
ClusterAPICloudConfigAuthoritative: *clusterAPICloudConfigAuthoritative,
}
}