[cluster-autoscaler] Support using --cloud-config for clusterapi provider

- Leverage --cloud-config to allow for providing a separate kubeconfig for Cluster API management and workload cluster resources
- Allow for fallback to previous behavior when --cloud-config is not specified for backward compatibility
- Provides a --clusterapi-cloud-config-authoritative flag to disable the above fallback behavior and allow for both the management and workload cluster clients to use the in-cluster config
This commit is contained in:
Jason DeTiberus 2020-09-21 10:38:06 -04:00
parent fde90dee45
commit 150dbdeb64
No known key found for this signature in database
GPG Key ID: CBD7D7A4B41437BC
4 changed files with 112 additions and 66 deletions

View File

@ -10,14 +10,6 @@ cluster.
The cluster-api provider requires Kubernetes v1.16 or greater to run the The cluster-api provider requires Kubernetes v1.16 or greater to run the
v1alpha3 version of the API. v1alpha3 version of the API.
## Cluster API Prerequisites
Please be aware that currently the cluster autoscaler only supports CAPI
clusters that have joined their management and workload clusters into a single
cluster. For more information about this please see the
[Cluster API Concepts documentation](https://cluster-api.sigs.k8s.io/user/concepts.html)
and the [`clusterctl move` command documentation](https://cluster-api.sigs.k8s.io/clusterctl/commands/move.html).
## Starting the Autoscaler ## Starting the Autoscaler
To enable the Cluster API provider, you must first specify it in the command To enable the Cluster API provider, you must first specify it in the command
@ -62,6 +54,43 @@ in the staging namespace, belonging to the purple cluster, with the label owner=
--node-group-auto-discovery=clusterapi:namespace=staging,clusterName=purple,owner=jim --node-group-auto-discovery=clusterapi:namespace=staging,clusterName=purple,owner=jim
``` ```
## Connecting cluster-autoscaler to Cluster API management and workload Clusters
You will also need to provide the path to the kubeconfig(s) for the management
and workload clusters you wish cluster-autoscaler to run against. To specify the
kubeconfig path for the workload cluster to monitor, use the `--kubeconfig`
option and supply the path to the kubeconfig. If the `--kubeconfig` option is
not specified, cluster-autoscaler will attempt to use an in-cluster configuration.
To specify the kubeconfig path for the management cluster to monitor, use the
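`--cloud-config` option and supply the path to the kubeconfig. If the
`--cloud-config` option is not specified, cluster-autoscaler will fall back to
using the kubeconfig that was provided with the `--kubeconfig` option. To disable
this fallback and use the in-cluster configuration for the management cluster
instead, pass the `--clusterapi-cloud-config-authoritative` flag.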
Use in-cluster config for both management and workload cluster:
```
cluster-autoscaler --cloud-provider=clusterapi
```
Use in-cluster config for workload cluster, specify kubeconfig for management cluster:
```
cluster-autoscaler --cloud-provider=clusterapi --cloud-config=/mnt/kubeconfig
```
Use in-cluster config for management cluster, specify kubeconfig for workload cluster:
```
cluster-autoscaler --cloud-provider=clusterapi --kubeconfig=/mnt/kubeconfig --clusterapi-cloud-config-authoritative
```
Use separate kubeconfigs for both management and workload cluster:
```
cluster-autoscaler --cloud-provider=clusterapi --kubeconfig=/mnt/workload.kubeconfig --cloud-config=/mnt/management.kubeconfig
```
Use a single provided kubeconfig for both management and workload cluster:
```
cluster-autoscaler --cloud-provider=clusterapi --kubeconfig=/mnt/workload.kubeconfig
```
## Enabling Autoscaling ## Enabling Autoscaling
To enable the automatic scaling of components in your cluster-api managed To enable the automatic scaling of components in your cluster-api managed

View File

@ -140,30 +140,42 @@ func newProvider(
// BuildClusterAPI builds CloudProvider implementation for machine api. // BuildClusterAPI builds CloudProvider implementation for machine api.
func BuildClusterAPI(opts config.AutoscalingOptions, do cloudprovider.NodeGroupDiscoveryOptions, rl *cloudprovider.ResourceLimiter) cloudprovider.CloudProvider { func BuildClusterAPI(opts config.AutoscalingOptions, do cloudprovider.NodeGroupDiscoveryOptions, rl *cloudprovider.ResourceLimiter) cloudprovider.CloudProvider {
externalConfig, err := clientcmd.BuildConfigFromFlags("", opts.KubeConfigPath) managementKubeconfig := opts.CloudConfig
if managementKubeconfig == "" && !opts.ClusterAPICloudConfigAuthoritative {
managementKubeconfig = opts.KubeConfigPath
}
managementConfig, err := clientcmd.BuildConfigFromFlags("", managementKubeconfig)
if err != nil { if err != nil {
klog.Fatalf("cannot build config: %v", err) klog.Fatalf("cannot build management cluster config: %v", err)
}
workloadKubeconfig := opts.KubeConfigPath
workloadConfig, err := clientcmd.BuildConfigFromFlags("", workloadKubeconfig)
if err != nil {
klog.Fatalf("cannot build workload cluster config: %v", err)
} }
// Grab a dynamic interface that we can create informers from // Grab a dynamic interface that we can create informers from
managementClient, err := dynamic.NewForConfig(externalConfig) managementClient, err := dynamic.NewForConfig(managementConfig)
if err != nil { if err != nil {
klog.Fatalf("could not generate dynamic client for config") klog.Fatalf("could not generate dynamic client for config")
} }
workloadClient, err := kubernetes.NewForConfig(externalConfig) workloadClient, err := kubernetes.NewForConfig(workloadConfig)
if err != nil { if err != nil {
klog.Fatalf("create kube clientset failed: %v", err) klog.Fatalf("create kube clientset failed: %v", err)
} }
managementDiscoveryClient, err := discovery.NewDiscoveryClientForConfig(externalConfig) managementDiscoveryClient, err := discovery.NewDiscoveryClientForConfig(managementConfig)
if err != nil { if err != nil {
klog.Fatalf("create discovery client failed: %v", err) klog.Fatalf("create discovery client failed: %v", err)
} }
cachedDiscovery := memory.NewMemCacheClient(managementDiscoveryClient) cachedDiscovery := memory.NewMemCacheClient(managementDiscoveryClient)
managementScaleClient, err := scale.NewForConfig( managementScaleClient, err := scale.NewForConfig(
externalConfig, managementConfig,
restmapper.NewDeferredDiscoveryRESTMapper(cachedDiscovery), restmapper.NewDeferredDiscoveryRESTMapper(cachedDiscovery),
dynamic.LegacyAPIPathResolverFunc, dynamic.LegacyAPIPathResolverFunc,
scale.NewDiscoveryScaleKindResolver(managementDiscoveryClient)) scale.NewDiscoveryScaleKindResolver(managementDiscoveryClient))

View File

@ -142,4 +142,7 @@ type AutoscalingOptions struct {
AWSUseStaticInstanceList bool AWSUseStaticInstanceList bool
// Path to kube configuration if available // Path to kube configuration if available
KubeConfigPath string KubeConfigPath string
// ClusterAPICloudConfigAuthoritative tells the Cluster API provider to treat the CloudConfig option as authoritative and
// not use KubeConfigPath as a fallback when it is not provided.
ClusterAPICloudConfigAuthoritative bool
} }

View File

@ -169,10 +169,11 @@ var (
regional = flag.Bool("regional", false, "Cluster is regional.") regional = flag.Bool("regional", false, "Cluster is regional.")
newPodScaleUpDelay = flag.Duration("new-pod-scale-up-delay", 0*time.Second, "Pods less than this old will not be considered for scale-up.") newPodScaleUpDelay = flag.Duration("new-pod-scale-up-delay", 0*time.Second, "Pods less than this old will not be considered for scale-up.")
ignoreTaintsFlag = multiStringFlag("ignore-taint", "Specifies a taint to ignore in node templates when considering to scale a node group") ignoreTaintsFlag = multiStringFlag("ignore-taint", "Specifies a taint to ignore in node templates when considering to scale a node group")
balancingIgnoreLabelsFlag = multiStringFlag("balancing-ignore-label", "Specifies a label to ignore in addition to the basic and cloud-provider set of labels when comparing if two node groups are similar") balancingIgnoreLabelsFlag = multiStringFlag("balancing-ignore-label", "Specifies a label to ignore in addition to the basic and cloud-provider set of labels when comparing if two node groups are similar")
awsUseStaticInstanceList = flag.Bool("aws-use-static-instance-list", false, "Should CA fetch instance types in runtime or use a static list. AWS only") awsUseStaticInstanceList = flag.Bool("aws-use-static-instance-list", false, "Should CA fetch instance types in runtime or use a static list. AWS only")
enableProfiling = flag.Bool("profiling", false, "Is debug/pprof endpoint enabled") enableProfiling = flag.Bool("profiling", false, "Is debug/pprof endpoint enabled")
clusterAPICloudConfigAuthoritative = flag.Bool("clusterapi-cloud-config-authoritative", false, "Treat the cloud-config flag authoritatively (do not fallback to using kubeconfig flag). ClusterAPI only")
) )
func createAutoscalingOptions() config.AutoscalingOptions { func createAutoscalingOptions() config.AutoscalingOptions {
@ -193,54 +194,55 @@ func createAutoscalingOptions() config.AutoscalingOptions {
klog.Fatalf("Failed to parse flags: %v", err) klog.Fatalf("Failed to parse flags: %v", err)
} }
return config.AutoscalingOptions{ return config.AutoscalingOptions{
CloudConfig: *cloudConfig, CloudConfig: *cloudConfig,
CloudProviderName: *cloudProviderFlag, CloudProviderName: *cloudProviderFlag,
NodeGroupAutoDiscovery: *nodeGroupAutoDiscoveryFlag, NodeGroupAutoDiscovery: *nodeGroupAutoDiscoveryFlag,
MaxTotalUnreadyPercentage: *maxTotalUnreadyPercentage, MaxTotalUnreadyPercentage: *maxTotalUnreadyPercentage,
OkTotalUnreadyCount: *okTotalUnreadyCount, OkTotalUnreadyCount: *okTotalUnreadyCount,
ScaleUpFromZero: *scaleUpFromZero, ScaleUpFromZero: *scaleUpFromZero,
EstimatorName: *estimatorFlag, EstimatorName: *estimatorFlag,
ExpanderName: *expanderFlag, ExpanderName: *expanderFlag,
IgnoreDaemonSetsUtilization: *ignoreDaemonSetsUtilization, IgnoreDaemonSetsUtilization: *ignoreDaemonSetsUtilization,
IgnoreMirrorPodsUtilization: *ignoreMirrorPodsUtilization, IgnoreMirrorPodsUtilization: *ignoreMirrorPodsUtilization,
MaxBulkSoftTaintCount: *maxBulkSoftTaintCount, MaxBulkSoftTaintCount: *maxBulkSoftTaintCount,
MaxBulkSoftTaintTime: *maxBulkSoftTaintTime, MaxBulkSoftTaintTime: *maxBulkSoftTaintTime,
MaxEmptyBulkDelete: *maxEmptyBulkDeleteFlag, MaxEmptyBulkDelete: *maxEmptyBulkDeleteFlag,
MaxGracefulTerminationSec: *maxGracefulTerminationFlag, MaxGracefulTerminationSec: *maxGracefulTerminationFlag,
MaxNodeProvisionTime: *maxNodeProvisionTime, MaxNodeProvisionTime: *maxNodeProvisionTime,
MaxNodesTotal: *maxNodesTotal, MaxNodesTotal: *maxNodesTotal,
MaxCoresTotal: maxCoresTotal, MaxCoresTotal: maxCoresTotal,
MinCoresTotal: minCoresTotal, MinCoresTotal: minCoresTotal,
MaxMemoryTotal: maxMemoryTotal, MaxMemoryTotal: maxMemoryTotal,
MinMemoryTotal: minMemoryTotal, MinMemoryTotal: minMemoryTotal,
GpuTotal: parsedGpuTotal, GpuTotal: parsedGpuTotal,
NodeGroups: *nodeGroupsFlag, NodeGroups: *nodeGroupsFlag,
ScaleDownDelayAfterAdd: *scaleDownDelayAfterAdd, ScaleDownDelayAfterAdd: *scaleDownDelayAfterAdd,
ScaleDownDelayAfterDelete: *scaleDownDelayAfterDelete, ScaleDownDelayAfterDelete: *scaleDownDelayAfterDelete,
ScaleDownDelayAfterFailure: *scaleDownDelayAfterFailure, ScaleDownDelayAfterFailure: *scaleDownDelayAfterFailure,
ScaleDownEnabled: *scaleDownEnabled, ScaleDownEnabled: *scaleDownEnabled,
ScaleDownUnneededTime: *scaleDownUnneededTime, ScaleDownUnneededTime: *scaleDownUnneededTime,
ScaleDownUnreadyTime: *scaleDownUnreadyTime, ScaleDownUnreadyTime: *scaleDownUnreadyTime,
ScaleDownUtilizationThreshold: *scaleDownUtilizationThreshold, ScaleDownUtilizationThreshold: *scaleDownUtilizationThreshold,
ScaleDownGpuUtilizationThreshold: *scaleDownGpuUtilizationThreshold, ScaleDownGpuUtilizationThreshold: *scaleDownGpuUtilizationThreshold,
ScaleDownNonEmptyCandidatesCount: *scaleDownNonEmptyCandidatesCount, ScaleDownNonEmptyCandidatesCount: *scaleDownNonEmptyCandidatesCount,
ScaleDownCandidatesPoolRatio: *scaleDownCandidatesPoolRatio, ScaleDownCandidatesPoolRatio: *scaleDownCandidatesPoolRatio,
ScaleDownCandidatesPoolMinCount: *scaleDownCandidatesPoolMinCount, ScaleDownCandidatesPoolMinCount: *scaleDownCandidatesPoolMinCount,
WriteStatusConfigMap: *writeStatusConfigMapFlag, WriteStatusConfigMap: *writeStatusConfigMapFlag,
BalanceSimilarNodeGroups: *balanceSimilarNodeGroupsFlag, BalanceSimilarNodeGroups: *balanceSimilarNodeGroupsFlag,
ConfigNamespace: *namespace, ConfigNamespace: *namespace,
ClusterName: *clusterName, ClusterName: *clusterName,
NodeAutoprovisioningEnabled: *nodeAutoprovisioningEnabled, NodeAutoprovisioningEnabled: *nodeAutoprovisioningEnabled,
MaxAutoprovisionedNodeGroupCount: *maxAutoprovisionedNodeGroupCount, MaxAutoprovisionedNodeGroupCount: *maxAutoprovisionedNodeGroupCount,
UnremovableNodeRecheckTimeout: *unremovableNodeRecheckTimeout, UnremovableNodeRecheckTimeout: *unremovableNodeRecheckTimeout,
ExpendablePodsPriorityCutoff: *expendablePodsPriorityCutoff, ExpendablePodsPriorityCutoff: *expendablePodsPriorityCutoff,
Regional: *regional, Regional: *regional,
NewPodScaleUpDelay: *newPodScaleUpDelay, NewPodScaleUpDelay: *newPodScaleUpDelay,
IgnoredTaints: *ignoreTaintsFlag, IgnoredTaints: *ignoreTaintsFlag,
BalancingExtraIgnoredLabels: *balancingIgnoreLabelsFlag, BalancingExtraIgnoredLabels: *balancingIgnoreLabelsFlag,
KubeConfigPath: *kubeConfigFile, KubeConfigPath: *kubeConfigFile,
NodeDeletionDelayTimeout: *nodeDeletionDelayTimeout, NodeDeletionDelayTimeout: *nodeDeletionDelayTimeout,
AWSUseStaticInstanceList: *awsUseStaticInstanceList, AWSUseStaticInstanceList: *awsUseStaticInstanceList,
ClusterAPICloudConfigAuthoritative: *clusterAPICloudConfigAuthoritative,
} }
} }