Merge branch 'master' into oom-params

navinjoy 2023-01-26 13:43:34 -08:00 committed by GitHub
commit c36f6ca3c4
3098 changed files with 329068 additions and 181130 deletions


@ -1,9 +1,3 @@
#### Which component this PR applies to?
<!--
Which autoscaling component hosted in this repository (cluster-autoscaler, vertical-pod-autoscaler, addon-resizer, helm charts) this PR applies to?
-->
#### What type of PR is this?
<!--


@ -5,3 +5,5 @@ reviewers:
emeritus_approvers:
- bskiba # 2022-09-30
- wojtek-t # 2022-09-30
labels:
- addon-resizer


@ -56,11 +56,11 @@ exposes the Scale subresource.
// or other objects that expose the Scale subresource).
type Balancer struct {
metav1.TypeMeta
// Standard object metadata. More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#metadata
// Standard object metadata. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#metadata
// +optional
metav1.ObjectMeta
// Specification of the Balancer behavior.
// More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#spec-and-status.
// More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#spec-and-status.
Spec BalancerSpec
// Current information about the Balancer.
// +optional


@ -2,3 +2,6 @@ approvers:
- gjtempleton
reviewers:
- gjtempleton
labels:
- helm-charts


@ -11,4 +11,4 @@ name: cluster-autoscaler
sources:
- https://github.com/kubernetes/autoscaler/tree/master/cluster-autoscaler
type: application
version: 9.21.0
version: 9.21.1


@ -70,10 +70,13 @@ Return the appropriate apiVersion for podsecuritypolicy.
{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }}
{{- if semverCompare "<1.10-0" $kubeTargetVersion -}}
{{- print "extensions/v1beta1" -}}
{{- if semverCompare ">1.21-0" $kubeTargetVersion -}}
{{- print "policy/v1" -}}
{{- else -}}
{{- print "policy/v1beta1" -}}
{{- end -}}
{{- end -}}
{{- end -}}
{{/*
Return the appropriate apiVersion for podDisruptionBudget.


@ -59,6 +59,11 @@ spec:
- --nodes={{ .minSize }}:{{ .maxSize }}:{{ .name }}
{{- end }}
{{- end }}
{{- if eq .Values.cloudProvider "rancher" }}
{{- if .Values.cloudConfigPath }}
- --cloud-config={{ .Values.cloudConfigPath }}
{{- end }}
{{- end }}
{{- if eq .Values.cloudProvider "aws" }}
{{- if .Values.autoDiscovery.clusterName }}
- --node-group-auto-discovery=asg:tag={{ tpl (join "," .Values.autoDiscovery.tags) . }}


@ -28,6 +28,7 @@ this document:
* [How to?](#how-to)
* [I'm running cluster with nodes in multiple zones for HA purposes. Is that supported by Cluster Autoscaler?](#im-running-cluster-with-nodes-in-multiple-zones-for-ha-purposes-is-that-supported-by-cluster-autoscaler)
* [How can I monitor Cluster Autoscaler?](#how-can-i-monitor-cluster-autoscaler)
* [How can I increase the information that the CA is logging?](#how-can-i-increase-the-information-that-the-ca-is-logging)
* [How can I see all the events from Cluster Autoscaler?](#how-can-i-see-all-events-from-cluster-autoscaler)
* [How can I scale my cluster to just 1 node?](#how-can-i-scale-my-cluster-to-just-1-node)
* [How can I scale a node group to 0?](#how-can-i-scale-a-node-group-to-0)
@ -104,7 +105,7 @@ __Or__ you have overridden this behaviour with one of the relevant flags. [See b
### Which version of Cluster Autoscaler should I use in my cluster?
See [Cluster Autoscaler Releases](https://github.com/kubernetes/autoscaler/tree/master/cluster-autoscaler#releases)
See [Cluster Autoscaler Releases](https://github.com/kubernetes/autoscaler/tree/master/cluster-autoscaler#releases).
### Is Cluster Autoscaler an Alpha, Beta or GA product?
@ -233,7 +234,7 @@ More about Pod Priority and Preemption:
Cluster Autoscaler terminates the underlying instance in a cloud-provider-dependent manner.
It does _not_ delete the [Node object](https://kubernetes.io/docs/concepts/architecture/nodes/#api-object) from Kubernetes. Cleaning up Node objects corresponding to terminated instances is the responsibility of the [cloud node controller](https://kubernetes.io/docs/concepts/architecture/cloud-controller/#node-controller), which can run as part of [kube-controller-manager](https://kubernetes.io/docs/reference/command-line-tools-reference/kube-controller-manager/) or [cloud-controller-manager](https://v1-19.docs.kubernetes.io/docs/reference/command-line-tools-reference/cloud-controller-manager/).
It does _not_ delete the [Node object](https://kubernetes.io/docs/concepts/architecture/nodes/#api-object) from Kubernetes. Cleaning up Node objects corresponding to terminated instances is the responsibility of the [cloud node controller](https://kubernetes.io/docs/concepts/architecture/cloud-controller/#node-controller), which can run as part of [kube-controller-manager](https://kubernetes.io/docs/reference/command-line-tools-reference/kube-controller-manager/) or [cloud-controller-manager](https://kubernetes.io/docs/concepts/architecture/cloud-controller/).
****************
@ -735,6 +736,7 @@ The following startup parameters are supported for cluster autoscaler:
| `kubeconfig` | Path to kubeconfig file with authorization and API Server location information | ""
| `cloud-config` | The path to the cloud provider configuration file. Empty string for no configuration file | ""
| `namespace` | Namespace in which cluster-autoscaler run | "kube-system"
| `scale-up-node-group-to-min-size-enabled` | Should CA scale up the node group to the configured min size if needed | false
| `scale-down-enabled` | Should CA scale down the cluster | true
| `scale-down-delay-after-add` | How long after scale up that scale down evaluation resumes | 10 minutes
| `scale-down-delay-after-delete` | How long after node deletion that scale down evaluation resumes, defaults to scan-interval | scan-interval
@ -867,7 +869,7 @@ This limitation was solved with
introduced as beta in Kubernetes 1.11 and planned for GA in 1.13.
To allow CA to take advantage of topological scheduling, use separate node groups per zone.
This way CA knows exactly which node group will create nodes in the required zone rather than relying on the cloud provider choosing a zone for a new node in a multi-zone node group.
When using separate node groups per zone, the `--balance-similar-node-groups` flag will keep nodes balanced across zones for workloads that dont require topological scheduling.
When using separate node groups per zone, the `--balance-similar-node-groups` flag will keep nodes balanced across zones for workloads that don't require topological scheduling.
### CA doesn't work, but it used to work yesterday. Why?
@ -907,6 +909,23 @@ There are three options:
* on nodes,
* on kube-system/cluster-autoscaler-status config map.
### How can I increase the information that the CA is logging?
By default, the Cluster Autoscaler will be conservative about the log messages that it emits.
This is primarily due to performance degradations in scenarios where clusters have a large
number of nodes (> 100). In these cases, excess log messages will lead to the log storage
filling more quickly, and in some cases (e.g. clusters with >1000 nodes) the processing
performance of the Cluster Autoscaler can be impacted.
The `--v` flag controls how verbose the Cluster Autoscaler will be when running. In most
cases using a value of `--v=0` or `--v=1` will be sufficient to monitor its activity.
If you would like to have more information, especially about the scaling decisions made
by the Cluster Autoscaler, then setting a value of `--v=4` is recommended. If you are
debugging connection issues between the Cluster Autoscaler and the Kubernetes API server,
or infrastructure endpoints, then setting a value of `--v=9` will show all the individual
HTTP calls made. Be aware that using verbosity levels higher than `--v=1` will generate
an increased amount of logs; prepare your deployments and storage accordingly.
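As a rough illustration of how these verbosity levels behave, here is a minimal Go sketch using `k8s.io/klog/v2`, the logging library Cluster Autoscaler uses; the messages and the levels chosen for them are invented for this example rather than taken from CA's code. Running it with `--v=1` prints only the first message, while `--v=4` also prints the second.
```go
package main

import (
	"flag"

	"k8s.io/klog/v2"
)

func main() {
	// klog registers the -v flag; passing --v=4 on the command line raises verbosity.
	klog.InitFlags(nil)
	flag.Parse()
	defer klog.Flush()

	// Emitted at any verbosity level.
	klog.Info("starting main loop")

	// Emitted only when --v is 4 or higher, e.g. details of scaling decisions.
	klog.V(4).Info("considering node group ng-1 for scale-up (hypothetical message)")

	// Emitted only when --v is 9 or higher, e.g. individual HTTP calls.
	klog.V(9).Info("GET /api/v1/nodes (hypothetical message)")
}
```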
### What events are emitted by CA?
Whenever Cluster Autoscaler adds or removes nodes it will create events
@ -948,7 +967,14 @@ Events:
```
### My cluster is below minimum / above maximum number of nodes, but CA did not fix that! Why?
Cluster Autoscaler will not scale the cluster beyond these limits, but does not enforce them. If your cluster is below the minimum number of nodes configured for Cluster Autoscaler, it will be scaled up *only* in presence of unschedulable pods.
Cluster Autoscaler will not scale the cluster beyond these limits, but some other external factors could make this happen. Here are some common scenarios.
* Existing nodes were deleted from K8s and the cloud provider, which could cause the cluster to fall below the minimum number of nodes.
* New nodes were added directly to the cloud provider, which could cause the cluster to exceed the maximum number of nodes.
* Cluster Autoscaler was turned on in the middle of the cluster lifecycle, and the initial number of nodes might already be beyond these limits.
By default, Cluster Autoscaler does not enforce the node group size. If your cluster is below the minimum number of nodes configured for CA, it will be scaled up *only* in the presence of unschedulable pods. On the other hand, if your cluster is above the maximum number of nodes configured for CA, it will be scaled down *only* if it has unneeded nodes.
Starting with CA 1.26.0, a new flag `--enforce-node-group-min-size` was introduced to enforce the node group minimum size. For node groups with fewer nodes than the configured minimum size, CA will scale them up to the minimum number of nodes. To enable this feature, set the flag to `true` in the command.
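As a minimal sketch of the behaviour described above (illustrative only, not the Cluster Autoscaler's actual implementation), the enforcement can be thought of as:
```go
package main

import "fmt"

// enforceMinSizeDelta returns how many nodes --enforce-node-group-min-size would
// add to bring a node group back up to its configured minimum size.
func enforceMinSizeDelta(currentSize, minSize int, enforceMinSize bool) int {
	if !enforceMinSize || currentSize >= minSize {
		return 0 // default behaviour: the minimum size is not actively enforced
	}
	return minSize - currentSize // scale the group up to its minimum
}

func main() {
	// A node group configured with a minimum size of 3 that has drifted down to 1 node.
	fmt.Println(enforceMinSizeDelta(1, 3, true))  // 2: scaled back up to the minimum
	fmt.Println(enforceMinSizeDelta(1, 3, false)) // 0: nothing happens by default
}
```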
### What happens in scale-up when I have no more quota in the cloud provider?


@ -28,6 +28,10 @@ ifdef DOCKER_RM
else
RM_FLAG=
endif
ifndef AWS_REGION
AWS_REGION=$(shell aws configure get region)
endif
IMAGE=$(REGISTRY)/cluster-autoscaler$(PROVIDER)
export DOCKER_CLI_EXPERIMENTAL := enabled
@ -78,7 +82,7 @@ clean-arch-%:
rm -f cluster-autoscaler-$*
generate:
go generate ./cloudprovider/aws
AWS_REGION=$(AWS_REGION) go generate ./cloudprovider/aws
format:
test -z "$$(find . -path ./vendor -prune -type f -o -name '*.go' -exec gofmt -s -d {} + | tee /dev/stderr)" || \


@ -1,8 +1,11 @@
approvers:
- feiskyer
- towca
- x13n
reviewers:
- feiskyer
- x13n
emeritus_approvers:
- aleksandra-malinowska # 2022-09-30
labels:
- cluster-autoscaler


@ -14,27 +14,31 @@ An FAQ is available [HERE](./FAQ.md).
You should also take a look at the notes and "gotchas" for your specific cloud provider:
* [AliCloud](./cloudprovider/alicloud/README.md)
* [Azure](./cloudprovider/azure/README.md)
* [AWS](./cloudprovider/aws/README.md)
* [Azure](./cloudprovider/azure/README.md)
* [BaiduCloud](./cloudprovider/baiducloud/README.md)
* [BizflyCloud](./cloudprovider/bizflycloud/README.md)
* [Brightbox](./cloudprovider/brightbox/README.md)
* [CherryServers](./cloudprovider/cherryservers/README.md)
* [Civo](./cloudprovider/civo/README.md)
* [CloudStack](./cloudprovider/cloudstack/README.md)
* [HuaweiCloud](./cloudprovider/huaweicloud/README.md)
* [ClusterAPI](./cloudprovider/clusterapi/README.md)
* [DigitalOcean](./cloudprovider/digitalocean/README.md)
* [Exoscale](./cloudprovider/exoscale/README.md)
* [Equinix Metal](./cloudprovider/packet/README.md#notes)
* [External gRPC](./cloudprovider/externalgrpc/README.md)
* [Hetzner](./cloudprovider/hetzner/README.md)
* [Equinix Metal](./cloudprovider/packet/README.md#notes)
* [HuaweiCloud](./cloudprovider/huaweicloud/README.md)
* [IonosCloud](./cloudprovider/ionoscloud/README.md)
* [OVHcloud](./cloudprovider/ovhcloud/README.md)
* [Linode](./cloudprovider/linode/README.md)
* [OracleCloud](./cloudprovider/oci/README.md)
* [ClusterAPI](./cloudprovider/clusterapi/README.md)
* [BizflyCloud](./cloudprovider/bizflycloud/README.md)
* [Vultr](./cloudprovider/vultr/README.md)
* [TencentCloud](./cloudprovider/tencentcloud/README.md)
* [Scaleway](./cloudprovider/scaleway/README.md)
* [Rancher](./cloudprovider/rancher/README.md)
* [Kamatera](./cloudprovider/kamatera/README.md)
* [Linode](./cloudprovider/linode/README.md)
* [Magnum](./cloudprovider/magnum/README.md)
* [OracleCloud](./cloudprovider/oci/README.md)
* [OVHcloud](./cloudprovider/ovhcloud/README.md)
* [Rancher](./cloudprovider/rancher/README.md)
* [Scaleway](./cloudprovider/scaleway/README.md)
* [TencentCloud](./cloudprovider/tencentcloud/README.md)
* [Vultr](./cloudprovider/vultr/README.md)
# Releases
@ -164,23 +168,27 @@ Supported cloud providers:
* GKE https://cloud.google.com/container-engine/docs/cluster-autoscaler
* AWS https://github.com/kubernetes/autoscaler/blob/master/cluster-autoscaler/cloudprovider/aws/README.md
* Azure https://github.com/kubernetes/autoscaler/blob/master/cluster-autoscaler/cloudprovider/azure/README.md
* Alibaba Cloud https://github.com/kubernetes/autoscaler/blob/master/cluster-autoscaler/cloudprovider/alicloud/README.md
* AliCloud https://github.com/kubernetes/autoscaler/blob/master/cluster-autoscaler/cloudprovider/alicloud/README.md
* BaiduCloud https://github.com/kubernetes/autoscaler/blob/master/cluster-autoscaler/cloudprovider/baiducloud/README.md
* BizflyCloud https://github.com/kubernetes/autoscaler/blob/master/cluster-autoscaler/cloudprovider/bizflycloud/README.md
* Brightbox https://github.com/kubernetes/autoscaler/blob/master/cluster-autoscaler/cloudprovider/brightbox/README.md
* CherryServers https://github.com/kubernetes/autoscaler/blob/master/cluster-autoscaler/cloudprovider/cherryservers/README.md
* OpenStack Magnum https://github.com/kubernetes/autoscaler/blob/master/cluster-autoscaler/cloudprovider/magnum/README.md
* DigitalOcean https://github.com/kubernetes/autoscaler/blob/master/cluster-autoscaler/cloudprovider/digitalocean/README.md
* Civo https://github.com/kubernetes/autoscaler/blob/master/cluster-autoscaler/cloudprovider/civo/README.md
* CloudStack https://github.com/kubernetes/autoscaler/blob/master/cluster-autoscaler/cloudprovider/cloudstack/README.md
* ClusterAPI https://github.com/kubernetes/autoscaler/blob/master/cluster-autoscaler/cloudprovider/clusterapi/README.md
* DigitalOcean https://github.com/kubernetes/autoscaler/blob/master/cluster-autoscaler/cloudprovider/digitalocean/README.md
* Exoscale https://github.com/kubernetes/autoscaler/blob/master/cluster-autoscaler/cloudprovider/exoscale/README.md
* Equinix Metal https://github.com/kubernetes/autoscaler/blob/master/cluster-autoscaler/cloudprovider/packet/README.md
* External gRPC https://github.com/kubernetes/autoscaler/blob/master/cluster-autoscaler/cloudprovider/externalgrpc/README.md
* OVHcloud https://github.com/kubernetes/autoscaler/blob/master/cluster-autoscaler/cloudprovider/ovhcloud/README.md
* Linode https://github.com/kubernetes/autoscaler/blob/master/cluster-autoscaler/cloudprovider/linode/README.md
* OCI https://github.com/kubernetes/autoscaler/blob/master/cluster-autoscaler/cloudprovider/oci/README.md
* Hetzner https://github.com/kubernetes/autoscaler/blob/master/cluster-autoscaler/cloudprovider/hetzner/README.md
* Cluster API https://github.com/kubernetes/autoscaler/blob/master/cluster-autoscaler/cloudprovider/clusterapi/README.md
* Vultr https://github.com/kubernetes/autoscaler/blob/master/cluster-autoscaler/cloudprovider/vultr/README.md
* TencentCloud https://github.com/kubernetes/autoscaler/blob/master/cluster-autoscaler/cloudprovider/tencentcloud/README.md
* BaiduCloud https://github.com/kubernetes/autoscaler/blob/master/cluster-autoscaler/cloudprovider/baiducloud/README.md
* HuaweiCloud https://github.com/kubernetes/autoscaler/blob/master/cluster-autoscaler/cloudprovider/huaweicloud/README.md
* Rancher https://github.com/kubernetes/autoscaler/blob/master/cluster-autoscaler/cloudprovider/rancher/README.md
* IonosCloud https://github.com/kubernetes/autoscaler/blob/master/cluster-autoscaler/cloudprovider/ionoscloud/README.md
* Kamatera https://github.com/kubernetes/autoscaler/blob/master/cluster-autoscaler/cloudprovider/kamatera/README.md
* Linode https://github.com/kubernetes/autoscaler/blob/master/cluster-autoscaler/cloudprovider/linode/README.md
* Magnum https://github.com/kubernetes/autoscaler/blob/master/cluster-autoscaler/cloudprovider/magnum/README.md
* OracleCloud https://github.com/kubernetes/autoscaler/blob/master/cluster-autoscaler/cloudprovider/oci/README.md
* OVHcloud https://github.com/kubernetes/autoscaler/blob/master/cluster-autoscaler/cloudprovider/ovhcloud/README.md
* Rancher https://github.com/kubernetes/autoscaler/blob/master/cluster-autoscaler/cloudprovider/rancher/README.md
* Scaleway https://github.com/kubernetes/autoscaler/blob/master/cluster-autoscaler/cloudprovider/scaleway/README.md
* TencentCloud https://github.com/kubernetes/autoscaler/blob/master/cluster-autoscaler/cloudprovider/tencentcloud/README.md
* Vultr https://github.com/kubernetes/autoscaler/blob/master/cluster-autoscaler/cloudprovider/vultr/README.md


@ -0,0 +1,144 @@
# Cloudprovider policy
As of the moment this policy is written (September 2022), Cluster Autoscaler has
integrations with almost 30 different cloudproviders. At the same time there
are only a handful of core CA maintainers. The maintainers don't have the
capacity to build new integrations or maintain existing ones. In most cases they
also have no experience with particular clouds and no access to a test
environment.
Due to the above reasons, each integration is required to have a set of OWNERS who
are responsible for development and maintenance of the integration. This
document describes the role and responsibilities of core maintainers and
integration owners. A lot of what is described below has been unofficial
practice for multiple years now, but this policy also introduces some new
requirements for cloudprovider maintenance.
## Responsibilities
Cloudprovider owners are responsible for:
* Maintaining their integrations.
* Testing their integrations. Currently any new CA release is tested e2e on
GCE; testing on other platforms is the responsibility of cloudprovider
maintainers (note: there is an effort to make automated e2e tests possible
to run on other providers, so this may improve in the future).
* Addressing any issues raised in the autoscaler GitHub repository related to a
given provider.
* Reviewing any pull requests to their cloudprovider.
* Pull requests that only change cloudprovider code do not require any
review or approval from core maintainers.
* Pull requests that change cloudprovider and core code require approval
from both the cloudprovider owner and core maintainer.
The core maintainers will generally not interfere with cloudprovider
development, but they may take the following actions without seeking approval
from cloudprovider owners:
* Make trivial changes to cloudproviders when needed to implement changes in
CA core (ex. updating function signatures when a Go interface
changes).
* Revert any pull requests that break tests, prevent CA from compiling, etc.
This includes pull requests adding new providers if they cause the tests to
start failing or break the rules defined below.
## Adding new cloud provider integration
### External provider
One way to integrate CA with a cloudprovider is to use the existing
[External
gRPC](https://github.com/kubernetes/autoscaler/tree/master/cluster-autoscaler/cloudprovider/externalgrpc)
provider. Integrating with the gRPC interface may be easier than implementing an
in-tree cloudprovider, and the gRPC provider comes with some essential caching
built in.
An external cloudprovider implementation doesn't live in this repository and is
not a part of the CA image. As such it is also not subject to this policy.
### In-tree provider
An alternative to the External gRPC provider is an in-tree cloudprovider
integration. An in-tree provider allows more customization (ex. by implementing
[custom processors](https://github.com/kubernetes/autoscaler/tree/master/cluster-autoscaler/processors)
that integrate with a specific provider), but it requires significantly more effort to
implement and maintain.
In order to add a new in-tree integration, you need to open a pull request implementing
the interfaces defined in cloud\_provider.go. This policy requires that any new
in-tree cloudprovider follow these rules:
* Cloudprovider needs to have an OWNERS file that lists its maintainers.
Kubernetes policy requires that code OWNERS are members of the Kubernetes
organization.
* It is required that both reviewers and approvers sections of OWNERS file
are non-empty.
* This can create a chicken-and-egg problem, where adding a cloudprovider
requires being a member of the Kubernetes org and becoming a member of the
organization requires a history of code contributions. For this reason it
is allowed for the OWNERS file to temporarily contain commented-out GitHub
handles. There is an expectation that at least some of the owners will
join the Kubernetes organization (by following the
[process](https://github.com/kubernetes/community/blob/master/community-membership.md))
within one release cycle, so that they can approve PRs to their
cloudprovider.
* Cloudprovider shouldn't introduce new dependencies (such as clients/SDKs)
to top-level go.mod vendor, unless those dependencies are already imported
by the kubernetes/kubernetes repository and the same version of the library is
used by CA and Kubernetes. This requirement is mainly driven by
the problems with version conflicts in transitive dependencies we've
experienced in the past.
* Cloudproviders are welcome to carry their dependencies inside their
directories as needed.
Note: Any functions in cloud\_provider.go marked as 'Implementation optional'
may be left unimplemented. Those functions provide additional functionality, but
are not critical. To leave a function unimplemented just have it return
cloudprovider.ErrNotImplemented.
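For illustration, a hypothetical integration could stub such an optional method as follows; `myCloudProvider` is an invented type used only for this sketch, while the `Pricing` signature and `ErrNotImplemented` value mirror the existing in-tree providers:
```go
package mycloud

import (
	"k8s.io/autoscaler/cluster-autoscaler/cloudprovider"
	"k8s.io/autoscaler/cluster-autoscaler/utils/errors"
)

// myCloudProvider is a hypothetical cloudprovider integration.
type myCloudProvider struct{}

// Pricing is marked 'Implementation optional' in cloud_provider.go, so a new
// integration may simply report that it is not implemented.
func (p *myCloudProvider) Pricing() (cloudprovider.PricingModel, errors.AutoscalerError) {
	return nil, cloudprovider.ErrNotImplemented
}
```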
## Cloudprovider maintenance requirements
In order to allow code changes to Cluster Autoscaler that would require
non-trivial changes in cloudproviders, this policy introduces the _Cloudprovider
maintenance request_ (CMR) mechanism.
* CMR will be issued via a GitHub issue tagging all
cloudprovider owners and describing the problem being solved and the changes
requested.
* CMR will clearly state the minor version in which the changes are expected
(ex. 1.26).
* CMR will need to be discussed at a sig-autoscaling meeting and approved by
sig leads before being issued. It will also be announced on the sig-autoscaling
Slack channel and highlighted in the sig-autoscaling meeting notes.
* A CMR may be issued no later than the [enhancements
freeze](https://github.com/kubernetes/sig-release/blob/master/releases/release_phases.md#enhancements-freeze)
of a given Kubernetes minor version.
* If a given cloud provider was added more than one release cycle ago and there
are no valid OWNERS, CMR should request OWNERS file update.
Cloudprovider owners will be required to address the CMR or request an exception via
the CMR GitHub issue. A failure to take any action will result in the cloudprovider
being considered abandoned and marked as deprecated, as described below.
### Empty maintenance request
If no CMRs are issued in a given minor release, core maintainers will issue an
_empty CMR_. The purpose of an empty CMR is to verify that cloudprovider owners
are still actively maintaining their integration. The only action required for
an empty CMR is replying to the GitHub issue. Only one owner from each
cloudprovider needs to reply to the issue.
An empty CMR follows the same rules as any other CMR. In particular, it needs to be
issued by the enhancements freeze.
### Cloudprovider deprecation and deletion
If cloudprovider owners fail to take the actions described above, the particular
integration will be marked as deprecated in the next CA minor release. A
deprecated cloudprovider will be completely removed after 1 year as per
[Kubernetes deprecation
policy](https://kubernetes.io/docs/reference/using-api/deprecation-policy/#deprecating-a-feature-or-behavior).
A deprecated cloudprovider may become maintained again if the owners become
active again or new owners step up. In order to regain maintained status any
outstanding CMRs will need to be addressed.


@ -127,6 +127,11 @@ func (ali *aliCloudProvider) NodeGroupForNode(node *apiv1.Node) (cloudprovider.N
return ali.manager.GetAsgForInstance(instanceId)
}
// HasInstance returns whether a given node has a corresponding instance in this cloud provider
func (ali *aliCloudProvider) HasInstance(*apiv1.Node) (bool, error) {
return true, cloudprovider.ErrNotImplemented
}
// Pricing returns pricing model for this cloud provider or error if not available.
func (ali *aliCloudProvider) Pricing() (cloudprovider.PricingModel, errors.AutoscalerError) {
return nil, cloudprovider.ErrNotImplemented


@ -45,7 +45,7 @@ rules:
resources: ["statefulsets", "replicasets", "daemonsets"]
verbs: ["watch","list","get"]
- apiGroups: ["storage.k8s.io"]
resources: ["storageclasses"]
resources: ["storageclasses", "csinodes", "csidrivers", "csistoragecapacities"]
verbs: ["watch","list","get"]
---
@ -109,7 +109,7 @@ metadata:
type: Opaque
data:
access-key-id: [YOUR_BASE64_AK_ID]
access-key-id: [YOUR_BASE64_AK_SECRET]
access-key-secret: [YOUR_BASE64_AK_SECRET]
region-id: [YOUR_BASE64_REGION_ID]
---


@ -4,3 +4,7 @@ approvers:
- drmorr0
emeritus_approvers:
- Jeffwan
reviewers:
- jaypipes
- gjtempleton
- drmorr0


@ -47,6 +47,7 @@ should be updated to restrict the resources/add conditionals:
"autoscaling:DescribeAutoScalingGroups",
"autoscaling:DescribeAutoScalingInstances",
"autoscaling:DescribeLaunchConfigurations",
"autoscaling:DescribeScalingActivities",
"autoscaling:DescribeTags",
"ec2:DescribeInstanceTypes",
"ec2:DescribeLaunchTemplateVersions"
@ -164,9 +165,12 @@ Auto-Discovery Setup is the preferred method to configure Cluster Autoscaler.
To enable this, provide the `--node-group-auto-discovery` flag as an argument
whose value is a list of tag keys that should be looked for. For example,
`--node-group-auto-discovery=asg:tag=k8s.io/cluster-autoscaler/enabled,k8s.io/cluster-autoscaler/<cluster-name>,my-custom-tag=custom-value`
will find the ASGs that have the given tags. Optionally, a value can be provided
for each tag as well.
`--node-group-auto-discovery=asg:tag=k8s.io/cluster-autoscaler/enabled,k8s.io/cluster-autoscaler/<cluster-name>`
will find the ASGs that have at least all the given tags. Without the tags, the Cluster Autoscaler will be unable to add new instances
to the ASG, as it will not have been discovered. In this example no values are given for the tags; in that case any tag value is ignored and
only the tag name matters. Optionally, a tag value can be specified, and custom tags can also be added. For example,
`--node-group-auto-discovery=asg:tag=k8s.io/cluster-autoscaler/enabled=foo,k8s.io/cluster-autoscaler/<cluster-name>=bar,my-custom-tag=custom-value`.
Now the ASG tags must have the matching values, as well as the custom tag, for the ASG to be successfully discovered by the Cluster Autoscaler.
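To make the matching rule concrete, here is a minimal Go sketch (illustrative only, not the Cluster Autoscaler's actual implementation; the `my-cluster` tag is a made-up example) of how an ASG's tags could be checked against the configured tag list:
```go
package main

import "fmt"

// asgDiscovered reports whether an ASG would be discovered: it must carry every
// configured tag key and, where a tag value was configured, that value must match.
func asgDiscovered(required, asgTags map[string]string) bool {
	for key, wantValue := range required {
		gotValue, ok := asgTags[key]
		if !ok {
			return false // a required tag key is missing, so the ASG is not discovered
		}
		if wantValue != "" && wantValue != gotValue {
			return false // a value was configured for this tag and it does not match
		}
	}
	return true
}

func main() {
	required := map[string]string{
		"k8s.io/cluster-autoscaler/enabled":    "", // empty value: only the key matters
		"k8s.io/cluster-autoscaler/my-cluster": "",
	}
	asgTags := map[string]string{
		"k8s.io/cluster-autoscaler/enabled":    "true",
		"k8s.io/cluster-autoscaler/my-cluster": "owned",
	}
	fmt.Println(asgDiscovered(required, asgTags)) // true
}
```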
Example deployment:
@ -192,18 +196,24 @@ only the first instance type found will be used. See [Using Mixed Instances
Policies and Spot Instances](#Using-Mixed-Instances-Policies-and-Spot-Instances)
for details.
Cluster Autoscaler supports hints that nodes will be labelled when they join the
cluster via ASG tags. The tag is of the format
`k8s.io/cluster-autoscaler/node-template/label/<label-name>`. `<label-name>` is
When scaling up from 0 nodes, the Cluster Autoscaler reads ASG tags to derive information about the specifications of the nodes,
i.e. the labels and taints in that ASG. Note that it does not actually apply these labels or taints; this is done by an AWS-generated
user data script. The tags give the Cluster Autoscaler information about whether pending pods would be able to be scheduled should a new node
be spun up for a particular ASG, with the assumption that the ASG tags accurately reflect the labels and taints actually applied.
The following is only required if scaling up from 0 nodes. The Cluster Autoscaler will require the label tag
on the ASG should a deployment have a NodeSelector, else no scaling will occur as the Cluster Autoscaler does not realise
the ASG has that particular label. The tag is of the format
`k8s.io/cluster-autoscaler/node-template/label/<label-name>`: `<label-value>`, where `<label-name>` is
the name of the label and the value of each tag specifies the label value.
Example tags:
- `k8s.io/cluster-autoscaler/node-template/label/foo`: `bar`
Cluster Autoscaler supports hints that nodes will be tainted when they join the
cluster via ASG tags. The tag is of the format
`k8s.io/cluster-autoscaler/node-template/taint/<taint-name>`. `<taint-name>` is
The following is only required if scaling up from 0 nodes. The Cluster Autoscaler will require the taint tag
on the ASG, else tainted nodes may get spun up that cannot actually have the pending pods run on them. The tag is of the format
`k8s.io/cluster-autoscaler/node-template/taint/<taint-name>`: `<taint-value>:<taint-effect>`, where `<taint-name>` is
the name of the taint and the value of each tag specifies the taint value and effect with the format `<taint-value>:<taint-effect>`.
Example tags:
@ -243,7 +253,9 @@ Recommendations:
- It is recommended to use a second tag like
`k8s.io/cluster-autoscaler/<cluster-name>` when
`k8s.io/cluster-autoscaler/enabled` is used across many clusters to prevent
ASGs from different clusters recognized as the node groups.
ASGs from different clusters having conflicts.
An ASG must contain at least all the tags specified and, as such, secondary tags can differentiate between different
clusters' ASGs.
- To prevent conflicts, do not provide a `--nodes` argument if
`--node-group-auto-discovery` is specified.
- Be sure to add `autoscaling:DescribeLaunchConfigurations` or
@ -252,7 +264,7 @@ Recommendations:
Configurations or Launch Templates.
- If Cluster Autoscaler adds a node to the cluster, and the node has taints applied
when it joins the cluster that Cluster Autoscaler was unaware of (because the tag
wasn't supplied), this can lead to significant confusion and misbehaviour.
wasn't supplied in the ASG), this can lead to significant confusion and misbehaviour.
### Special note on GPU instances
@ -509,3 +521,4 @@ Please note: it is also possible to mount the cloud config file from host:
EC2 launch configuration has the setting `Metadata response hop limit` set to `2`.
Otherwise, the `/latest/api/token` call will timeout and result in an error. See [AWS docs here](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/configuring-instance-metadata-service.html#configuring-instance-metadata-options) for further information.
- If you don't use EKS managed nodegroups, don't add the `eks:nodegroup-name` tag to the ASG as this will lead to extra EKS API calls that could slow down scaling when there are 0 nodes in the nodegroup.
- Set `AWS_MAX_ATTEMPTS` to configure the maximum number of retries.


@ -8,7 +8,7 @@ import (
)
// ValidateEndpointHostHandler is a request handler that will validate the
// request endpoint's hosts is a valid RFC 3986 host.
// request endpoint's hosts is a valid RFC 3986 (https://www.ietf.org/rfc/rfc3986.txt) host.
var ValidateEndpointHostHandler = request.NamedHandler{
Name: "awssdk.protocol.ValidateEndpointHostHandler",
Fn: func(r *request.Request) {
@ -20,7 +20,7 @@ var ValidateEndpointHostHandler = request.NamedHandler{
}
// ValidateEndpointHost validates that the host string passed in is a valid RFC
// 3986 host. Returns error if the host is not valid.
// 3986 (https://www.ietf.org/rfc/rfc3986.txt) host. Returns error if the host is not valid.
func ValidateEndpointHost(opName, host string) error {
paramErrs := request.ErrInvalidParams{Context: opName}
@ -71,7 +71,7 @@ func ValidateEndpointHost(opName, host string) error {
return nil
}
// ValidHostLabel returns if the label is a valid RFC 3986 host label.
// ValidHostLabel returns if the label is a valid RFC 3986 (https://www.ietf.org/rfc/rfc3986.txt) host label.
func ValidHostLabel(label string) bool {
if l := len(label); l == 0 || l > 63 {
return false
@ -90,7 +90,7 @@ func ValidHostLabel(label string) bool {
return true
}
// ValidPortNumber return if the port is valid RFC 3986 port
// ValidPortNumber return if the port is valid RFC 3986 (https://www.ietf.org/rfc/rfc3986.txt) port
func ValidPortNumber(port string) bool {
i, err := strconv.Atoi(port)
if err != nil {


@ -120,6 +120,11 @@ func (aws *awsCloudProvider) NodeGroupForNode(node *apiv1.Node) (cloudprovider.N
}, nil
}
// HasInstance returns whether a given node has a corresponding instance in this cloud provider
func (aws *awsCloudProvider) HasInstance(*apiv1.Node) (bool, error) {
return true, cloudprovider.ErrNotImplemented
}
// Pricing returns pricing model for this cloud provider or error if not available.
func (aws *awsCloudProvider) Pricing() (cloudprovider.PricingModel, errors.AutoscalerError) {
return nil, cloudprovider.ErrNotImplemented
@ -362,14 +367,19 @@ func (ng *AwsNodeGroup) TemplateNodeInfo() (*schedulerframework.NodeInfo, error)
// BuildAWS builds AWS cloud provider, manager etc.
func BuildAWS(opts config.AutoscalingOptions, do cloudprovider.NodeGroupDiscoveryOptions, rl *cloudprovider.ResourceLimiter) cloudprovider.CloudProvider {
var config io.ReadCloser
var cfg io.ReadCloser
if opts.CloudConfig != "" {
var err error
config, err = os.Open(opts.CloudConfig)
cfg, err = os.Open(opts.CloudConfig)
if err != nil {
klog.Fatalf("Couldn't open cloud provider configuration %s: %#v", opts.CloudConfig, err)
}
defer config.Close()
defer cfg.Close()
}
sdkProvider, err := createAWSSDKProvider(cfg)
if err != nil {
klog.Fatalf("Failed to create AWS SDK Provider: %v", err)
}
// Generate EC2 list
@ -377,12 +387,7 @@ func BuildAWS(opts config.AutoscalingOptions, do cloudprovider.NodeGroupDiscover
if opts.AWSUseStaticInstanceList {
klog.Warningf("Using static EC2 Instance Types, this list could be outdated. Last update time: %s", lastUpdateTime)
} else {
region, err := GetCurrentAwsRegion()
if err != nil {
klog.Fatalf("Failed to get AWS Region: %v", err)
}
generatedInstanceTypes, err := GenerateEC2InstanceTypes(region)
generatedInstanceTypes, err := GenerateEC2InstanceTypes(sdkProvider.session)
if err != nil {
klog.Errorf("Failed to generate AWS EC2 Instance Types: %v, falling back to static list with last update time: %s", err, lastUpdateTime)
}
@ -409,7 +414,7 @@ func BuildAWS(opts config.AutoscalingOptions, do cloudprovider.NodeGroupDiscover
klog.Infof("Successfully load %d EC2 Instance Types %s", len(keys), keys)
}
manager, err := CreateAwsManager(config, do, instanceTypes)
manager, err := CreateAwsManager(sdkProvider, do, instanceTypes)
if err != nil {
klog.Fatalf("Failed to create AWS Manager: %v", err)
}


@ -14,37 +14,31 @@ See the License for the specific language governing permissions and
limitations under the License.
*/
//go:generate go run ec2_instance_types/gen.go
//go:generate go run ec2_instance_types/gen.go -region $AWS_REGION
package aws
import (
"errors"
"fmt"
"io"
"math/rand"
"os"
"regexp"
"strconv"
"strings"
"time"
"gopkg.in/gcfg.v1"
apiv1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/klog/v2"
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider"
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider/aws/aws-sdk-go/aws"
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider/aws/aws-sdk-go/aws/ec2metadata"
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider/aws/aws-sdk-go/aws/endpoints"
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider/aws/aws-sdk-go/aws/session"
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider/aws/aws-sdk-go/service/autoscaling"
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider/aws/aws-sdk-go/service/ec2"
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider/aws/aws-sdk-go/service/eks"
"k8s.io/autoscaler/cluster-autoscaler/config"
"k8s.io/autoscaler/cluster-autoscaler/utils/gpu"
klog "k8s.io/klog/v2"
provider_aws "k8s.io/legacy-cloud-providers/aws"
)
const (
@ -74,131 +68,15 @@ type asgTemplate struct {
Tags []*autoscaling.TagDescription
}
func validateOverrides(cfg *provider_aws.CloudConfig) error {
if len(cfg.ServiceOverride) == 0 {
return nil
}
set := make(map[string]bool)
for onum, ovrd := range cfg.ServiceOverride {
// Note: gcfg does not space trim, so we have to when comparing to empty string ""
name := strings.TrimSpace(ovrd.Service)
if name == "" {
return fmt.Errorf("service name is missing [Service is \"\"] in override %s", onum)
}
// insure the map service name is space trimmed
ovrd.Service = name
region := strings.TrimSpace(ovrd.Region)
if region == "" {
return fmt.Errorf("service region is missing [Region is \"\"] in override %s", onum)
}
// insure the map region is space trimmed
ovrd.Region = region
url := strings.TrimSpace(ovrd.URL)
if url == "" {
return fmt.Errorf("url is missing [URL is \"\"] in override %s", onum)
}
signingRegion := strings.TrimSpace(ovrd.SigningRegion)
if signingRegion == "" {
return fmt.Errorf("signingRegion is missing [SigningRegion is \"\"] in override %s", onum)
}
signature := name + "_" + region
if set[signature] {
return fmt.Errorf("duplicate entry found for service override [%s] (%s in %s)", onum, name, region)
}
set[signature] = true
}
return nil
}
func getResolver(cfg *provider_aws.CloudConfig) endpoints.ResolverFunc {
defaultResolver := endpoints.DefaultResolver()
defaultResolverFn := func(service, region string,
optFns ...func(*endpoints.Options)) (endpoints.ResolvedEndpoint, error) {
return defaultResolver.EndpointFor(service, region, optFns...)
}
if len(cfg.ServiceOverride) == 0 {
return defaultResolverFn
}
return func(service, region string,
optFns ...func(*endpoints.Options)) (endpoints.ResolvedEndpoint, error) {
for _, override := range cfg.ServiceOverride {
if override.Service == service && override.Region == region {
return endpoints.ResolvedEndpoint{
URL: override.URL,
SigningRegion: override.SigningRegion,
SigningMethod: override.SigningMethod,
SigningName: override.SigningName,
}, nil
}
}
return defaultResolver.EndpointFor(service, region, optFns...)
}
}
type awsSDKProvider struct {
cfg *provider_aws.CloudConfig
}
func newAWSSDKProvider(cfg *provider_aws.CloudConfig) *awsSDKProvider {
return &awsSDKProvider{
cfg: cfg,
}
}
// getRegion deduces the current AWS Region.
func getRegion(cfg ...*aws.Config) string {
region, present := os.LookupEnv("AWS_REGION")
if !present {
sess, err := session.NewSession()
if err != nil {
klog.Errorf("Error getting AWS session while retrieving region: %v", err)
} else {
svc := ec2metadata.New(sess, cfg...)
if r, err := svc.Region(); err == nil {
region = r
}
}
}
return region
}
// createAwsManagerInternal allows for custom objects to be passed in by tests
//
// #1449 If running tests outside of AWS without AWS_REGION among environment
// variables, avoid a 5+ second EC2 Metadata lookup timeout in getRegion by
// setting and resetting AWS_REGION before calling createAWSManagerInternal:
//
// defer resetAWSRegion(os.LookupEnv("AWS_REGION"))
// os.Setenv("AWS_REGION", "fanghorn")
func createAWSManagerInternal(
configReader io.Reader,
awsSDKProvider *awsSDKProvider,
discoveryOpts cloudprovider.NodeGroupDiscoveryOptions,
awsService *awsWrapper,
instanceTypes map[string]*InstanceType,
) (*AwsManager, error) {
cfg, err := readAWSCloudConfig(configReader)
if err != nil {
klog.Errorf("Couldn't read config: %v", err)
return nil, err
}
if err = validateOverrides(cfg); err != nil {
klog.Errorf("Unable to validate custom endpoint overrides: %v", err)
return nil, err
}
if awsService == nil {
awsSdkProvider := newAWSSDKProvider(cfg)
sess, err := session.NewSession(aws.NewConfig().WithRegion(getRegion()).
WithEndpointResolver(getResolver(awsSdkProvider.cfg)))
if err != nil {
return nil, err
}
sess := awsSDKProvider.session
awsService = &awsWrapper{autoscaling.New(sess), ec2.New(sess), eks.New(sess)}
}
@ -228,24 +106,9 @@ func createAWSManagerInternal(
return manager, nil
}
// readAWSCloudConfig reads an instance of AWSCloudConfig from config reader.
func readAWSCloudConfig(config io.Reader) (*provider_aws.CloudConfig, error) {
var cfg provider_aws.CloudConfig
var err error
if config != nil {
err = gcfg.ReadInto(&cfg, config)
if err != nil {
return nil, err
}
}
return &cfg, nil
}
// CreateAwsManager constructs awsManager object.
func CreateAwsManager(configReader io.Reader, discoveryOpts cloudprovider.NodeGroupDiscoveryOptions, instanceTypes map[string]*InstanceType) (*AwsManager, error) {
return createAWSManagerInternal(configReader, discoveryOpts, nil, instanceTypes)
func CreateAwsManager(awsSDKProvider *awsSDKProvider, discoveryOpts cloudprovider.NodeGroupDiscoveryOptions, instanceTypes map[string]*InstanceType) (*AwsManager, error) {
return createAWSManagerInternal(awsSDKProvider, discoveryOpts, nil, instanceTypes)
}
// Refresh is called before every main loop and can be used to dynamically update cloud provider state.


@ -17,12 +17,7 @@ limitations under the License.
package aws
import (
"encoding/json"
"fmt"
"io"
"net/http"
"net/http/httptest"
"os"
"reflect"
"sort"
"strconv"
@ -37,33 +32,12 @@ import (
"k8s.io/apimachinery/pkg/api/resource"
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider"
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider/aws/aws-sdk-go/aws"
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider/aws/aws-sdk-go/aws/ec2metadata"
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider/aws/aws-sdk-go/service/autoscaling"
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider/aws/aws-sdk-go/service/ec2"
"k8s.io/autoscaler/cluster-autoscaler/config"
"k8s.io/autoscaler/cluster-autoscaler/utils/gpu"
provider_aws "k8s.io/legacy-cloud-providers/aws"
)
// TestGetRegion ensures correct source supplies AWS Region.
func TestGetRegion(t *testing.T) {
key := "AWS_REGION"
// Ensure environment variable retains precedence.
expected1 := "the-shire-1"
t.Setenv(key, expected1)
assert.Equal(t, expected1, getRegion())
// Ensure without environment variable, EC2 Metadata is used.
expected2 := "mordor-2"
expectedjson := ec2metadata.EC2InstanceIdentityDocument{Region: expected2}
js, _ := json.Marshal(expectedjson)
os.Unsetenv(key)
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
w.Write(js)
}))
cfg := aws.NewConfig().WithEndpoint(server.URL)
assert.Equal(t, expected2, getRegion(cfg))
}
func TestJoinNodeLabelsChoosingUserValuesOverAPIValues(t *testing.T) {
extractedLabels := make(map[string]string)
mngLabels := make(map[string]string)
@ -820,271 +794,6 @@ type ServiceDescriptor struct {
signingName string
}
func TestOverridesActiveConfig(t *testing.T) {
tests := []struct {
name string
reader io.Reader
aws provider_aws.Services
expectError bool
active bool
servicesOverridden []ServiceDescriptor
}{
{
"No overrides",
strings.NewReader(`
[global]
`),
nil,
false, false,
[]ServiceDescriptor{},
},
{
"Missing Service Name",
strings.NewReader(`
[global]
[ServiceOverride "1"]
Region=sregion
URL=https://s3.foo.bar
SigningRegion=sregion
SigningMethod = sign
`),
nil,
true, false,
[]ServiceDescriptor{},
},
{
"Missing Service Region",
strings.NewReader(`
[global]
[ServiceOverride "1"]
Service=s3
URL=https://s3.foo.bar
SigningRegion=sregion
SigningMethod = sign
`),
nil,
true, false,
[]ServiceDescriptor{},
},
{
"Missing URL",
strings.NewReader(`
[global]
[ServiceOverride "1"]
Service="s3"
Region=sregion
SigningRegion=sregion
SigningMethod = sign
`),
nil,
true, false,
[]ServiceDescriptor{},
},
{
"Missing Signing Region",
strings.NewReader(`
[global]
[ServiceOverride "1"]
Service=s3
Region=sregion
URL=https://s3.foo.bar
SigningMethod = sign
`),
nil,
true, false,
[]ServiceDescriptor{},
},
{
"Active Overrides",
strings.NewReader(`
[Global]
[ServiceOverride "1"]
Service = "s3 "
Region = sregion
URL = https://s3.foo.bar
SigningRegion = sregion
SigningMethod = v4
`),
nil,
false, true,
[]ServiceDescriptor{{name: "s3", region: "sregion", signingRegion: "sregion", signingMethod: "v4"}},
},
{
"Multiple Overridden Services",
strings.NewReader(`
[Global]
vpc = vpc-abc1234567
[ServiceOverride "1"]
Service=s3
Region=sregion1
URL=https://s3.foo.bar
SigningRegion=sregion1
SigningMethod = v4
[ServiceOverride "2"]
Service=ec2
Region=sregion2
URL=https://ec2.foo.bar
SigningRegion=sregion2
SigningMethod = v4
`),
nil,
false, true,
[]ServiceDescriptor{{name: "s3", region: "sregion1", signingRegion: "sregion1", signingMethod: "v4"},
{name: "ec2", region: "sregion2", signingRegion: "sregion2", signingMethod: "v4"}},
},
{
"Duplicate Services",
strings.NewReader(`
[Global]
vpc = vpc-abc1234567
[ServiceOverride "1"]
Service=s3
Region=sregion1
URL=https://s3.foo.bar
SigningRegion=sregion
SigningMethod = sign
[ServiceOverride "2"]
Service=s3
Region=sregion1
URL=https://s3.foo.bar
SigningRegion=sregion
SigningMethod = sign
`),
nil,
true, false,
[]ServiceDescriptor{},
},
{
"Multiple Overridden Services in Multiple regions",
strings.NewReader(`
[global]
[ServiceOverride "1"]
Service=s3
Region=region1
URL=https://s3.foo.bar
SigningRegion=sregion1
[ServiceOverride "2"]
Service=ec2
Region=region2
URL=https://ec2.foo.bar
SigningRegion=sregion
SigningMethod = v4
`),
nil,
false, true,
[]ServiceDescriptor{{name: "s3", region: "region1", signingRegion: "sregion1", signingMethod: ""},
{name: "ec2", region: "region2", signingRegion: "sregion", signingMethod: "v4"}},
},
{
"Multiple regions, Same Service",
strings.NewReader(`
[global]
[ServiceOverride "1"]
Service=s3
Region=region1
URL=https://s3.foo.bar
SigningRegion=sregion1
SigningMethod = v3
[ServiceOverride "2"]
Service=s3
Region=region2
URL=https://s3.foo.bar
SigningRegion=sregion1
SigningMethod = v4
SigningName = "name"
`),
nil,
false, true,
[]ServiceDescriptor{{name: "s3", region: "region1", signingRegion: "sregion1", signingMethod: "v3"},
{name: "s3", region: "region2", signingRegion: "sregion1", signingMethod: "v4", signingName: "name"}},
},
}
for _, test := range tests {
t.Logf("Running test case %s", test.name)
cfg, err := readAWSCloudConfig(test.reader)
if err == nil {
err = validateOverrides(cfg)
}
if test.expectError {
if err == nil {
t.Errorf("Should error for case %s (cfg=%v)", test.name, cfg)
}
} else {
if err != nil {
t.Errorf("Should succeed for case: %s, got %v", test.name, err)
}
if len(cfg.ServiceOverride) != len(test.servicesOverridden) {
t.Errorf("Expected %d overridden services, received %d for case %s",
len(test.servicesOverridden), len(cfg.ServiceOverride), test.name)
} else {
for _, sd := range test.servicesOverridden {
var found *struct {
Service string
Region string
URL string
SigningRegion string
SigningMethod string
SigningName string
}
for _, v := range cfg.ServiceOverride {
if v.Service == sd.name && v.Region == sd.region {
found = v
break
}
}
if found == nil {
t.Errorf("Missing override for service %s in case %s",
sd.name, test.name)
} else {
if found.SigningRegion != sd.signingRegion {
t.Errorf("Expected signing region '%s', received '%s' for case %s",
sd.signingRegion, found.SigningRegion, test.name)
}
if found.SigningMethod != sd.signingMethod {
t.Errorf("Expected signing method '%s', received '%s' for case %s",
sd.signingMethod, found.SigningRegion, test.name)
}
targetName := fmt.Sprintf("https://%s.foo.bar", sd.name)
if found.URL != targetName {
t.Errorf("Expected Endpoint '%s', received '%s' for case %s",
targetName, found.URL, test.name)
}
if found.SigningName != sd.signingName {
t.Errorf("Expected signing name '%s', received '%s' for case %s",
sd.signingName, found.SigningName, test.name)
}
fn := getResolver(cfg)
ep1, e := fn(sd.name, sd.region, nil)
if e != nil {
t.Errorf("Expected a valid endpoint for %s in case %s",
sd.name, test.name)
} else {
targetName := fmt.Sprintf("https://%s.foo.bar", sd.name)
if ep1.URL != targetName {
t.Errorf("Expected endpoint url: %s, received %s in case %s",
targetName, ep1.URL, test.name)
}
if ep1.SigningRegion != sd.signingRegion {
t.Errorf("Expected signing region '%s', received '%s' in case %s",
sd.signingRegion, ep1.SigningRegion, test.name)
}
if ep1.SigningMethod != sd.signingMethod {
t.Errorf("Expected signing method '%s', received '%s' in case %s",
sd.signingMethod, ep1.SigningRegion, test.name)
}
}
}
}
}
}
}
}
func tagsMatcher(expected *autoscaling.DescribeAutoScalingGroupsInput) func(*autoscaling.DescribeAutoScalingGroupsInput) bool {
return func(actual *autoscaling.DescribeAutoScalingGroupsInput) bool {
expectedTags := flatTagSlice(expected.Filters)


@ -0,0 +1,188 @@
/*
Copyright 2016 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package aws
import (
"fmt"
"gopkg.in/gcfg.v1"
"io"
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider/aws/aws-sdk-go/aws"
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider/aws/aws-sdk-go/aws/ec2metadata"
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider/aws/aws-sdk-go/aws/endpoints"
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider/aws/aws-sdk-go/aws/session"
"k8s.io/klog/v2"
provider_aws "k8s.io/legacy-cloud-providers/aws"
"os"
"strconv"
"strings"
)
// createAWSSDKProvider
//
// #1449 If running tests outside of AWS without AWS_REGION among environment
// variables, avoid a 5+ second EC2 Metadata lookup timeout in getRegion by
// setting and resetting AWS_REGION before calling createAWSSDKProvider:
//
// t.Setenv("AWS_REGION", "fanghorn")
func createAWSSDKProvider(configReader io.Reader) (*awsSDKProvider, error) {
cfg, err := readAWSCloudConfig(configReader)
if err != nil {
klog.Errorf("Couldn't read config: %v", err)
return nil, err
}
if err = validateOverrides(cfg); err != nil {
klog.Errorf("Unable to validate custom endpoint overrides: %v", err)
return nil, err
}
config := aws.NewConfig().
WithRegion(getRegion()).
WithEndpointResolver(getResolver(cfg))
config, err = setMaxRetriesFromEnv(config)
if err != nil {
return nil, err
}
sess, err := session.NewSession(config)
if err != nil {
return nil, err
}
provider := &awsSDKProvider{
session: sess,
}
return provider, nil
}
// setMaxRetriesFromEnv sets aws config MaxRetries by reading AWS_MAX_ATTEMPTS
// aws sdk does not auto-set these so instead of having more config options we can reuse what the aws cli
// does and read AWS_MAX_ATTEMPTS from the env https://docs.aws.amazon.com/cli/latest/userguide/cli-configure-envvars.html
func setMaxRetriesFromEnv(config *aws.Config) (*aws.Config, error) {
maxRetries := os.Getenv("AWS_MAX_ATTEMPTS")
if maxRetries != "" {
num, err := strconv.Atoi(maxRetries)
if err != nil {
return nil, err
}
config = config.WithMaxRetries(num)
}
return config, nil
}
type awsSDKProvider struct {
session *session.Session
}
// readAWSCloudConfig reads an instance of AWSCloudConfig from config reader.
func readAWSCloudConfig(config io.Reader) (*provider_aws.CloudConfig, error) {
var cfg provider_aws.CloudConfig
var err error
if config != nil {
err = gcfg.ReadInto(&cfg, config)
if err != nil {
return nil, err
}
}
return &cfg, nil
}
func validateOverrides(cfg *provider_aws.CloudConfig) error {
if len(cfg.ServiceOverride) == 0 {
return nil
}
set := make(map[string]bool)
for onum, ovrd := range cfg.ServiceOverride {
// Note: gcfg does not space trim, so we have to when comparing to empty string ""
name := strings.TrimSpace(ovrd.Service)
if name == "" {
return fmt.Errorf("service name is missing [Service is \"\"] in override %s", onum)
}
// insure the map service name is space trimmed
ovrd.Service = name
region := strings.TrimSpace(ovrd.Region)
if region == "" {
return fmt.Errorf("service region is missing [Region is \"\"] in override %s", onum)
}
// insure the map region is space trimmed
ovrd.Region = region
url := strings.TrimSpace(ovrd.URL)
if url == "" {
return fmt.Errorf("url is missing [URL is \"\"] in override %s", onum)
}
signingRegion := strings.TrimSpace(ovrd.SigningRegion)
if signingRegion == "" {
return fmt.Errorf("signingRegion is missing [SigningRegion is \"\"] in override %s", onum)
}
signature := name + "_" + region
if set[signature] {
return fmt.Errorf("duplicate entry found for service override [%s] (%s in %s)", onum, name, region)
}
set[signature] = true
}
return nil
}
func getResolver(cfg *provider_aws.CloudConfig) endpoints.ResolverFunc {
defaultResolver := endpoints.DefaultResolver()
defaultResolverFn := func(service, region string,
optFns ...func(*endpoints.Options)) (endpoints.ResolvedEndpoint, error) {
return defaultResolver.EndpointFor(service, region, optFns...)
}
if len(cfg.ServiceOverride) == 0 {
return defaultResolverFn
}
return func(service, region string,
optFns ...func(*endpoints.Options)) (endpoints.ResolvedEndpoint, error) {
for _, override := range cfg.ServiceOverride {
if override.Service == service && override.Region == region {
return endpoints.ResolvedEndpoint{
URL: override.URL,
SigningRegion: override.SigningRegion,
SigningMethod: override.SigningMethod,
SigningName: override.SigningName,
}, nil
}
}
return defaultResolver.EndpointFor(service, region, optFns...)
}
}
// getRegion deduces the current AWS Region.
func getRegion(cfg ...*aws.Config) string {
region, present := os.LookupEnv("AWS_REGION")
if !present {
sess, err := session.NewSession()
if err != nil {
klog.Errorf("Error getting AWS session while retrieving region: %v", err)
} else {
svc := ec2metadata.New(sess, cfg...)
if r, err := svc.Region(); err == nil {
region = r
}
}
}
return region
}


@ -0,0 +1,316 @@
/*
Copyright 2017 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package aws
import (
"encoding/json"
"fmt"
"github.com/stretchr/testify/assert"
"io"
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider/aws/aws-sdk-go/aws"
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider/aws/aws-sdk-go/aws/ec2metadata"
provider_aws "k8s.io/legacy-cloud-providers/aws"
"net/http"
"net/http/httptest"
"os"
"strings"
"testing"
)
// TestGetRegion ensures correct source supplies AWS Region.
func TestGetRegion(t *testing.T) {
key := "AWS_REGION"
// Ensure environment variable retains precedence.
expected1 := "the-shire-1"
t.Setenv(key, expected1)
assert.Equal(t, expected1, getRegion())
// Ensure without environment variable, EC2 Metadata is used.
expected2 := "mordor-2"
expectedjson := ec2metadata.EC2InstanceIdentityDocument{Region: expected2}
js, _ := json.Marshal(expectedjson)
os.Unsetenv(key)
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
w.Write(js)
}))
cfg := aws.NewConfig().WithEndpoint(server.URL)
assert.Equal(t, expected2, getRegion(cfg))
}
func TestOverridesActiveConfig(t *testing.T) {
tests := []struct {
name string
reader io.Reader
aws provider_aws.Services
expectError bool
active bool
servicesOverridden []ServiceDescriptor
}{
{
"No overrides",
strings.NewReader(`
[global]
`),
nil,
false, false,
[]ServiceDescriptor{},
},
{
"Missing Service Name",
strings.NewReader(`
[global]
[ServiceOverride "1"]
Region=sregion
URL=https://s3.foo.bar
SigningRegion=sregion
SigningMethod = sign
`),
nil,
true, false,
[]ServiceDescriptor{},
},
{
"Missing Service Region",
strings.NewReader(`
[global]
[ServiceOverride "1"]
Service=s3
URL=https://s3.foo.bar
SigningRegion=sregion
SigningMethod = sign
`),
nil,
true, false,
[]ServiceDescriptor{},
},
{
"Missing URL",
strings.NewReader(`
[global]
[ServiceOverride "1"]
Service="s3"
Region=sregion
SigningRegion=sregion
SigningMethod = sign
`),
nil,
true, false,
[]ServiceDescriptor{},
},
{
"Missing Signing Region",
strings.NewReader(`
[global]
[ServiceOverride "1"]
Service=s3
Region=sregion
URL=https://s3.foo.bar
SigningMethod = sign
`),
nil,
true, false,
[]ServiceDescriptor{},
},
{
"Active Overrides",
strings.NewReader(`
[Global]
[ServiceOverride "1"]
Service = "s3 "
Region = sregion
URL = https://s3.foo.bar
SigningRegion = sregion
SigningMethod = v4
`),
nil,
false, true,
[]ServiceDescriptor{{name: "s3", region: "sregion", signingRegion: "sregion", signingMethod: "v4"}},
},
{
"Multiple Overridden Services",
strings.NewReader(`
[Global]
vpc = vpc-abc1234567
[ServiceOverride "1"]
Service=s3
Region=sregion1
URL=https://s3.foo.bar
SigningRegion=sregion1
SigningMethod = v4
[ServiceOverride "2"]
Service=ec2
Region=sregion2
URL=https://ec2.foo.bar
SigningRegion=sregion2
SigningMethod = v4
`),
nil,
false, true,
[]ServiceDescriptor{{name: "s3", region: "sregion1", signingRegion: "sregion1", signingMethod: "v4"},
{name: "ec2", region: "sregion2", signingRegion: "sregion2", signingMethod: "v4"}},
},
{
"Duplicate Services",
strings.NewReader(`
[Global]
vpc = vpc-abc1234567
[ServiceOverride "1"]
Service=s3
Region=sregion1
URL=https://s3.foo.bar
SigningRegion=sregion
SigningMethod = sign
[ServiceOverride "2"]
Service=s3
Region=sregion1
URL=https://s3.foo.bar
SigningRegion=sregion
SigningMethod = sign
`),
nil,
true, false,
[]ServiceDescriptor{},
},
{
"Multiple Overridden Services in Multiple regions",
strings.NewReader(`
[global]
[ServiceOverride "1"]
Service=s3
Region=region1
URL=https://s3.foo.bar
SigningRegion=sregion1
[ServiceOverride "2"]
Service=ec2
Region=region2
URL=https://ec2.foo.bar
SigningRegion=sregion
SigningMethod = v4
`),
nil,
false, true,
[]ServiceDescriptor{{name: "s3", region: "region1", signingRegion: "sregion1", signingMethod: ""},
{name: "ec2", region: "region2", signingRegion: "sregion", signingMethod: "v4"}},
},
{
"Multiple regions, Same Service",
strings.NewReader(`
[global]
[ServiceOverride "1"]
Service=s3
Region=region1
URL=https://s3.foo.bar
SigningRegion=sregion1
SigningMethod = v3
[ServiceOverride "2"]
Service=s3
Region=region2
URL=https://s3.foo.bar
SigningRegion=sregion1
SigningMethod = v4
SigningName = "name"
`),
nil,
false, true,
[]ServiceDescriptor{{name: "s3", region: "region1", signingRegion: "sregion1", signingMethod: "v3"},
{name: "s3", region: "region2", signingRegion: "sregion1", signingMethod: "v4", signingName: "name"}},
},
}
for _, test := range tests {
t.Logf("Running test case %s", test.name)
cfg, err := readAWSCloudConfig(test.reader)
if err == nil {
err = validateOverrides(cfg)
}
if test.expectError {
if err == nil {
t.Errorf("Should error for case %s (cfg=%v)", test.name, cfg)
}
} else {
if err != nil {
t.Errorf("Should succeed for case: %s, got %v", test.name, err)
}
if len(cfg.ServiceOverride) != len(test.servicesOverridden) {
t.Errorf("Expected %d overridden services, received %d for case %s",
len(test.servicesOverridden), len(cfg.ServiceOverride), test.name)
} else {
for _, sd := range test.servicesOverridden {
var found *struct {
Service string
Region string
URL string
SigningRegion string
SigningMethod string
SigningName string
}
for _, v := range cfg.ServiceOverride {
if v.Service == sd.name && v.Region == sd.region {
found = v
break
}
}
if found == nil {
t.Errorf("Missing override for service %s in case %s",
sd.name, test.name)
} else {
if found.SigningRegion != sd.signingRegion {
t.Errorf("Expected signing region '%s', received '%s' for case %s",
sd.signingRegion, found.SigningRegion, test.name)
}
if found.SigningMethod != sd.signingMethod {
t.Errorf("Expected signing method '%s', received '%s' for case %s",
sd.signingMethod, found.SigningMethod, test.name)
}
targetName := fmt.Sprintf("https://%s.foo.bar", sd.name)
if found.URL != targetName {
t.Errorf("Expected Endpoint '%s', received '%s' for case %s",
targetName, found.URL, test.name)
}
if found.SigningName != sd.signingName {
t.Errorf("Expected signing name '%s', received '%s' for case %s",
sd.signingName, found.SigningName, test.name)
}
fn := getResolver(cfg)
ep1, e := fn(sd.name, sd.region, nil)
if e != nil {
t.Errorf("Expected a valid endpoint for %s in case %s",
sd.name, test.name)
} else {
targetName := fmt.Sprintf("https://%s.foo.bar", sd.name)
if ep1.URL != targetName {
t.Errorf("Expected endpoint url: %s, received %s in case %s",
targetName, ep1.URL, test.name)
}
if ep1.SigningRegion != sd.signingRegion {
t.Errorf("Expected signing region '%s', received '%s' in case %s",
sd.signingRegion, ep1.SigningRegion, test.name)
}
if ep1.SigningMethod != sd.signingMethod {
t.Errorf("Expected signing method '%s', received '%s' in case %s",
sd.signingMethod, ep1.SigningMethod, test.name)
}
}
}
}
}
}
}
}

View File

@ -32,19 +32,12 @@ var (
)
// GenerateEC2InstanceTypes returns a map of ec2 resources
func GenerateEC2InstanceTypes(region string) (map[string]*InstanceType, error) {
sess, err := session.NewSession(&aws.Config{
Region: aws.String(region)},
)
if err != nil {
return nil, err
}
func GenerateEC2InstanceTypes(sess *session.Session) (map[string]*InstanceType, error) {
ec2Client := ec2.New(sess)
input := ec2.DescribeInstanceTypesInput{}
instanceTypes := make(map[string]*InstanceType)
if err = ec2Client.DescribeInstanceTypesPages(&input, func(page *ec2.DescribeInstanceTypesOutput, isLastPage bool) bool {
if err := ec2Client.DescribeInstanceTypesPages(&input, func(page *ec2.DescribeInstanceTypesOutput, isLastPage bool) bool {
for _, rawInstanceType := range page.InstanceTypes {
instanceTypes[*rawInstanceType.InstanceType] = transformInstanceType(rawInstanceType)
}

View File

@ -91,6 +91,10 @@ func (m *awsWrapper) getManagedNodegroupInfo(nodegroupName string, clusterName s
labels["k8sVersion"] = *r.Nodegroup.Version
}
if r.Nodegroup.NodegroupName != nil && len(*r.Nodegroup.NodegroupName) > 0 {
labels["eks.amazonaws.com/nodegroup"] = *r.Nodegroup.NodegroupName
}
if r.Nodegroup.Labels != nil && len(r.Nodegroup.Labels) > 0 {
labelsMap := r.Nodegroup.Labels
for k, v := range labelsMap {

View File

@ -163,13 +163,14 @@ func TestGetManagedNodegroup(t *testing.T) {
assert.Equal(t, taintList[1].Effect, apiv1.TaintEffect(taintEffect2))
assert.Equal(t, taintList[1].Key, taintKey2)
assert.Equal(t, taintList[1].Value, taintValue2)
assert.Equal(t, len(labelMap), 6)
assert.Equal(t, len(labelMap), 7)
assert.Equal(t, labelMap[labelKey1], labelValue1)
assert.Equal(t, labelMap[labelKey2], labelValue2)
assert.Equal(t, labelMap["diskSize"], strconv.FormatInt(diskSize, 10))
assert.Equal(t, labelMap["amiType"], amiType)
assert.Equal(t, labelMap["capacityType"], capacityType)
assert.Equal(t, labelMap["k8sVersion"], k8sVersion)
assert.Equal(t, labelMap["eks.amazonaws.com/nodegroup"], nodegroupName)
}
func TestGetManagedNodegroupWithNilValues(t *testing.T) {
@ -207,10 +208,11 @@ func TestGetManagedNodegroupWithNilValues(t *testing.T) {
taintList, labelMap, err := awsWrapper.getManagedNodegroupInfo(nodegroupName, clusterName)
assert.Nil(t, err)
assert.Equal(t, len(taintList), 0)
assert.Equal(t, len(labelMap), 3)
assert.Equal(t, len(labelMap), 4)
assert.Equal(t, labelMap["amiType"], amiType)
assert.Equal(t, labelMap["capacityType"], capacityType)
assert.Equal(t, labelMap["k8sVersion"], k8sVersion)
assert.Equal(t, labelMap["eks.amazonaws.com/nodegroup"], nodegroupName)
}
func TestGetManagedNodegroupWithEmptyValues(t *testing.T) {
@ -248,10 +250,11 @@ func TestGetManagedNodegroupWithEmptyValues(t *testing.T) {
taintList, labelMap, err := awsWrapper.getManagedNodegroupInfo(nodegroupName, clusterName)
assert.Nil(t, err)
assert.Equal(t, len(taintList), 0)
assert.Equal(t, len(labelMap), 3)
assert.Equal(t, len(labelMap), 4)
assert.Equal(t, labelMap["amiType"], amiType)
assert.Equal(t, labelMap["capacityType"], capacityType)
assert.Equal(t, labelMap["k8sVersion"], k8sVersion)
assert.Equal(t, labelMap["eks.amazonaws.com/nodegroup"], nodegroupName)
}
func TestMoreThen100Groups(t *testing.T) {

View File

@ -28,7 +28,7 @@ type InstanceType struct {
}
// StaticListLastUpdateTime is a string declaring the last time the static list was updated.
var StaticListLastUpdateTime = "2022-09-16"
var StaticListLastUpdateTime = "2022-12-11"
// InstanceTypes is a map of ec2 resources
var InstanceTypes = map[string]*InstanceType{
@ -844,6 +844,69 @@ var InstanceTypes = map[string]*InstanceType{
GPU: 0,
Architecture: "amd64",
},
"c6in.12xlarge": {
InstanceType: "c6in.12xlarge",
VCPU: 48,
MemoryMb: 98304,
GPU: 0,
Architecture: "amd64",
},
"c6in.16xlarge": {
InstanceType: "c6in.16xlarge",
VCPU: 64,
MemoryMb: 131072,
GPU: 0,
Architecture: "amd64",
},
"c6in.24xlarge": {
InstanceType: "c6in.24xlarge",
VCPU: 96,
MemoryMb: 196608,
GPU: 0,
Architecture: "amd64",
},
"c6in.2xlarge": {
InstanceType: "c6in.2xlarge",
VCPU: 8,
MemoryMb: 16384,
GPU: 0,
Architecture: "amd64",
},
"c6in.32xlarge": {
InstanceType: "c6in.32xlarge",
VCPU: 128,
MemoryMb: 262144,
GPU: 0,
Architecture: "amd64",
},
"c6in.4xlarge": {
InstanceType: "c6in.4xlarge",
VCPU: 16,
MemoryMb: 32768,
GPU: 0,
Architecture: "amd64",
},
"c6in.8xlarge": {
InstanceType: "c6in.8xlarge",
VCPU: 32,
MemoryMb: 65536,
GPU: 0,
Architecture: "amd64",
},
"c6in.large": {
InstanceType: "c6in.large",
VCPU: 2,
MemoryMb: 4096,
GPU: 0,
Architecture: "amd64",
},
"c6in.xlarge": {
InstanceType: "c6in.xlarge",
VCPU: 4,
MemoryMb: 8192,
GPU: 0,
Architecture: "amd64",
},
"c7g.12xlarge": {
InstanceType: "c7g.12xlarge",
VCPU: 48,
@ -2461,6 +2524,132 @@ var InstanceTypes = map[string]*InstanceType{
GPU: 0,
Architecture: "amd64",
},
"m6idn.12xlarge": {
InstanceType: "m6idn.12xlarge",
VCPU: 48,
MemoryMb: 196608,
GPU: 0,
Architecture: "amd64",
},
"m6idn.16xlarge": {
InstanceType: "m6idn.16xlarge",
VCPU: 64,
MemoryMb: 262144,
GPU: 0,
Architecture: "amd64",
},
"m6idn.24xlarge": {
InstanceType: "m6idn.24xlarge",
VCPU: 96,
MemoryMb: 393216,
GPU: 0,
Architecture: "amd64",
},
"m6idn.2xlarge": {
InstanceType: "m6idn.2xlarge",
VCPU: 8,
MemoryMb: 32768,
GPU: 0,
Architecture: "amd64",
},
"m6idn.32xlarge": {
InstanceType: "m6idn.32xlarge",
VCPU: 128,
MemoryMb: 524288,
GPU: 0,
Architecture: "amd64",
},
"m6idn.4xlarge": {
InstanceType: "m6idn.4xlarge",
VCPU: 16,
MemoryMb: 65536,
GPU: 0,
Architecture: "amd64",
},
"m6idn.8xlarge": {
InstanceType: "m6idn.8xlarge",
VCPU: 32,
MemoryMb: 131072,
GPU: 0,
Architecture: "amd64",
},
"m6idn.large": {
InstanceType: "m6idn.large",
VCPU: 2,
MemoryMb: 8192,
GPU: 0,
Architecture: "amd64",
},
"m6idn.xlarge": {
InstanceType: "m6idn.xlarge",
VCPU: 4,
MemoryMb: 16384,
GPU: 0,
Architecture: "amd64",
},
"m6in.12xlarge": {
InstanceType: "m6in.12xlarge",
VCPU: 48,
MemoryMb: 196608,
GPU: 0,
Architecture: "amd64",
},
"m6in.16xlarge": {
InstanceType: "m6in.16xlarge",
VCPU: 64,
MemoryMb: 262144,
GPU: 0,
Architecture: "amd64",
},
"m6in.24xlarge": {
InstanceType: "m6in.24xlarge",
VCPU: 96,
MemoryMb: 393216,
GPU: 0,
Architecture: "amd64",
},
"m6in.2xlarge": {
InstanceType: "m6in.2xlarge",
VCPU: 8,
MemoryMb: 32768,
GPU: 0,
Architecture: "amd64",
},
"m6in.32xlarge": {
InstanceType: "m6in.32xlarge",
VCPU: 128,
MemoryMb: 524288,
GPU: 0,
Architecture: "amd64",
},
"m6in.4xlarge": {
InstanceType: "m6in.4xlarge",
VCPU: 16,
MemoryMb: 65536,
GPU: 0,
Architecture: "amd64",
},
"m6in.8xlarge": {
InstanceType: "m6in.8xlarge",
VCPU: 32,
MemoryMb: 131072,
GPU: 0,
Architecture: "amd64",
},
"m6in.large": {
InstanceType: "m6in.large",
VCPU: 2,
MemoryMb: 8192,
GPU: 0,
Architecture: "amd64",
},
"m6in.xlarge": {
InstanceType: "m6in.xlarge",
VCPU: 4,
MemoryMb: 16384,
GPU: 0,
Architecture: "amd64",
},
"mac1.metal": {
InstanceType: "mac1.metal",
VCPU: 12,
@ -3378,6 +3567,132 @@ var InstanceTypes = map[string]*InstanceType{
GPU: 0,
Architecture: "amd64",
},
"r6idn.12xlarge": {
InstanceType: "r6idn.12xlarge",
VCPU: 48,
MemoryMb: 393216,
GPU: 0,
Architecture: "amd64",
},
"r6idn.16xlarge": {
InstanceType: "r6idn.16xlarge",
VCPU: 64,
MemoryMb: 524288,
GPU: 0,
Architecture: "amd64",
},
"r6idn.24xlarge": {
InstanceType: "r6idn.24xlarge",
VCPU: 96,
MemoryMb: 786432,
GPU: 0,
Architecture: "amd64",
},
"r6idn.2xlarge": {
InstanceType: "r6idn.2xlarge",
VCPU: 8,
MemoryMb: 65536,
GPU: 0,
Architecture: "amd64",
},
"r6idn.32xlarge": {
InstanceType: "r6idn.32xlarge",
VCPU: 128,
MemoryMb: 1048576,
GPU: 0,
Architecture: "amd64",
},
"r6idn.4xlarge": {
InstanceType: "r6idn.4xlarge",
VCPU: 16,
MemoryMb: 131072,
GPU: 0,
Architecture: "amd64",
},
"r6idn.8xlarge": {
InstanceType: "r6idn.8xlarge",
VCPU: 32,
MemoryMb: 262144,
GPU: 0,
Architecture: "amd64",
},
"r6idn.large": {
InstanceType: "r6idn.large",
VCPU: 2,
MemoryMb: 16384,
GPU: 0,
Architecture: "amd64",
},
"r6idn.xlarge": {
InstanceType: "r6idn.xlarge",
VCPU: 4,
MemoryMb: 32768,
GPU: 0,
Architecture: "amd64",
},
"r6in.12xlarge": {
InstanceType: "r6in.12xlarge",
VCPU: 48,
MemoryMb: 393216,
GPU: 0,
Architecture: "amd64",
},
"r6in.16xlarge": {
InstanceType: "r6in.16xlarge",
VCPU: 64,
MemoryMb: 524288,
GPU: 0,
Architecture: "amd64",
},
"r6in.24xlarge": {
InstanceType: "r6in.24xlarge",
VCPU: 96,
MemoryMb: 786432,
GPU: 0,
Architecture: "amd64",
},
"r6in.2xlarge": {
InstanceType: "r6in.2xlarge",
VCPU: 8,
MemoryMb: 65536,
GPU: 0,
Architecture: "amd64",
},
"r6in.32xlarge": {
InstanceType: "r6in.32xlarge",
VCPU: 128,
MemoryMb: 1048576,
GPU: 0,
Architecture: "amd64",
},
"r6in.4xlarge": {
InstanceType: "r6in.4xlarge",
VCPU: 16,
MemoryMb: 131072,
GPU: 0,
Architecture: "amd64",
},
"r6in.8xlarge": {
InstanceType: "r6in.8xlarge",
VCPU: 32,
MemoryMb: 262144,
GPU: 0,
Architecture: "amd64",
},
"r6in.large": {
InstanceType: "r6in.large",
VCPU: 2,
MemoryMb: 16384,
GPU: 0,
Architecture: "amd64",
},
"r6in.xlarge": {
InstanceType: "r6in.xlarge",
VCPU: 4,
MemoryMb: 32768,
GPU: 0,
Architecture: "amd64",
},
"t1.micro": {
InstanceType: "t1.micro",
VCPU: 1,
@ -3581,6 +3896,20 @@ var InstanceTypes = map[string]*InstanceType{
GPU: 0,
Architecture: "arm64",
},
"trn1.2xlarge": {
InstanceType: "trn1.2xlarge",
VCPU: 8,
MemoryMb: 32768,
GPU: 0,
Architecture: "amd64",
},
"trn1.32xlarge": {
InstanceType: "trn1.32xlarge",
VCPU: 128,
MemoryMb: 524288,
GPU: 0,
Architecture: "amd64",
},
"u-12tb1.112xlarge": {
InstanceType: "u-12tb1.112xlarge",
VCPU: 448,
@ -3588,6 +3917,20 @@ var InstanceTypes = map[string]*InstanceType{
GPU: 0,
Architecture: "amd64",
},
"u-18tb1.112xlarge": {
InstanceType: "u-18tb1.112xlarge",
VCPU: 448,
MemoryMb: 18874368,
GPU: 0,
Architecture: "amd64",
},
"u-24tb1.112xlarge": {
InstanceType: "u-24tb1.112xlarge",
VCPU: 448,
MemoryMb: 25165824,
GPU: 0,
Architecture: "amd64",
},
"u-3tb1.56xlarge": {
InstanceType: "u-3tb1.56xlarge",
VCPU: 224,

View File

@ -25,8 +25,11 @@ import (
"os"
"time"
"k8s.io/klog/v2"
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider/aws"
klog "k8s.io/klog/v2"
awssdk "k8s.io/autoscaler/cluster-autoscaler/cloudprovider/aws/aws-sdk-go/aws"
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider/aws/aws-sdk-go/aws/session"
)
var packageTemplate = template.Must(template.New("").Parse(`/*
@ -78,12 +81,22 @@ var InstanceTypes = map[string]*InstanceType{
// Please note that the IAM user running the static instance types generator must be
// a non-anonymous user with privileges to call the DescribeInstanceTypes EC2 API.
func main() {
var region = flag.String("region", "", "aws region you'd like to generate instances from."+
"It will populate list from all regions if region is not specified.")
var region = flag.String("region", "", "aws region you'd like to generate instances from.")
flag.Parse()
if awssdk.StringValue(region) == "" {
klog.Fatalf("Region is required to generate instance types")
}
defer klog.Flush()
instanceTypes, err := aws.GenerateEC2InstanceTypes(*region)
sess, err := session.NewSession(&awssdk.Config{
Region: region,
})
if err != nil {
klog.Fatal(err)
}
instanceTypes, err := aws.GenerateEC2InstanceTypes(sess)
if err != nil {
klog.Fatal(err)
}

View File

@ -125,10 +125,11 @@ func TestGetManagedNodegroupNoTaintsOrLabels(t *testing.T) {
assert.Equal(t, cacheObj.name, nodegroupName)
assert.Equal(t, cacheObj.clusterName, clusterName)
assert.Equal(t, len(cacheObj.taints), 0)
assert.Equal(t, len(cacheObj.labels), 3)
assert.Equal(t, len(cacheObj.labels), 4)
assert.Equal(t, cacheObj.labels["amiType"], amiType)
assert.Equal(t, cacheObj.labels["capacityType"], capacityType)
assert.Equal(t, cacheObj.labels["k8sVersion"], k8sVersion)
assert.Equal(t, cacheObj.labels["eks.amazonaws.com/nodegroup"], nodegroupName)
}
func TestGetManagedNodegroupWithTaintsAndLabels(t *testing.T) {
@ -194,13 +195,14 @@ func TestGetManagedNodegroupWithTaintsAndLabels(t *testing.T) {
assert.Equal(t, cacheObj.taints[1].Effect, apiv1.TaintEffect(taintEffect2))
assert.Equal(t, cacheObj.taints[1].Key, taintKey2)
assert.Equal(t, cacheObj.taints[1].Value, taintValue2)
assert.Equal(t, len(cacheObj.labels), 6)
assert.Equal(t, len(cacheObj.labels), 7)
assert.Equal(t, cacheObj.labels[labelKey1], labelValue1)
assert.Equal(t, cacheObj.labels[labelKey2], labelValue2)
assert.Equal(t, cacheObj.labels["diskSize"], strconv.FormatInt(diskSize, 10))
assert.Equal(t, cacheObj.labels["amiType"], amiType)
assert.Equal(t, cacheObj.labels["capacityType"], capacityType)
assert.Equal(t, cacheObj.labels["k8sVersion"], k8sVersion)
assert.Equal(t, cacheObj.labels["eks.amazonaws.com/nodegroup"], nodegroupName)
}
func TestGetManagedNodegroupInfoObjectWithError(t *testing.T) {
@ -294,13 +296,14 @@ func TestGetManagedNodegroupInfoObjectNoCachedNodegroup(t *testing.T) {
mngInfoObject, err := c.getManagedNodegroupInfoObject(nodegroupName, clusterName)
require.NoError(t, err)
assert.Equal(t, len(mngInfoObject.labels), 6)
assert.Equal(t, len(mngInfoObject.labels), 7)
assert.Equal(t, mngInfoObject.labels[labelKey1], labelValue1)
assert.Equal(t, mngInfoObject.labels[labelKey2], labelValue2)
assert.Equal(t, mngInfoObject.labels["diskSize"], strconv.FormatInt(diskSize, 10))
assert.Equal(t, mngInfoObject.labels["amiType"], amiType)
assert.Equal(t, mngInfoObject.labels["capacityType"], capacityType)
assert.Equal(t, mngInfoObject.labels["k8sVersion"], k8sVersion)
assert.Equal(t, mngInfoObject.labels["eks.amazonaws.com/nodegroup"], nodegroupName)
k.AssertCalled(t, "DescribeNodegroup", &eks.DescribeNodegroupInput{
ClusterName: &clusterName,
NodegroupName: &nodegroupName,
@ -377,13 +380,14 @@ func TestGetManagedNodegroupLabelsNoCachedNodegroup(t *testing.T) {
labelsMap, err := c.getManagedNodegroupLabels(nodegroupName, clusterName)
require.NoError(t, err)
assert.Equal(t, len(labelsMap), 6)
assert.Equal(t, len(labelsMap), 7)
assert.Equal(t, labelsMap[labelKey1], labelValue1)
assert.Equal(t, labelsMap[labelKey2], labelValue2)
assert.Equal(t, labelsMap["diskSize"], strconv.FormatInt(diskSize, 10))
assert.Equal(t, labelsMap["amiType"], amiType)
assert.Equal(t, labelsMap["capacityType"], capacityType)
assert.Equal(t, labelsMap["k8sVersion"], k8sVersion)
assert.Equal(t, labelsMap["eks.amazonaws.com/nodegroup"], nodegroupName)
k.AssertCalled(t, "DescribeNodegroup", &eks.DescribeNodegroupInput{
ClusterName: &clusterName,
NodegroupName: &nodegroupName,
@ -471,13 +475,14 @@ func TestGetManagedNodegroupLabelsWithCachedNodegroupThatExpires(t *testing.T) {
// Query for nodegroup entry after it expires - should have the new labels added
newLabelsMap, err := c.getManagedNodegroupLabels(nodegroupName, clusterName)
require.NoError(t, err)
assert.Equal(t, len(newLabelsMap), 6)
assert.Equal(t, len(newLabelsMap), 7)
assert.Equal(t, newLabelsMap[labelKey1], labelValue1)
assert.Equal(t, newLabelsMap[labelKey2], labelValue2)
assert.Equal(t, newLabelsMap["diskSize"], strconv.FormatInt(diskSize, 10))
assert.Equal(t, newLabelsMap["amiType"], amiType)
assert.Equal(t, newLabelsMap["capacityType"], capacityType)
assert.Equal(t, newLabelsMap["k8sVersion"], k8sVersion)
assert.Equal(t, newLabelsMap["eks.amazonaws.com/nodegroup"], nodegroupName)
k.AssertCalled(t, "DescribeNodegroup", &eks.DescribeNodegroupInput{
ClusterName: &clusterName,
NodegroupName: &nodegroupName,

View File

@ -106,6 +106,11 @@ func (azure *AzureCloudProvider) NodeGroupForNode(node *apiv1.Node) (cloudprovid
return azure.azureManager.GetNodeGroupForInstance(ref)
}
// HasInstance returns whether a given node has a corresponding instance in this cloud provider
func (azure *AzureCloudProvider) HasInstance(*apiv1.Node) (bool, error) {
return true, cloudprovider.ErrNotImplemented
}
// Pricing returns pricing model for this cloud provider or error if not available.
func (azure *AzureCloudProvider) Pricing() (cloudprovider.PricingModel, errors.AutoscalerError) {
return nil, cloudprovider.ErrNotImplemented

View File

@ -51,7 +51,7 @@ rules:
resources: ["statefulsets", "replicasets", "daemonsets"]
verbs: ["watch", "list", "get"]
- apiGroups: ["storage.k8s.io"]
resources: ["storageclasses", "csinodes"]
resources: ["storageclasses", "csinodes", "csidrivers", "csistoragecapacities"]
verbs: ["get", "list", "watch"]
- apiGroups: ["batch"]
resources: ["jobs", "cronjobs"]

View File

@ -51,7 +51,7 @@ rules:
resources: ["statefulsets", "replicasets", "daemonsets"]
verbs: ["watch", "list", "get"]
- apiGroups: ["storage.k8s.io"]
resources: ["storageclasses", "csinodes"]
resources: ["storageclasses", "csinodes", "csidrivers", "csistoragecapacities"]
verbs: ["get", "list", "watch"]
- apiGroups: ["batch"]
resources: ["jobs", "cronjobs"]

View File

@ -51,7 +51,7 @@ rules:
resources: ["statefulsets", "replicasets", "daemonsets"]
verbs: ["watch", "list", "get"]
- apiGroups: ["storage.k8s.io"]
resources: ["storageclasses", "csinodes"]
resources: ["storageclasses", "csinodes", "csidrivers", "csistoragecapacities"]
verbs: ["get", "list", "watch"]
- apiGroups: ["batch"]
resources: ["jobs", "cronjobs"]

View File

@ -51,7 +51,7 @@ rules:
resources: ["statefulsets", "replicasets", "daemonsets"]
verbs: ["watch", "list", "get"]
- apiGroups: ["storage.k8s.io"]
resources: ["storageclasses", "csinodes"]
resources: ["storageclasses", "csinodes", "csidrivers", "csistoragecapacities"]
verbs: ["get", "list", "watch"]
- apiGroups: ["batch"]
resources: ["jobs", "cronjobs"]

View File

@ -51,7 +51,7 @@ rules:
resources: ["statefulsets", "replicasets", "daemonsets"]
verbs: ["watch", "list", "get"]
- apiGroups: ["storage.k8s.io"]
resources: ["storageclasses", "csinodes"]
resources: ["storageclasses", "csinodes", "csidrivers", "csistoragecapacities"]
verbs: ["get", "list", "watch"]
- apiGroups: ["batch"]
resources: ["jobs", "cronjobs"]

View File

@ -51,7 +51,7 @@ rules:
resources: ["statefulsets", "replicasets", "daemonsets"]
verbs: ["watch", "list", "get"]
- apiGroups: ["storage.k8s.io"]
resources: ["storageclasses", "csinodes"]
resources: ["storageclasses", "csinodes", "csidrivers", "csistoragecapacities"]
verbs: ["get", "list", "watch"]
- apiGroups: ["batch"]
resources: ["jobs", "cronjobs"]

View File

@ -51,7 +51,7 @@ rules:
resources: ["statefulsets", "replicasets", "daemonsets"]
verbs: ["watch", "list", "get"]
- apiGroups: ["storage.k8s.io"]
resources: ["storageclasses", "csinodes"]
resources: ["storageclasses", "csinodes", "csidrivers", "csistoragecapacities"]
verbs: ["get", "list", "watch"]
- apiGroups: ["batch"]
resources: ["jobs", "cronjobs"]

View File

@ -51,7 +51,7 @@ rules:
resources: ["statefulsets", "replicasets", "daemonsets"]
verbs: ["watch", "list", "get"]
- apiGroups: ["storage.k8s.io"]
resources: ["storageclasses", "csinodes"]
resources: ["storageclasses", "csinodes", "csidrivers", "csistoragecapacities"]
verbs: ["get", "list", "watch"]
- apiGroups: ["batch"]
resources: ["jobs", "cronjobs"]

View File

@ -180,6 +180,11 @@ func (baiducloud *baiducloudCloudProvider) NodeGroupForNode(node *apiv1.Node) (c
return asg, err
}
// HasInstance returns whether a given node has a corresponding instance in this cloud provider
func (baiducloud *baiducloudCloudProvider) HasInstance(*apiv1.Node) (bool, error) {
return true, cloudprovider.ErrNotImplemented
}
// Pricing returns pricing model for this cloud provider or error if not available.
// Implementation optional.
func (baiducloud *baiducloudCloudProvider) Pricing() (cloudprovider.PricingModel, errors.AutoscalerError) {

View File

@ -104,6 +104,11 @@ func (d *bizflycloudCloudProvider) NodeGroupForNode(node *apiv1.Node) (cloudprov
return nil, nil
}
// HasInstance returns whether a given node has a corresponding instance in this cloud provider
func (d *bizflycloudCloudProvider) HasInstance(node *apiv1.Node) (bool, error) {
return true, cloudprovider.ErrNotImplemented
}
// Pricing returns pricing model for this cloud provider or error if not
// available. Implementation optional.
func (d *bizflycloudCloudProvider) Pricing() (cloudprovider.PricingModel, errors.AutoscalerError) {

View File

@ -81,6 +81,11 @@ func (b *brightboxCloudProvider) NodeGroupForNode(node *apiv1.Node) (cloudprovid
return nil, nil
}
// HasInstance returns whether a given node has a corresponding instance in this cloud provider
func (b *brightboxCloudProvider) HasInstance(node *apiv1.Node) (bool, error) {
return true, cloudprovider.ErrNotImplemented
}
// Refresh is before every main loop and can be used to dynamically
// update cloud provider state.
// In particular the list of node groups returned by NodeGroups can

View File

@ -122,6 +122,11 @@ func (ccp *cherryCloudProvider) NodeGroupForNode(node *apiv1.Node) (cloudprovide
return nil, nil
}
// HasInstance returns whether a given node has a corresponding instance in this cloud provider
func (ccp *cherryCloudProvider) HasInstance(node *apiv1.Node) (bool, error) {
return true, cloudprovider.ErrNotImplemented
}
// Pricing returns pricing model for this cloud provider or error if not available.
func (ccp *cherryCloudProvider) Pricing() (cloudprovider.PricingModel, errors.AutoscalerError) {
return nil, cloudprovider.ErrNotImplemented

View File

@ -607,8 +607,6 @@ func (mgr *cherryManagerRest) deleteNodes(nodegroup string, nodes []NodeRef, upd
func BuildGenericLabels(nodegroup string, plan *Plan) map[string]string {
result := make(map[string]string)
//result[kubeletapis.LabelArch] = "amd64"
//result[kubeletapis.LabelOS] = "linux"
result[apiv1.LabelInstanceType] = plan.Name
//result[apiv1.LabelZoneRegion] = ""
//result[apiv1.LabelZoneFailureDomain] = "0"

View File

@ -99,6 +99,11 @@ func (d *civoCloudProvider) NodeGroupForNode(node *apiv1.Node) (cloudprovider.No
return nil, nil
}
// HasInstance returns whether a given node has a corresponding instance in this cloud provider
func (d *civoCloudProvider) HasInstance(node *apiv1.Node) (bool, error) {
return true, cloudprovider.ErrNotImplemented
}
// Pricing returns pricing model for this cloud provider or error if not
// available. Implementation optional.
func (d *civoCloudProvider) Pricing() (cloudprovider.PricingModel, errors.AutoscalerError) {

View File

@ -100,6 +100,10 @@ type CloudProvider interface {
// occurred. Must be implemented.
NodeGroupForNode(*apiv1.Node) (NodeGroup, error)
// HasInstance returns whether the node has a corresponding instance in this cloud provider:
// true if the node has an instance, false if it no longer exists.
HasInstance(*apiv1.Node) (bool, error)
// Pricing returns pricing model for this cloud provider or error if not available.
// Implementation optional.
Pricing() (PricingModel, errors.AutoscalerError)

View File

@ -68,6 +68,11 @@ func (provider *cloudStackCloudProvider) NodeGroupForNode(node *v1.Node) (cloudp
return provider.manager.clusterForNode(node)
}
// HasInstance returns whether a given node has a corresponding instance in this cloud provider
func (provider *cloudStackCloudProvider) HasInstance(node *v1.Node) (bool, error) {
return true, cloudprovider.ErrNotImplemented
}
// Cleanup cleans up open resources before the cloud provider is destroyed, i.e. go routines etc.
func (provider *cloudStackCloudProvider) Cleanup() error {
return provider.manager.cleanup()

View File

@ -5,6 +5,30 @@ the [cluster-api project](https://github.com/kubernetes-sigs/cluster-api) to
manage the provisioning and de-provisioning of nodes within a Kubernetes
cluster.
## Table of Contents:
<!-- TOC BEGIN -->
* [Kubernetes Version](#kubernetes-version)
* [Starting the Autoscaler](#starting-the-autoscaler)
* [Configuring node group auto discovery](#configuring-node-group-auto-discovery)
* [Connecting cluster-autoscaler to Cluster API management and workload Clusters](#connecting-cluster-autoscaler-to-cluster-api-management-and-workload-clusters)
* [Autoscaler running in a joined cluster using service account credentials](#autoscaler-running-in-a-joined-cluster-using-service-account-credentials)
* [Autoscaler running in workload cluster using service account credentials, with separate management cluster](#autoscaler-running-in-workload-cluster-using-service-account-credentials-with-separate-management-cluster)
* [Autoscaler running in management cluster using service account credentials, with separate workload cluster](#autoscaler-running-in-management-cluster-using-service-account-credentials-with-separate-workload-cluster)
* [Autoscaler running anywhere, with separate kubeconfigs for management and workload clusters](#autoscaler-running-anywhere-with-separate-kubeconfigs-for-management-and-workload-clusters)
* [Autoscaler running anywhere, with a common kubeconfig for management and workload clusters](#autoscaler-running-anywhere-with-a-common-kubeconfig-for-management-and-workload-clusters)
* [Enabling Autoscaling](#enabling-autoscaling)
* [Scale from zero support](#scale-from-zero-support)
* [RBAC changes for scaling from zero](#rbac-changes-for-scaling-from-zero)
* [Pre-defined labels and taints on nodes scaled from zero](#pre-defined-labels-and-taints-on-nodes-scaled-from-zero)
* [Specifying a Custom Resource Group](#specifying-a-custom-resource-group)
* [Specifying a Custom Resource Version](#specifying-a-custom-resource-version)
* [Sample manifest](#sample-manifest)
* [A note on permissions](#a-note-on-permissions)
* [Autoscaling with ClusterClass and Managed Topologies](#autoscaling-with-clusterclass-and-managed-topologies)
* [Special note on GPU instances](#special-note-on-gpu-instances)
* [Special note on balancing similar node groups](#special-note-on-balancing-similar-node-groups)
<!-- TOC END -->
## Kubernetes Version
The cluster-api provider requires Kubernetes v1.16 or greater to run the
@ -322,3 +346,84 @@ spec:
**Warning**: If the Autoscaler is enabled **and** the replicas field is set for a `MachineDeployment` or `MachineSet`, the Cluster may enter a broken state where replicas become unpredictable.
If the replicas field is unset in the Cluster definition, Autoscaling can be enabled [as described above](#enabling-autoscaling).
## Special note on GPU instances
As with other providers, if the device plugin on nodes that provides GPU
resources takes some time to advertise the GPU resource to the cluster, this
may cause Cluster Autoscaler to unnecessarily scale out multiple times.
To avoid this, you can configure `kubelet` on your GPU nodes to label the node
before it joins the cluster by passing it the `--node-labels` flag. For the
CAPI cloudprovider, the label format is as follows:
`cluster-api/accelerator=<gpu-type>`
`<gpu-type>` is arbitrary.
It is important to note that if you are using the `--gpu-total` flag to limit the number
of GPU resources in your cluster, the `<gpu-type>` value must match
between the command line flag and the node labels. Setting these values incorrectly
can lead to the autoscaler creating too many GPU resources.
For example, if you are using the autoscaler command line flag
`--gpu-total=gfx-hardware:1:2` to limit the number of `gfx-hardware` resources
to a minimum of 1 and maximum of 2, then you should use the kubelet node label flag
`--node-labels=cluster-api/accelerator=gfx-hardware`.
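As a minimal sketch (assuming a kubeadm-based bootstrap; the `KubeadmConfigTemplate` fields and the `gfx-hardware` value are illustrative assumptions, not prescribed here), the kubelet label and the autoscaler limit could be wired together like this:

```yaml
# Hypothetical KubeadmConfigTemplate fragment: label GPU nodes before they join the cluster.
spec:
  template:
    spec:
      joinConfiguration:
        nodeRegistration:
          kubeletExtraArgs:
            node-labels: "cluster-api/accelerator=gfx-hardware"
---
# Matching Cluster Autoscaler container args; the gpu-type must match the label above.
args:
  - --cloud-provider=clusterapi
  - --gpu-total=gfx-hardware:1:2
```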
## Special note on balancing similar node groups
The Cluster Autoscaler feature to enable balancing similar node groups
(activated with the `--balance-similar-node-groups` flag) is a powerful and
popular feature. When enabled, the Cluster Autoscaler will attempt to create
new nodes by adding them in a manner that balances the creation between
similar node groups. With Cluster API, these node groups correspond directly
to the scalable resources (usually MachineDeployments and MachineSets) associated
with the nodes in question. In order for the nodes of these scalable resources
to be considered similar by the Cluster Autoscaler, they must have the same
capacity, labels, and taints for the nodes which will be created from them.
To help assist the Cluster Autoscaler in determining which node groups are
similar, the command line flags `--balancing-ignore-label` and
`--balancing-label` are provided. For an expanded discussion about balancing
similar node groups and the options which are available, please see the
[Cluster Autoscaler FAQ](../../FAQ.md).
Because Cluster API can address many different cloud providers, it is important
to configure the balancing labels to ignore provider-specific labels which
are used for carrying zonal information on Kubernetes nodes. The Cluster
Autoscaler implementation for Cluster API does not assume any labels (aside from
the [well-known Kubernetes labels](https://kubernetes.io/docs/reference/labels-annotations-taints/))
to be ignored when running. Users must configure their Cluster Autoscaler deployment
to ignore labels which might be different between nodes, but which do not
otherwise affect node behavior or size (for example when two MachineDeployments
are the same except for their deployment zones). The Cluster API community has
decided not to carry cloud provider specific labels in the Cluster Autoscaler
to reduce the possibility for labels to clash between providers. Additionally,
the community has agreed to promote documentation and the use of the `--balancing-ignore-label`
flag as the preferred method of deployment to reduce the extended need for
maintenance on the Cluster Autoscaler when new providers are added or updated.
For further context around this decision, please see the
[Cluster API Deep Dive into Cluster Autoscaler Node Group Balancing discussion from 2022-09-12](https://www.youtube.com/watch?v=jbhca_9oPuQ&t=5s).
The following table shows some of the most common labels used by cloud providers
to designate regional or zonal information on Kubernetes nodes. It is shared
here as a reference for users who might be deploying on these infrastructures.
| Cloud Provider | Label to ignore | Notes |
| --- | --- | --- |
| Alibaba Cloud | `topology.diskplugin.csi.alibabacloud.com/zone` | Used by the Alibaba Cloud CSI driver as a target for persistent volume node affinity |
| AWS | `alpha.eksctl.io/instance-id` | Used by `eksctl` to identify instances |
| AWS | `alpha.eksctl.io/nodegroup-name` | Used by `eksctl` to identify node group names |
| AWS | `eks.amazonaws.com/nodegroup` | Used by EKS to identify node groups |
| AWS | `k8s.amazonaws.com/eniConfig` | Used by the AWS CNI for custom networking |
| AWS | `lifecycle` | Used by AWS as a label for spot instances |
| AWS | `topology.ebs.csi.aws.com/zone` | Used by the AWS EBS CSI driver as a target for persistent volume node affinity |
| Azure | `topology.disk.csi.azure.com/zone` | Used as the topology key by the Azure Disk CSI driver |
| Azure | `agentpool` | Legacy label used to specify to which Azure node pool a particular node belongs |
| Azure | `kubernetes.azure.com/agentpool` | Used by AKS to identify to which node pool a particular node belongs |
| GCE | `topology.gke.io/zone` | Used to specify the zone of the node |
| IBM Cloud | `ibm-cloud.kubernetes.io/worker-id` | Used by the IBM Cloud Cloud Controller Manager to identify the node |
| IBM Cloud | `vpc-block-csi-driver-labels` | Used by the IBM Cloud CSI driver as a target for persistent volume node affinity |
| IBM Cloud | `ibm-cloud.kubernetes.io/vpc-instance-id` | Used when a VPC is in use on IBM Cloud |
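As an illustrative example (the specific labels chosen are assumptions drawn from the table above, not a recommendation for every deployment), a Cluster Autoscaler args fragment that enables balancing and ignores two of the AWS labels might look like:

```yaml
# Sketch of Cluster Autoscaler container args; adjust the ignored labels to your infrastructure.
args:
  - --cloud-provider=clusterapi
  - --balance-similar-node-groups=true
  - --balancing-ignore-label=topology.ebs.csi.aws.com/zone
  - --balancing-ignore-label=alpha.eksctl.io/instance-id
```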

View File

@ -25,7 +25,6 @@ import (
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
kubeletapis "k8s.io/kubelet/pkg/apis"
schedulerframework "k8s.io/kubernetes/pkg/scheduler/framework"
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider"
@ -370,10 +369,8 @@ func buildGenericLabels(nodeName string) map[string]string {
// TODO revisit this function and add an explanation about what these
// labels are used for, or remove them if not necessary
m := make(map[string]string)
m[kubeletapis.LabelArch] = cloudprovider.DefaultArch
m[corev1.LabelArchStable] = cloudprovider.DefaultArch
m[kubeletapis.LabelOS] = cloudprovider.DefaultOS
m[corev1.LabelOSStable] = cloudprovider.DefaultOS
m[corev1.LabelHostname] = nodeName
@ -387,10 +384,8 @@ func extractNodeLabels(node *corev1.Node) map[string]string {
return m
}
setLabelIfNotEmpty(m, node.Labels, kubeletapis.LabelArch)
setLabelIfNotEmpty(m, node.Labels, corev1.LabelArchStable)
setLabelIfNotEmpty(m, node.Labels, kubeletapis.LabelOS)
setLabelIfNotEmpty(m, node.Labels, corev1.LabelOSStable)
setLabelIfNotEmpty(m, node.Labels, corev1.LabelInstanceType)

View File

@ -1310,9 +1310,7 @@ func TestNodeGroupTemplateNodeInfo(t *testing.T) {
expectedErr: nil,
nodeLabels: map[string]string{
"kubernetes.io/os": "linux",
"beta.kubernetes.io/os": "linux",
"kubernetes.io/arch": "amd64",
"beta.kubernetes.io/arch": "amd64",
},
expectedCapacity: map[corev1.ResourceName]int64{
corev1.ResourceCPU: 2,
@ -1322,9 +1320,7 @@ func TestNodeGroupTemplateNodeInfo(t *testing.T) {
},
expectedNodeLabels: map[string]string{
"kubernetes.io/os": "linux",
"beta.kubernetes.io/os": "linux",
"kubernetes.io/arch": "amd64",
"beta.kubernetes.io/arch": "amd64",
"kubernetes.io/hostname": "random value",
},
},
@ -1340,9 +1336,7 @@ func TestNodeGroupTemplateNodeInfo(t *testing.T) {
expectedErr: nil,
nodeLabels: map[string]string{
"kubernetes.io/os": "windows",
"beta.kubernetes.io/os": "windows",
"kubernetes.io/arch": "arm64",
"beta.kubernetes.io/arch": "arm64",
"node.kubernetes.io/instance-type": "instance1",
},
expectedCapacity: map[corev1.ResourceName]int64{
@ -1353,9 +1347,7 @@ func TestNodeGroupTemplateNodeInfo(t *testing.T) {
expectedNodeLabels: map[string]string{
"kubernetes.io/hostname": "random value",
"kubernetes.io/os": "windows",
"beta.kubernetes.io/os": "windows",
"kubernetes.io/arch": "arm64",
"beta.kubernetes.io/arch": "arm64",
"node.kubernetes.io/instance-type": "instance1",
},
},

View File

@ -81,6 +81,11 @@ func (p *provider) NodeGroupForNode(node *corev1.Node) (cloudprovider.NodeGroup,
return ng, nil
}
// HasInstance returns whether a given node has a corresponding instance in this cloud provider
func (p *provider) HasInstance(node *corev1.Node) (bool, error) {
return true, cloudprovider.ErrNotImplemented
}
func (*provider) Pricing() (cloudprovider.PricingModel, errors.AutoscalerError) {
return nil, cloudprovider.ErrNotImplemented
}

View File

@ -101,6 +101,11 @@ func (d *digitaloceanCloudProvider) NodeGroupForNode(node *apiv1.Node) (cloudpro
return nil, nil
}
// HasInstance returns whether a given node has a corresponding instance in this cloud provider
func (d *digitaloceanCloudProvider) HasInstance(node *apiv1.Node) (bool, error) {
return true, cloudprovider.ErrNotImplemented
}
// Pricing returns pricing model for this cloud provider or error if not
// available. Implementation optional.
func (d *digitaloceanCloudProvider) Pricing() (cloudprovider.PricingModel, errors.AutoscalerError) {

View File

@ -131,6 +131,11 @@ func (e *exoscaleCloudProvider) NodeGroupForNode(node *apiv1.Node) (cloudprovide
return nodeGroup, nil
}
// HasInstance returns whether a given node has a corresponding instance in this cloud provider
func (e *exoscaleCloudProvider) HasInstance(node *apiv1.Node) (bool, error) {
return true, cloudprovider.ErrNotImplemented
}
// Pricing returns pricing model for this cloud provider or error if not available.
// Implementation optional.
func (e *exoscaleCloudProvider) Pricing() (cloudprovider.PricingModel, errors.AutoscalerError) {

View File

@ -134,6 +134,11 @@ func (e *externalGrpcCloudProvider) NodeGroupForNode(node *apiv1.Node) (cloudpro
return ng, nil
}
// HasInstance returns whether a given node has a corresponding instance in this cloud provider
func (e *externalGrpcCloudProvider) HasInstance(node *apiv1.Node) (bool, error) {
return true, cloudprovider.ErrNotImplemented
}
// pricingModel implements cloudprovider.PricingModel interface.
type pricingModel struct {
client protos.CloudProviderClient

View File

@ -101,6 +101,11 @@ func (gce *GceCloudProvider) NodeGroupForNode(node *apiv1.Node) (cloudprovider.N
return mig, err
}
// HasInstance returns whether a given node has a corresponding instance in this cloud provider
func (gce *GceCloudProvider) HasInstance(node *apiv1.Node) (bool, error) {
return true, cloudprovider.ErrNotImplemented
}
// Pricing returns pricing model for this cloud provider or error if not available.
func (gce *GceCloudProvider) Pricing() (cloudprovider.PricingModel, errors.AutoscalerError) {
return gce.pricingModel, nil
@ -177,7 +182,6 @@ type Mig interface {
cloudprovider.NodeGroup
GceRef() GceRef
Version() string
}
type gceMig struct {
@ -188,11 +192,6 @@ type gceMig struct {
maxSize int
}
// Version return the Mig version.
func (mig *gceMig) Version() string {
return ""
}
// GceRef returns Mig's GceRef
func (mig *gceMig) GceRef() GceRef {
return mig.gceRef

View File

@ -589,7 +589,11 @@ func (m *gceManagerImpl) GetMigTemplateNode(mig Mig) (*apiv1.Node, error) {
if err != nil {
return nil, err
}
return m.templates.BuildNodeFromTemplate(mig, template, machineType.CPU, machineType.Memory, nil, m.reserved)
migOsInfo, err := m.templates.MigOsInfo(mig.Id(), template)
if err != nil {
return nil, err
}
return m.templates.BuildNodeFromTemplate(mig, migOsInfo, template, machineType.CPU, machineType.Memory, nil, m.reserved)
}
// parseMIGAutoDiscoverySpecs returns any provided NodeGroupAutoDiscoverySpecs

View File

@ -87,7 +87,10 @@ type GceReserved struct{}
// CalculateKernelReserved computes how much memory Linux kernel will reserve.
// TODO(jkaniuk): account for crashkernel reservation on RHEL / CentOS
func (r *GceReserved) CalculateKernelReserved(physicalMemory int64, os OperatingSystem, osDistribution OperatingSystemDistribution, arch SystemArchitecture, nodeVersion string) int64 {
func (r *GceReserved) CalculateKernelReserved(m MigOsInfo, physicalMemory int64) int64 {
os := m.Os()
osDistribution := m.OsDistribution()
arch := m.Arch()
switch os {
case OperatingSystemLinux:
// Account for memory reserved by kernel
@ -267,7 +270,9 @@ func EphemeralStorageOnLocalSSDFilesystemOverheadInBytes(diskCount int64, osDist
}
// CalculateOSReservedEphemeralStorage estimates how much ephemeral storage OS will reserve and eviction threshold
func (r *GceReserved) CalculateOSReservedEphemeralStorage(diskSize int64, os OperatingSystem, osDistribution OperatingSystemDistribution, arch SystemArchitecture, nodeVersion string) int64 {
func (r *GceReserved) CalculateOSReservedEphemeralStorage(m MigOsInfo, diskSize int64) int64 {
osDistribution := m.OsDistribution()
arch := m.Arch()
switch osDistribution {
case OperatingSystemDistributionCOS:
storage := int64(math.Ceil(0.015635*float64(diskSize))) + int64(math.Ceil(4.148*GiB)) // os partition estimation
@ -289,3 +294,30 @@ func (r *GceReserved) CalculateOSReservedEphemeralStorage(diskSize int64, os Ope
return 0
}
}
// GceMigOsInfo contains os details of nodes in gce mig.
type GceMigOsInfo struct {
os OperatingSystem
osDistribution OperatingSystemDistribution
arch SystemArchitecture
}
// Os returns the operating system.
func (m *GceMigOsInfo) Os() OperatingSystem {
return m.os
}
// OsDistribution returns the operating system distribution.
func (m *GceMigOsInfo) OsDistribution() OperatingSystemDistribution {
return m.osDistribution
}
// Arch returns the system architecture.
func (m *GceMigOsInfo) Arch() SystemArchitecture {
return m.arch
}
// NewMigOsInfo returns the GCE implementation of the MigOsInfo interface.
func NewMigOsInfo(os OperatingSystem, osDistribution OperatingSystemDistribution, arch SystemArchitecture) MigOsInfo {
return &GceMigOsInfo{os, osDistribution, arch}
}

View File

@ -108,7 +108,8 @@ func TestCalculateKernelReservedLinux(t *testing.T) {
for idx, tc := range testCases {
r := &GceReserved{}
t.Run(fmt.Sprintf("%v", idx), func(t *testing.T) {
reserved := r.CalculateKernelReserved(tc.physicalMemory, OperatingSystemLinux, tc.osDistribution, tc.arch, "")
m := NewMigOsInfo(OperatingSystemLinux, tc.osDistribution, tc.arch)
reserved := r.CalculateKernelReserved(m, tc.physicalMemory)
if tc.osDistribution == OperatingSystemDistributionUbuntu {
assert.Equal(t, tc.reservedMemory+int64(math.Min(correctionConstant*float64(tc.physicalMemory), maximumCorrectionValue)+ubuntuSpecificOffset), reserved)
} else if tc.osDistribution == OperatingSystemDistributionCOS {

View File

@ -16,13 +16,23 @@ limitations under the License.
package gce
// MigOsInfo stores OS parameters.
type MigOsInfo interface {
// Os returns the operating system.
Os() OperatingSystem
// OsDistribution returns the operating system distribution.
OsDistribution() OperatingSystemDistribution
// Arch returns the system architecture.
Arch() SystemArchitecture
}
// OsReservedCalculator calculates the OS reserved values.
type OsReservedCalculator interface {
// CalculateKernelReserved computes how much memory OS kernel will reserve.
// NodeVersion parameter is optional. If empty string is passed a result calculated using default node version will be returned.
CalculateKernelReserved(physicalMemory int64, os OperatingSystem, osDistribution OperatingSystemDistribution, arch SystemArchitecture, nodeVersion string) int64
CalculateKernelReserved(m MigOsInfo, physicalMemory int64) int64
// CalculateOSReservedEphemeralStorage estimates how much ephemeral storage OS will reserve and eviction threshold.
// NodeVersion parameter is optional. If empty string is passed a result calculated using default node version will be returned.
CalculateOSReservedEphemeralStorage(diskSize int64, os OperatingSystem, osDistribution OperatingSystemDistribution, arch SystemArchitecture, nodeVersion string) int64
CalculateOSReservedEphemeralStorage(m MigOsInfo, diskSize int64) int64
}

View File

@ -71,8 +71,8 @@ func (t *GceTemplateBuilder) getAcceleratorCount(accelerators []*gce.Accelerator
}
// BuildCapacity builds a list of resource capacities given list of hardware.
func (t *GceTemplateBuilder) BuildCapacity(cpu int64, mem int64, accelerators []*gce.AcceleratorConfig, os OperatingSystem, osDistribution OperatingSystemDistribution, arch SystemArchitecture,
ephemeralStorage int64, ephemeralStorageLocalSSDCount int64, pods *int64, version string, r OsReservedCalculator, extendedResources apiv1.ResourceList) (apiv1.ResourceList, error) {
func (t *GceTemplateBuilder) BuildCapacity(m MigOsInfo, cpu int64, mem int64, accelerators []*gce.AcceleratorConfig,
ephemeralStorage int64, ephemeralStorageLocalSSDCount int64, pods *int64, r OsReservedCalculator, extendedResources apiv1.ResourceList) (apiv1.ResourceList, error) {
capacity := apiv1.ResourceList{}
if pods == nil {
capacity[apiv1.ResourcePods] = *resource.NewQuantity(110, resource.DecimalSI)
@ -81,7 +81,7 @@ func (t *GceTemplateBuilder) BuildCapacity(cpu int64, mem int64, accelerators []
}
capacity[apiv1.ResourceCPU] = *resource.NewQuantity(cpu, resource.DecimalSI)
memTotal := mem - r.CalculateKernelReserved(mem, os, osDistribution, arch, version)
memTotal := mem - r.CalculateKernelReserved(m, mem)
capacity[apiv1.ResourceMemory] = *resource.NewQuantity(memTotal, resource.DecimalSI)
if accelerators != nil && len(accelerators) > 0 {
@ -91,9 +91,9 @@ func (t *GceTemplateBuilder) BuildCapacity(cpu int64, mem int64, accelerators []
if ephemeralStorage > 0 {
var storageTotal int64
if ephemeralStorageLocalSSDCount > 0 {
storageTotal = ephemeralStorage - EphemeralStorageOnLocalSSDFilesystemOverheadInBytes(ephemeralStorageLocalSSDCount, osDistribution)
storageTotal = ephemeralStorage - EphemeralStorageOnLocalSSDFilesystemOverheadInBytes(ephemeralStorageLocalSSDCount, m.OsDistribution())
} else {
storageTotal = ephemeralStorage - r.CalculateOSReservedEphemeralStorage(ephemeralStorage, os, osDistribution, arch, version)
storageTotal = ephemeralStorage - r.CalculateOSReservedEphemeralStorage(m, ephemeralStorage)
}
capacity[apiv1.ResourceEphemeralStorage] = *resource.NewQuantity(int64(math.Max(float64(storageTotal), 0)), resource.DecimalSI)
}
@ -160,8 +160,31 @@ func getKubeEnvValueFromTemplateMetadata(template *gce.InstanceTemplate) (string
return "", nil
}
// MigOsInfo returns the OS details stored in the instance template.
func (t *GceTemplateBuilder) MigOsInfo(migId string, template *gce.InstanceTemplate) (MigOsInfo, error) {
kubeEnvValue, err := getKubeEnvValueFromTemplateMetadata(template)
if err != nil {
return nil, fmt.Errorf("could not obtain kube-env from template metadata; %v", err)
}
os := extractOperatingSystemFromKubeEnv(kubeEnvValue)
if os == OperatingSystemUnknown {
return nil, fmt.Errorf("could not obtain os from kube-env from template metadata")
}
osDistribution := extractOperatingSystemDistributionFromKubeEnv(kubeEnvValue)
if osDistribution == OperatingSystemDistributionUnknown {
return nil, fmt.Errorf("could not obtain os-distribution from kube-env from template metadata")
}
arch, err := extractSystemArchitectureFromKubeEnv(kubeEnvValue)
if err != nil {
arch = DefaultArch
klog.Errorf("Couldn't extract architecture from kube-env for MIG %q, falling back to %q. Error: %v", migId, arch, err)
}
return NewMigOsInfo(os, osDistribution, arch), nil
}
// BuildNodeFromTemplate builds node from provided GCE template.
func (t *GceTemplateBuilder) BuildNodeFromTemplate(mig Mig, template *gce.InstanceTemplate, cpu int64, mem int64, pods *int64, reserved OsReservedCalculator) (*apiv1.Node, error) {
func (t *GceTemplateBuilder) BuildNodeFromTemplate(mig Mig, migOsInfo MigOsInfo, template *gce.InstanceTemplate, cpu int64, mem int64, pods *int64, reserved OsReservedCalculator) (*apiv1.Node, error) {
if template.Properties == nil {
return nil, fmt.Errorf("instance template %s has no properties", template.Name)
@ -181,22 +204,6 @@ func (t *GceTemplateBuilder) BuildNodeFromTemplate(mig Mig, template *gce.Instan
Labels: map[string]string{},
}
// This call is safe even if kubeEnvValue is empty
os := extractOperatingSystemFromKubeEnv(kubeEnvValue)
if os == OperatingSystemUnknown {
return nil, fmt.Errorf("could not obtain os from kube-env from template metadata")
}
osDistribution := extractOperatingSystemDistributionFromKubeEnv(kubeEnvValue)
if osDistribution == OperatingSystemDistributionUnknown {
return nil, fmt.Errorf("could not obtain os-distribution from kube-env from template metadata")
}
arch, err := extractSystemArchitectureFromKubeEnv(kubeEnvValue)
if err != nil {
arch = DefaultArch
klog.Errorf("Couldn't extract architecture from kube-env for MIG %q, falling back to %q. Error: %v", mig.Id(), arch, err)
}
addBootDiskAnnotations(&node, template.Properties)
var ephemeralStorage int64 = -1
if !isBootDiskEphemeralStorageWithInstanceTemplateDisabled(kubeEnvValue) {
@ -225,7 +232,7 @@ func (t *GceTemplateBuilder) BuildNodeFromTemplate(mig Mig, template *gce.Instan
klog.Errorf("could not fetch extended resources from instance template: %v", err)
}
capacity, err := t.BuildCapacity(cpu, mem, template.Properties.GuestAccelerators, os, osDistribution, arch, ephemeralStorage, ephemeralStorageLocalSsdCount, pods, mig.Version(), reserved, extendedResources)
capacity, err := t.BuildCapacity(migOsInfo, cpu, mem, template.Properties.GuestAccelerators, ephemeralStorage, ephemeralStorageLocalSsdCount, pods, reserved, extendedResources)
if err != nil {
return nil, err
}
@ -269,7 +276,7 @@ func (t *GceTemplateBuilder) BuildNodeFromTemplate(mig Mig, template *gce.Instan
node.Status.Allocatable = nodeAllocatable
}
// GenericLabels
labels, err := BuildGenericLabels(mig.GceRef(), template.Properties.MachineType, nodeName, os, arch)
labels, err := BuildGenericLabels(mig.GceRef(), template.Properties.MachineType, nodeName, migOsInfo.Os(), migOsInfo.Arch())
if err != nil {
return nil, err
}

View File

@ -65,7 +65,8 @@ func TestBuildNodeFromTemplateSetsResources(t *testing.T) {
ephemeralStorageLocalSSDCount int64
extendedResources apiv1.ResourceList
// test outputs
expectedErr bool
expectedMigInfoErr bool
expectedNodeTemplateErr bool
}
testCases := []testCase{
{
@ -86,7 +87,6 @@ func TestBuildNodeFromTemplateSetsResources(t *testing.T) {
reservedCpu: "1000m",
reservedMemory: fmt.Sprintf("%v", 1*units.MiB),
reservedEphemeralStorage: "30Gi",
expectedErr: false,
},
{
scenario: "no kube-reserved in kube-env",
@ -97,18 +97,16 @@ func TestBuildNodeFromTemplateSetsResources(t *testing.T) {
physicalCpu: 8,
physicalMemory: 200 * units.MiB,
kubeReserved: false,
expectedErr: false,
}, {
scenario: "no kube-env at all",
kubeEnv: "",
physicalCpu: 8,
physicalMemory: 200 * units.MiB,
kubeReserved: false,
expectedErr: false,
}, {
scenario: "totally messed up kube-env",
kubeEnv: "This kube-env is totally messed up",
expectedErr: true,
expectedMigInfoErr: true,
}, {
scenario: "max pods per node specified",
kubeEnv: "",
@ -116,7 +114,6 @@ func TestBuildNodeFromTemplateSetsResources(t *testing.T) {
physicalMemory: 200 * units.MiB,
pods: &thirtyPodsPerNode,
kubeReserved: false,
expectedErr: false,
},
{
scenario: "BLOCK_EPH_STORAGE_BOOT_DISK in kube-env",
@ -133,7 +130,6 @@ func TestBuildNodeFromTemplateSetsResources(t *testing.T) {
reservedEphemeralStorage: "0Gi",
kubeReserved: true,
isEphemeralStorageBlocked: true,
expectedErr: false,
},
{
scenario: "BLOCK_EPH_STORAGE_BOOT_DISK is false in kube-env",
@ -146,14 +142,13 @@ func TestBuildNodeFromTemplateSetsResources(t *testing.T) {
reservedMemory: fmt.Sprintf("%v", 0*units.MiB),
reservedEphemeralStorage: "0Gi",
kubeReserved: true,
expectedErr: false,
},
{
scenario: "more local SSDs requested for ephemeral storage than attached",
kubeEnv: "AUTOSCALER_ENV_VARS: os_distribution=cos;os=linux;ephemeral_storage_local_ssd_count=1\n",
ephemeralStorageLocalSSDCount: 1,
attachedLocalSSDCount: 0,
expectedErr: true,
expectedNodeTemplateErr: true,
},
{
scenario: "all attached local SSDs requested for ephemeral storage",
@ -163,7 +158,6 @@ func TestBuildNodeFromTemplateSetsResources(t *testing.T) {
bootDiskSizeGiB: 300,
ephemeralStorageLocalSSDCount: 2,
attachedLocalSSDCount: 2,
expectedErr: false,
},
{
scenario: "more local SSDs attached than requested for ephemeral storage",
@ -172,7 +166,6 @@ func TestBuildNodeFromTemplateSetsResources(t *testing.T) {
physicalMemory: 200 * units.MiB,
ephemeralStorageLocalSSDCount: 2,
attachedLocalSSDCount: 4,
expectedErr: false,
},
{
scenario: "ephemeral storage on local SSDs with kube-reserved",
@ -185,7 +178,6 @@ func TestBuildNodeFromTemplateSetsResources(t *testing.T) {
reservedMemory: fmt.Sprintf("%v", 0*units.MiB),
reservedEphemeralStorage: "10Gi",
attachedLocalSSDCount: 4,
expectedErr: false,
},
{
scenario: "extended_resources present in kube-env",
@ -198,7 +190,6 @@ func TestBuildNodeFromTemplateSetsResources(t *testing.T) {
reservedMemory: fmt.Sprintf("%v", 0*units.MiB),
reservedEphemeralStorage: "10Gi",
attachedLocalSSDCount: 4,
expectedErr: false,
extendedResources: apiv1.ResourceList{
apiv1.ResourceName("someResource"): *resource.NewQuantity(2, resource.DecimalSI),
apiv1.ResourceName("anotherResource"): *resource.NewQuantity(1*units.GB, resource.DecimalSI),
@ -215,7 +206,6 @@ func TestBuildNodeFromTemplateSetsResources(t *testing.T) {
reservedMemory: fmt.Sprintf("%v", 0*units.MiB),
reservedEphemeralStorage: "10Gi",
attachedLocalSSDCount: 4,
expectedErr: false,
extendedResources: apiv1.ResourceList{},
},
}
@ -256,8 +246,13 @@ func TestBuildNodeFromTemplateSetsResources(t *testing.T) {
if tc.kubeEnv != "" {
template.Properties.Metadata.Items = []*gce.MetadataItems{{Key: "kube-env", Value: &tc.kubeEnv}}
}
node, err := tb.BuildNodeFromTemplate(mig, template, tc.physicalCpu, tc.physicalMemory, tc.pods, &GceReserved{})
if tc.expectedErr {
migOsInfo, err := tb.MigOsInfo(mig.Id(), template)
if tc.expectedMigInfoErr {
assert.Error(t, err)
return
}
node, err := tb.BuildNodeFromTemplate(mig, migOsInfo, template, tc.physicalCpu, tc.physicalMemory, tc.pods, &GceReserved{})
if tc.expectedNodeTemplateErr {
assert.Error(t, err)
} else {
assert.NoError(t, err)
@ -286,7 +281,8 @@ func TestBuildNodeFromTemplateSetsResources(t *testing.T) {
} else if tc.isEphemeralStorageBlocked {
physicalEphemeralStorageGiB = 0
}
capacity, err := tb.BuildCapacity(tc.physicalCpu, tc.physicalMemory, tc.accelerators, OperatingSystemLinux, OperatingSystemDistributionCOS, "", physicalEphemeralStorageGiB*units.GiB, tc.ephemeralStorageLocalSSDCount, tc.pods, "", &GceReserved{}, tc.extendedResources)
migOsInfo := NewMigOsInfo(OperatingSystemLinux, OperatingSystemDistributionCOS, "")
capacity, err := tb.BuildCapacity(migOsInfo, tc.physicalCpu, tc.physicalMemory, tc.accelerators, physicalEphemeralStorageGiB*units.GiB, tc.ephemeralStorageLocalSSDCount, tc.pods, &GceReserved{}, tc.extendedResources)
assert.NoError(t, err)
assertEqualResourceLists(t, "Capacity", capacity, node.Status.Capacity)
if !tc.kubeReserved {
@ -593,7 +589,8 @@ func TestBuildCapacityMemory(t *testing.T) {
t.Run(fmt.Sprintf("%v", idx), func(t *testing.T) {
tb := GceTemplateBuilder{}
noAccelerators := make([]*gce.AcceleratorConfig, 0)
buildCapacity, err := tb.BuildCapacity(tc.physicalCpu, tc.physicalMemory, noAccelerators, tc.os, OperatingSystemDistributionCOS, "", -1, 0, nil, "", &GceReserved{}, apiv1.ResourceList{})
migOsInfo := NewMigOsInfo(tc.os, OperatingSystemDistributionCOS, "")
buildCapacity, err := tb.BuildCapacity(migOsInfo, tc.physicalCpu, tc.physicalMemory, noAccelerators, -1, 0, nil, &GceReserved{}, apiv1.ResourceList{})
assert.NoError(t, err)
expectedCapacity, err := makeResourceList2(tc.physicalCpu, tc.expectedCapacityMemory, 0, 110)
assert.NoError(t, err)
@ -1397,7 +1394,11 @@ func TestBuildNodeFromTemplateArch(t *testing.T) {
},
}
tb := &GceTemplateBuilder{}
gotNode, gotErr := tb.BuildNodeFromTemplate(mig, template, 16, 128, nil, &GceReserved{})
migOsInfo, gotErr := tb.MigOsInfo(mig.Id(), template)
if gotErr != nil {
t.Fatalf("MigOsInfo unexpected error: %v", gotErr)
}
gotNode, gotErr := tb.BuildNodeFromTemplate(mig, migOsInfo, template, 16, 128, nil, &GceReserved{})
if gotErr != nil {
t.Fatalf("BuildNodeFromTemplate unexpected error: %v", gotErr)
}

View File

@ -1,6 +1,8 @@
#approvers:
approvers:
- apricote
#- LKaemmerling
#- 4ND3R50N
#reviewers:
reviewers:
- apricote
#- LKaemmerling
#- 4ND3R50N

View File

@ -99,6 +99,11 @@ func (d *HetznerCloudProvider) NodeGroupForNode(node *apiv1.Node) (cloudprovider
return group, nil
}
// HasInstance returns whether a given node has a corresponding instance in this cloud provider
func (d *HetznerCloudProvider) HasInstance(node *apiv1.Node) (bool, error) {
return true, cloudprovider.ErrNotImplemented
}
// Pricing returns pricing model for this cloud provider or error if not
// available. Implementation optional.
func (d *HetznerCloudProvider) Pricing() (cloudprovider.PricingModel, errors.AutoscalerError) {

View File

@ -317,6 +317,7 @@ func buildNodeGroupLabels(n *hetznerNodeGroup) map[string]string {
return map[string]string{
apiv1.LabelInstanceType: n.instanceType,
apiv1.LabelZoneRegionStable: n.region,
"csi.hetzner.cloud/location": n.region,
nodeGroupLabel: n.id,
}
}

View File

@ -31,6 +31,9 @@ spec:
- --scale-down-delay-after-add=1m0s
- --scale-down-unneeded-time=1m0s
- --expander=random
- --max-empty-bulk-delete=100
- --max-scale-down-parallelism=100
- --node-deletion-batcher-interval=10s
volumeMounts:
- name: cloud-config
mountPath: /config

View File

@ -123,6 +123,11 @@ func (hcp *huaweicloudCloudProvider) NodeGroupForNode(node *apiv1.Node) (cloudpr
return hcp.cloudServiceManager.GetAsgForInstance(instanceID)
}
// HasInstance returns whether a given node has a corresponding instance in this cloud provider
func (hcp *huaweicloudCloudProvider) HasInstance(node *apiv1.Node) (bool, error) {
return true, cloudprovider.ErrNotImplemented
}
// Pricing returns pricing model for this cloud provider or error if not available. Not implemented.
func (hcp *huaweicloudCloudProvider) Pricing() (cloudprovider.PricingModel, errors.AutoscalerError) {
return nil, cloudprovider.ErrNotImplemented

View File

@ -626,10 +626,57 @@ func (csm *cloudServiceManager) buildNodeFromTemplate(asgName string, template *
node.Labels = cloudprovider.JoinStringMaps(node.Labels, buildGenericLabels(template, nodeName))
node.Spec.Taints = extractTaintsFromTags(template.tags)
node.Status.Conditions = cloudprovider.BuildReadyConditions()
return &node, nil
}
// extractTaintsFromTags extracts taints from AS (auto scaling) group tags.
// The tag is of the format "k8s.io_cluster-autoscaler_node-template_taint_<taint-key>". "<taint-key>" is
// the name of the taint and the value of each tag specifies the taint value and effect with the
// format "<taint-value>:<taint-effect>".
// Example tags: "k8s.io_cluster-autoscaler_node-template_taint_dedicated": "true:NoSchedule"
func extractTaintsFromTags(tags map[string]string) []apiv1.Taint {
taints := make([]apiv1.Taint, 0)
for tagKey, tagValue := range tags {
if !strings.Contains(tagKey, "k8s.io_cluster-autoscaler_node-template_taint_") {
continue
}
splits := strings.Split(tagKey, "k8s.io_cluster-autoscaler_node-template_taint_")
// If the tagKey is 'k8s.io_cluster-autoscaler_node-template_taint_', the second element is '',
// this should be ruled out.
if len(splits) < 2 || splits[1] == "" {
klog.Warningf("Invalid tag key format:%s", tagKey)
continue
}
values := strings.Split(tagValue, ":")
if len(values) != 2 {
klog.Warningf("Invalid tag value format:%s", tagValue)
continue
}
if values[1] != string(apiv1.TaintEffectNoSchedule) &&
values[1] != string(apiv1.TaintEffectPreferNoSchedule) &&
values[1] != string(apiv1.TaintEffectNoExecute) {
klog.Warningf("Invalid tag value format:%s", tagValue)
continue
}
taints = append(taints, apiv1.Taint{
Key: splits[1],
Value: values[0],
Effect: apiv1.TaintEffect(values[1]),
})
klog.V(6).Infof("Extract taints from tag key/value successfully:%s, %s", tagKey, tagValue)
}
return taints
}
func buildGenericLabels(template *asgTemplate, nodeName string) map[string]string {
result := make(map[string]string)
result[apiv1.LabelArchStable] = cloudprovider.DefaultArch
@ -643,6 +690,10 @@ func buildGenericLabels(template *asgTemplate, nodeName string) map[string]strin
// append custom node labels
for key, value := range template.tags {
// ignore the tag which represents a taint
if strings.Contains(key, "k8s.io_cluster-autoscaler_node-template_taint_") {
continue
}
result[key] = value
}

View File

@ -0,0 +1,164 @@
/*
Copyright 2020 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package huaweicloud
import (
"reflect"
"testing"
apiv1 "k8s.io/api/core/v1"
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider"
)
func Test_extractTaintsFromTags(t *testing.T) {
tests := []struct {
name string
args map[string]string
want []apiv1.Taint
}{
{
name: "tag in right format",
args: map[string]string{
"k8s.io_cluster-autoscaler_node-template_taint_foo": "bar:NoSchedule",
},
want: []apiv1.Taint{
{Key: "foo", Value: "bar", Effect: apiv1.TaintEffectNoSchedule},
},
},
{
name: "empty taint key should be ignored",
args: map[string]string{
"k8s.io_cluster-autoscaler_node-template_taint_": "bar:NoSchedule",
},
want: []apiv1.Taint{},
},
{
name: "invalid tag key should be ignored",
args: map[string]string{
"invalidTagKey": "bar:NoSchedule",
},
want: []apiv1.Taint{},
},
{
name: "invalid taint effect should be ignored",
args: map[string]string{
"k8s.io_cluster-autoscaler_node-template_taint_foo": "bar:InvalidEffect",
},
want: []apiv1.Taint{},
},
{
name: "empty taint value",
args: map[string]string{
"k8s.io_cluster-autoscaler_node-template_taint_foo": ":NoSchedule",
},
want: []apiv1.Taint{
{Key: "foo", Value: "", Effect: apiv1.TaintEffectNoSchedule},
},
},
{
name: "one tag with valid tag, one tag with invalid key, ignore the invalid one",
args: map[string]string{
"k8s.io_cluster-autoscaler_node-template_taint_foo": "bar:NoSchedule",
"invalidTagKey": ":NoSchedule",
},
want: []apiv1.Taint{
{Key: "foo", Value: "bar", Effect: apiv1.TaintEffectNoSchedule},
},
},
{
name: "one tag with valid key/value, one tag with invalid value, ignore the invalid one",
args: map[string]string{
"k8s.io_cluster-autoscaler_node-template_taint_foo": "bar:NoSchedule",
"k8s.io_cluster-autoscaler_node-template_taint_bar": "invalidTagValue",
},
want: []apiv1.Taint{
{Key: "foo", Value: "bar", Effect: apiv1.TaintEffectNoSchedule},
},
},
{
name: "one tag with valid key/value, one tag with invalid value length, ignore the invalid one",
args: map[string]string{
"k8s.io_cluster-autoscaler_node-template_taint_foo": "bar:NoSchedule",
"k8s.io_cluster-autoscaler_node-template_taint_bar": "foo:NoSchedule:more",
},
want: []apiv1.Taint{
{Key: "foo", Value: "bar", Effect: apiv1.TaintEffectNoSchedule},
},
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
if got := extractTaintsFromTags(tt.args); !reflect.DeepEqual(got, tt.want) {
t.Errorf("extractTaintsFromTags() = %v, want %v", got, tt.want)
}
})
}
}
func Test_buildGenericLabels(t *testing.T) {
template := &asgTemplate{
name: "foo",
region: "foo",
zone: "foo",
}
tests := []struct {
name string
tags map[string]string
want map[string]string
}{
{
name: "tags contain taints key, ignore it when extract labels",
tags: map[string]string{
"k8s.io_cluster-autoscaler_node-template_taint_foo": "true:PreferNoSchedule",
"foo": "bar",
},
want: map[string]string{
apiv1.LabelArchStable: cloudprovider.DefaultArch,
apiv1.LabelOSStable: cloudprovider.DefaultOS,
apiv1.LabelInstanceTypeStable: template.name,
apiv1.LabelTopologyRegion: template.region,
apiv1.LabelTopologyZone: template.zone,
apiv1.LabelHostname: "foo",
"foo": "bar",
},
},
{
name: "tags don't contain taints key",
tags: map[string]string{
"foo": "bar",
},
want: map[string]string{
apiv1.LabelArchStable: cloudprovider.DefaultArch,
apiv1.LabelOSStable: cloudprovider.DefaultOS,
apiv1.LabelInstanceTypeStable: template.name,
apiv1.LabelTopologyRegion: template.region,
apiv1.LabelTopologyZone: template.zone,
apiv1.LabelHostname: "foo",
"foo": "bar",
},
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
template.tags = tt.tags
if got := buildGenericLabels(template, "foo"); !reflect.DeepEqual(got, tt.want) {
t.Errorf("buildGenericLabels() = %v, want %v", got, tt.want)
}
})
}
}

View File

@ -232,6 +232,11 @@ func (ic *IonosCloudCloudProvider) NodeGroupForNode(node *apiv1.Node) (cloudprov
return nil, nil
}
// HasInstance returns whether a given node has a corresponding instance in this cloud provider
func (ic *IonosCloudCloudProvider) HasInstance(node *apiv1.Node) (bool, error) {
return true, cloudprovider.ErrNotImplemented
}
// Pricing returns pricing model for this cloud provider or error if not
// available. Implementation optional.
func (ic *IonosCloudCloudProvider) Pricing() (cloudprovider.PricingModel, errors.AutoscalerError) {

View File

@ -18,9 +18,8 @@ package kamatera
import (
"context"
"encoding/hex"
"fmt"
"github.com/satori/go.uuid"
"github.com/google/uuid"
"k8s.io/autoscaler/cluster-autoscaler/version"
"k8s.io/klog/v2"
"strings"
@ -266,5 +265,5 @@ func kamateraServerName(namePrefix string) string {
if len(namePrefix) > 0 {
namePrefix = fmt.Sprintf("%s-", namePrefix)
}
return fmt.Sprintf("%s%s", namePrefix, hex.EncodeToString(uuid.NewV4().Bytes()))
return fmt.Sprintf("%s%s", namePrefix, strings.ReplaceAll(uuid.New().String(), "-", ""))
}
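For reference, a minimal, illustrative sketch (not part of this change) of the name format the google/uuid-based helper now produces. exampleServerName is a hypothetical stand-in for kamateraServerName above; the only assumed dependency is github.com/google/uuid, which this diff already introduces.

package main

import (
	"fmt"
	"strings"

	"github.com/google/uuid"
)

// exampleServerName mirrors the updated kamateraServerName logic, shown only
// to illustrate the generated name format after the dependency swap.
func exampleServerName(namePrefix string) string {
	if len(namePrefix) > 0 {
		namePrefix = fmt.Sprintf("%s-", namePrefix)
	}
	return fmt.Sprintf("%s%s", namePrefix, strings.ReplaceAll(uuid.New().String(), "-", ""))
}

func main() {
	name := exampleServerName("k8s")
	// Prints the prefix followed by 32 hex characters, e.g. "k8s-3f2504e04f8941d39a0c0305e82c3301".
	fmt.Println(name, len(name))
}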

View File

@ -70,6 +70,11 @@ func (k *kamateraCloudProvider) NodeGroupForNode(node *apiv1.Node) (cloudprovide
return nil, nil
}
// HasInstance returns whether a given node has a corresponding instance in this cloud provider
func (k *kamateraCloudProvider) HasInstance(node *apiv1.Node) (bool, error) {
return true, cloudprovider.ErrNotImplemented
}
// Pricing returns pricing model for this cloud provider or error if not available.
// Implementation optional.
func (k *kamateraCloudProvider) Pricing() (cloudprovider.PricingModel, errors.AutoscalerError) {

View File

@ -18,14 +18,13 @@ package kamatera
import (
"context"
"encoding/hex"
"fmt"
uuid "github.com/satori/go.uuid"
"github.com/google/uuid"
"github.com/stretchr/testify/mock"
"strings"
)
func mockKamateraServerName() string {
return fmt.Sprintf("%s", hex.EncodeToString(uuid.NewV4().Bytes()))
return strings.ReplaceAll(uuid.New().String(), "-", "")
}
func mockServerConfig(namePrefix string, tags []string) ServerConfig {

View File

@ -139,6 +139,11 @@ func (kubemark *KubemarkCloudProvider) NodeGroupForNode(node *apiv1.Node) (cloud
return nil, nil
}
// HasInstance returns whether a given node has a corresponding instance in this cloud provider
func (kubemark *KubemarkCloudProvider) HasInstance(node *apiv1.Node) (bool, error) {
return true, cloudprovider.ErrNotImplemented
}
// GetAvailableMachineTypes get all machine types that can be requested from the cloud provider.
// Implementation optional.
func (kubemark *KubemarkCloudProvider) GetAvailableMachineTypes() ([]string, error) {

View File

@ -80,6 +80,11 @@ func (kubemark *KubemarkCloudProvider) NodeGroupForNode(node *apiv1.Node) (cloud
return nil, cloudprovider.ErrNotImplemented
}
// HasInstance returns whether a given node has a corresponding instance in this cloud provider
func (kubemark *KubemarkCloudProvider) HasInstance(node *apiv1.Node) (bool, error) {
return true, cloudprovider.ErrNotImplemented
}
// GetAvailableMachineTypes get all machine types that can be requested from the cloud provider.
// Implementation optional.
func (kubemark *KubemarkCloudProvider) GetAvailableMachineTypes() ([]string, error) {

View File

@ -67,6 +67,11 @@ func (l *linodeCloudProvider) NodeGroupForNode(node *apiv1.Node) (cloudprovider.
return nil, nil
}
// HasInstance returns whether a given node has a corresponding instance in this cloud provider
func (l *linodeCloudProvider) HasInstance(node *apiv1.Node) (bool, error) {
return true, cloudprovider.ErrNotImplemented
}
// Pricing returns pricing model for this cloud provider or error if not available.
// Implementation optional.
func (l *linodeCloudProvider) Pricing() (cloudprovider.PricingModel, errors.AutoscalerError) {

View File

@ -39,7 +39,7 @@ rules:
resources: ["daemonsets", "replicasets", "statefulsets"]
verbs: ["watch", "list", "get"]
- apiGroups: ["storage.k8s.io"]
resources: ["storageclasses"]
resources: ["storageclasses", "csinodes", "csidrivers", "csistoragecapacities"]
verbs: ["watch", "list", "get"]
- apiGroups: [""]
resources: ["configmaps"]

View File

@ -135,6 +135,11 @@ func (mcp *magnumCloudProvider) NodeGroupForNode(node *apiv1.Node) (cloudprovide
return nil, nil
}
// HasInstance returns whether a given node has a corresponding instance in this cloud provider
func (mcp *magnumCloudProvider) HasInstance(node *apiv1.Node) (bool, error) {
return true, cloudprovider.ErrNotImplemented
}
// Pricing is not implemented.
func (mcp *magnumCloudProvider) Pricing() (cloudprovider.PricingModel, errors.AutoscalerError) {
return nil, cloudprovider.ErrNotImplemented

View File

@ -23,7 +23,7 @@ import (
"testing"
"time"
"github.com/satori/go.uuid"
"github.com/gofrs/uuid"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/mock"
"github.com/stretchr/testify/require"
@ -46,7 +46,12 @@ func (m *magnumManagerDiscoveryMock) autoDiscoverNodeGroups(cfgs []magnumAutoDis
ngs := []*nodegroups.NodeGroup{}
two := 2
for i := 0; i < rand.Intn(20); i++ {
ngs = append(ngs, &nodegroups.NodeGroup{Name: uuid.NewV4().String(), NodeCount: 1, MinNodeCount: 1, MaxNodeCount: &two})
newUUID, err := uuid.NewV4()
if err != nil {
return nil, fmt.Errorf("failed to produce a random UUID: %v", err)
}
newUUIDStr := newUUID.String()
ngs = append(ngs, &nodegroups.NodeGroup{Name: newUUIDStr, NodeCount: 1, MinNodeCount: 1, MaxNodeCount: &two})
}
return ngs, nil
}

View File

@ -21,7 +21,7 @@ import (
"sort"
"strings"
"github.com/satori/go.uuid"
"github.com/gofrs/uuid"
apiv1 "k8s.io/api/core/v1"

View File

@ -177,6 +177,29 @@ func (_m *CloudProvider) NodeGroupForNode(_a0 *v1.Node) (cloudprovider.NodeGroup
return r0, r1
}
// HasInstance provides a mock function with given fields:
func (_m *CloudProvider) HasInstance(_a0 *v1.Node) (bool, error) {
ret := _m.Called(_a0)
var r0 bool
if rf, ok := ret.Get(0).(func(*v1.Node) bool); ok {
r0 = rf(_a0)
} else {
if ret.Get(0) != nil {
r0 = ret.Get(0).(bool)
}
}
var r1 error
if rf, ok := ret.Get(1).(func(*v1.Node) error); ok {
r1 = rf(_a0)
} else {
r1 = ret.Error(1)
}
return r0, r1
}
// NodeGroups provides a mock function with given fields:
func (_m *CloudProvider) NodeGroups() []cloudprovider.NodeGroup {
ret := _m.Called()

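A minimal usage sketch for the new HasInstance mock, assuming the generated mock lives in the repository's cloudprovider mocks package and embeds testify's mock.Mock (as the _m.Called calls suggest). The test name, import path, and node value are illustrative only, not part of this change.

package example_test

import (
	"testing"

	apiv1 "k8s.io/api/core/v1"

	// Assumed import path for the generated mock above; adjust to wherever the
	// mocks package actually lives in this repository.
	"k8s.io/autoscaler/cluster-autoscaler/cloudprovider/mocks"
)

func TestHasInstanceStub(t *testing.T) {
	provider := &mocks.CloudProvider{}
	node := &apiv1.Node{}

	// Program the generated mock: report that the node has a backing instance.
	provider.On("HasInstance", node).Return(true, nil)

	has, err := provider.HasInstance(node)
	if err != nil || !has {
		t.Fatalf("expected stubbed HasInstance to return (true, nil), got (%v, %v)", has, err)
	}
	provider.AssertExpectations(t)
}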
View File

@ -1,5 +1,5 @@
approvers:
#- jlamillan
- jlamillan
reviewers:
#- jlamillan
- jlamillan
#- ericrrath

View File

@ -15,6 +15,9 @@ metadata:
k8s-addon: cluster-autoscaler.addons.k8s.io
k8s-app: cluster-autoscaler
rules:
- apiGroups: ["storage.k8s.io"]
resources: ["csidriver", "csistoragecapacities"]
verbs: ["watch", "list"]
- apiGroups: [""]
resources: ["events", "endpoints"]
verbs: ["create", "patch"]
@ -24,16 +27,16 @@ rules:
- apiGroups: [""]
resources: ["pods/status"]
verbs: ["update"]
- apiGroups: [""]
resources: ["namespaces"]
verbs: ["get", "list", "watch"]
- apiGroups: [""]
resources: ["endpoints"]
resourceNames: ["cluster-autoscaler"]
verbs: ["get", "update"]
- apiGroups: [""]
resources: ["nodes"]
verbs: ["watch", "list", "get", "update"]
- apiGroups: [""]
resources: ["namepaces"]
verbs: ["list"]
verbs: ["watch", "list", "get", "patch", "update"]
- apiGroups: [""]
resources:
- "pods"
@ -53,10 +56,10 @@ rules:
verbs: ["watch", "list", "get"]
- apiGroups: ["storage.k8s.io"]
resources: ["storageclasses", "csinodes"]
verbs: ["get", "list", "watch"]
- apiGroups: ["batch"]
resources: ["jobs", "cronjobs"]
verbs: ["watch", "list", "get"]
- apiGroups: ["batch", "extensions"]
resources: ["jobs"]
verbs: ["get", "list", "watch", "patch"]
- apiGroups: ["coordination.k8s.io"]
resources: ["leases"]
verbs: ["create"]
@ -64,10 +67,6 @@ rules:
resourceNames: ["cluster-autoscaler"]
resources: ["leases"]
verbs: ["get", "update"]
- apiGroups: ["storage.k8s.io"]
resources: ["csidrivers", "csistoragecapacities"]
verbs: ["get", "list"]
---
apiVersion: rbac.authorization.k8s.io/v1
kind: Role

View File

@ -15,6 +15,9 @@ metadata:
k8s-addon: cluster-autoscaler.addons.k8s.io
k8s-app: cluster-autoscaler
rules:
- apiGroups: ["storage.k8s.io"]
resources: ["csidriver", "csistoragecapacities"]
verbs: ["watch", "list"]
- apiGroups: [""]
resources: ["events", "endpoints"]
verbs: ["create", "patch"]
@ -24,16 +27,16 @@ rules:
- apiGroups: [""]
resources: ["pods/status"]
verbs: ["update"]
- apiGroups: [""]
resources: ["namespaces"]
verbs: ["get", "list", "watch"]
- apiGroups: [""]
resources: ["endpoints"]
resourceNames: ["cluster-autoscaler"]
verbs: ["get", "update"]
- apiGroups: [""]
resources: ["nodes"]
verbs: ["watch", "list", "get", "update"]
- apiGroups: [""]
resources: ["namespaces"]
verbs: ["list"]
verbs: ["watch", "list", "get", "patch", "update"]
- apiGroups: [""]
resources:
- "pods"
@ -53,10 +56,10 @@ rules:
verbs: ["watch", "list", "get"]
- apiGroups: ["storage.k8s.io"]
resources: ["storageclasses", "csinodes"]
verbs: ["get", "list", "watch"]
- apiGroups: ["batch"]
resources: ["jobs", "cronjobs"]
verbs: ["watch", "list", "get"]
- apiGroups: ["batch", "extensions"]
resources: ["jobs"]
verbs: ["get", "list", "watch", "patch"]
- apiGroups: ["coordination.k8s.io"]
resources: ["leases"]
verbs: ["create"]
@ -64,10 +67,6 @@ rules:
resourceNames: ["cluster-autoscaler"]
resources: ["leases"]
verbs: ["get", "update"]
- apiGroups: ["storage.k8s.io"]
resources: ["csidrivers", "csistoragecapacities"]
verbs: ["get", "list"]
---
apiVersion: rbac.authorization.k8s.io/v1
kind: Role

View File

@ -0,0 +1,74 @@
// Copyright (c) 2016, 2018, 2021, Oracle and/or its affiliates. All rights reserved.
// This software is dual-licensed to you under the Universal Permissive License (UPL) 1.0 as shown at https://oss.oracle.com/licenses/upl or Apache License 2.0 as shown at http://www.apache.org/licenses/LICENSE-2.0. You may choose either license.
// Code generated. DO NOT EDIT.
package workrequests
import (
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider/oci/oci-go-sdk/v43/common"
"net/http"
)
// GetWorkRequestRequest wrapper for the GetWorkRequest operation
//
// # See also
//
// Click https://docs.cloud.oracle.com/en-us/iaas/tools/go-sdk-examples/latest/workrequests/GetWorkRequest.go.html to see an example of how to use GetWorkRequestRequest.
type GetWorkRequestRequest struct {
// The OCID (https://docs.cloud.oracle.com/iaas/Content/General/Concepts/identifiers.htm) of the work request.
WorkRequestId *string `mandatory:"true" contributesTo:"path" name:"workRequestId"`
// Unique Oracle-assigned identifier for the request. If you need to contact Oracle about a
// particular request, please provide the request ID.
OpcRequestId *string `mandatory:"false" contributesTo:"header" name:"opc-request-id"`
// Metadata about the request. This information will not be transmitted to the service, but
// represents information that the SDK will consume to drive retry behavior.
RequestMetadata common.RequestMetadata
}
func (request GetWorkRequestRequest) String() string {
return common.PointerString(request)
}
// HTTPRequest implements the OCIRequest interface
func (request GetWorkRequestRequest) HTTPRequest(method, path string, binaryRequestBody *common.OCIReadSeekCloser) (http.Request, error) {
return common.MakeDefaultHTTPRequestWithTaggedStruct(method, path, request)
}
// BinaryRequestBody implements the OCIRequest interface
func (request GetWorkRequestRequest) BinaryRequestBody() (*common.OCIReadSeekCloser, bool) {
return nil, false
}
// RetryPolicy implements the OCIRetryableRequest interface. This retrieves the specified retry policy.
func (request GetWorkRequestRequest) RetryPolicy() *common.RetryPolicy {
return request.RequestMetadata.RetryPolicy
}
// GetWorkRequestResponse wrapper for the GetWorkRequest operation
type GetWorkRequestResponse struct {
// The underlying http response
RawResponse *http.Response
// The WorkRequest instance
WorkRequest `presentIn:"body"`
// Unique Oracle-assigned identifier for the request. If you need to contact Oracle about a
// particular request, please provide the request ID.
OpcRequestId *string `presentIn:"header" name:"opc-request-id"`
}
func (response GetWorkRequestResponse) String() string {
return common.PointerString(response)
}
// HTTPResponse implements the OCIResponse interface
func (response GetWorkRequestResponse) HTTPResponse() *http.Response {
return response.RawResponse
}

View File

@ -0,0 +1,115 @@
// Copyright (c) 2016, 2018, 2021, Oracle and/or its affiliates. All rights reserved.
// This software is dual-licensed to you under the Universal Permissive License (UPL) 1.0 as shown at https://oss.oracle.com/licenses/upl or Apache License 2.0 as shown at http://www.apache.org/licenses/LICENSE-2.0. You may choose either license.
// Code generated. DO NOT EDIT.
package workrequests
import (
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider/oci/oci-go-sdk/v43/common"
"net/http"
)
// ListWorkRequestErrorsRequest wrapper for the ListWorkRequestErrors operation
//
// # See also
//
// Click https://docs.cloud.oracle.com/en-us/iaas/tools/go-sdk-examples/latest/workrequests/ListWorkRequestErrors.go.html to see an example of how to use ListWorkRequestErrorsRequest.
type ListWorkRequestErrorsRequest struct {
// The OCID (https://docs.cloud.oracle.com/iaas/Content/General/Concepts/identifiers.htm) of the work request.
WorkRequestId *string `mandatory:"true" contributesTo:"path" name:"workRequestId"`
// For list pagination. The maximum number of results per page, or items to return in a
// paginated "List" call. For important details about how pagination works, see
// List Pagination (https://docs.cloud.oracle.com/iaas/Content/API/Concepts/usingapi.htm#nine).
Limit *int `mandatory:"false" contributesTo:"query" name:"limit"`
// For list pagination. The value of the `opc-next-page` response header from the
// previous "List" call. For important details about how pagination works, see
// List Pagination (https://docs.cloud.oracle.com/iaas/Content/API/Concepts/usingapi.htm#nine).
Page *string `mandatory:"false" contributesTo:"query" name:"page"`
// The sort order to use, either ascending (`ASC`) or descending (`DESC`).
SortOrder ListWorkRequestErrorsSortOrderEnum `mandatory:"false" contributesTo:"query" name:"sortOrder" omitEmpty:"true"`
// Unique Oracle-assigned identifier for the request. If you need to contact Oracle about a
// particular request, please provide the request ID.
OpcRequestId *string `mandatory:"false" contributesTo:"header" name:"opc-request-id"`
// Metadata about the request. This information will not be transmitted to the service, but
// represents information that the SDK will consume to drive retry behavior.
RequestMetadata common.RequestMetadata
}
func (request ListWorkRequestErrorsRequest) String() string {
return common.PointerString(request)
}
// HTTPRequest implements the OCIRequest interface
func (request ListWorkRequestErrorsRequest) HTTPRequest(method, path string, binaryRequestBody *common.OCIReadSeekCloser) (http.Request, error) {
return common.MakeDefaultHTTPRequestWithTaggedStruct(method, path, request)
}
// BinaryRequestBody implements the OCIRequest interface
func (request ListWorkRequestErrorsRequest) BinaryRequestBody() (*common.OCIReadSeekCloser, bool) {
return nil, false
}
// RetryPolicy implements the OCIRetryableRequest interface. This retrieves the specified retry policy.
func (request ListWorkRequestErrorsRequest) RetryPolicy() *common.RetryPolicy {
return request.RequestMetadata.RetryPolicy
}
// ListWorkRequestErrorsResponse wrapper for the ListWorkRequestErrors operation
type ListWorkRequestErrorsResponse struct {
// The underlying http response
RawResponse *http.Response
// A list of []WorkRequestError instances
Items []WorkRequestError `presentIn:"body"`
// For list pagination. When this header appears in the response, additional pages of
// results remain. For important details about how pagination works, see
// List Pagination (https://docs.cloud.oracle.com/iaas/Content/API/Concepts/usingapi.htm#nine).
OpcNextPage *string `presentIn:"header" name:"opc-next-page"`
// Unique Oracle-assigned identifier for the request. If you need to contact Oracle about a
// particular request, please provide the request ID.
OpcRequestId *string `presentIn:"header" name:"opc-request-id"`
}
func (response ListWorkRequestErrorsResponse) String() string {
return common.PointerString(response)
}
// HTTPResponse implements the OCIResponse interface
func (response ListWorkRequestErrorsResponse) HTTPResponse() *http.Response {
return response.RawResponse
}
// ListWorkRequestErrorsSortOrderEnum Enum with underlying type: string
type ListWorkRequestErrorsSortOrderEnum string
// Set of constants representing the allowable values for ListWorkRequestErrorsSortOrderEnum
const (
ListWorkRequestErrorsSortOrderAsc ListWorkRequestErrorsSortOrderEnum = "ASC"
ListWorkRequestErrorsSortOrderDesc ListWorkRequestErrorsSortOrderEnum = "DESC"
)
var mappingListWorkRequestErrorsSortOrder = map[string]ListWorkRequestErrorsSortOrderEnum{
"ASC": ListWorkRequestErrorsSortOrderAsc,
"DESC": ListWorkRequestErrorsSortOrderDesc,
}
// GetListWorkRequestErrorsSortOrderEnumValues Enumerates the set of values for ListWorkRequestErrorsSortOrderEnum
func GetListWorkRequestErrorsSortOrderEnumValues() []ListWorkRequestErrorsSortOrderEnum {
values := make([]ListWorkRequestErrorsSortOrderEnum, 0)
for _, v := range mappingListWorkRequestErrorsSortOrder {
values = append(values, v)
}
return values
}

View File

@ -0,0 +1,115 @@
// Copyright (c) 2016, 2018, 2021, Oracle and/or its affiliates. All rights reserved.
// This software is dual-licensed to you under the Universal Permissive License (UPL) 1.0 as shown at https://oss.oracle.com/licenses/upl or Apache License 2.0 as shown at http://www.apache.org/licenses/LICENSE-2.0. You may choose either license.
// Code generated. DO NOT EDIT.
package workrequests
import (
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider/oci/oci-go-sdk/v43/common"
"net/http"
)
// ListWorkRequestLogsRequest wrapper for the ListWorkRequestLogs operation
//
// # See also
//
// Click https://docs.cloud.oracle.com/en-us/iaas/tools/go-sdk-examples/latest/workrequests/ListWorkRequestLogs.go.html to see an example of how to use ListWorkRequestLogsRequest.
type ListWorkRequestLogsRequest struct {
// The OCID (https://docs.cloud.oracle.com/iaas/Content/General/Concepts/identifiers.htm) of the work request.
WorkRequestId *string `mandatory:"true" contributesTo:"path" name:"workRequestId"`
// For list pagination. The maximum number of results per page, or items to return in a
// paginated "List" call. For important details about how pagination works, see
// List Pagination (https://docs.cloud.oracle.com/iaas/Content/API/Concepts/usingapi.htm#nine).
Limit *int `mandatory:"false" contributesTo:"query" name:"limit"`
// For list pagination. The value of the `opc-next-page` response header from the
// previous "List" call. For important details about how pagination works, see
// List Pagination (https://docs.cloud.oracle.com/iaas/Content/API/Concepts/usingapi.htm#nine).
Page *string `mandatory:"false" contributesTo:"query" name:"page"`
// The sort order to use, either ascending (`ASC`) or descending (`DESC`).
SortOrder ListWorkRequestLogsSortOrderEnum `mandatory:"false" contributesTo:"query" name:"sortOrder" omitEmpty:"true"`
// Unique Oracle-assigned identifier for the request. If you need to contact Oracle about a
// particular request, please provide the request ID.
OpcRequestId *string `mandatory:"false" contributesTo:"header" name:"opc-request-id"`
// Metadata about the request. This information will not be transmitted to the service, but
// represents information that the SDK will consume to drive retry behavior.
RequestMetadata common.RequestMetadata
}
func (request ListWorkRequestLogsRequest) String() string {
return common.PointerString(request)
}
// HTTPRequest implements the OCIRequest interface
func (request ListWorkRequestLogsRequest) HTTPRequest(method, path string, binaryRequestBody *common.OCIReadSeekCloser) (http.Request, error) {
return common.MakeDefaultHTTPRequestWithTaggedStruct(method, path, request)
}
// BinaryRequestBody implements the OCIRequest interface
func (request ListWorkRequestLogsRequest) BinaryRequestBody() (*common.OCIReadSeekCloser, bool) {
return nil, false
}
// RetryPolicy implements the OCIRetryableRequest interface. This retrieves the specified retry policy.
func (request ListWorkRequestLogsRequest) RetryPolicy() *common.RetryPolicy {
return request.RequestMetadata.RetryPolicy
}
// ListWorkRequestLogsResponse wrapper for the ListWorkRequestLogs operation
type ListWorkRequestLogsResponse struct {
// The underlying http response
RawResponse *http.Response
// A list of []WorkRequestLogEntry instances
Items []WorkRequestLogEntry `presentIn:"body"`
// For list pagination. When this header appears in the response, additional pages of
// results remain. For important details about how pagination works, see
// List Pagination (https://docs.cloud.oracle.com/iaas/Content/API/Concepts/usingapi.htm#nine).
OpcNextPage *string `presentIn:"header" name:"opc-next-page"`
// Unique Oracle-assigned identifier for the request. If you need to contact Oracle about a
// particular request, please provide the request ID.
OpcRequestId *string `presentIn:"header" name:"opc-request-id"`
}
func (response ListWorkRequestLogsResponse) String() string {
return common.PointerString(response)
}
// HTTPResponse implements the OCIResponse interface
func (response ListWorkRequestLogsResponse) HTTPResponse() *http.Response {
return response.RawResponse
}
// ListWorkRequestLogsSortOrderEnum Enum with underlying type: string
type ListWorkRequestLogsSortOrderEnum string
// Set of constants representing the allowable values for ListWorkRequestLogsSortOrderEnum
const (
ListWorkRequestLogsSortOrderAsc ListWorkRequestLogsSortOrderEnum = "ASC"
ListWorkRequestLogsSortOrderDesc ListWorkRequestLogsSortOrderEnum = "DESC"
)
var mappingListWorkRequestLogsSortOrder = map[string]ListWorkRequestLogsSortOrderEnum{
"ASC": ListWorkRequestLogsSortOrderAsc,
"DESC": ListWorkRequestLogsSortOrderDesc,
}
// GetListWorkRequestLogsSortOrderEnumValues Enumerates the set of values for ListWorkRequestLogsSortOrderEnum
func GetListWorkRequestLogsSortOrderEnumValues() []ListWorkRequestLogsSortOrderEnum {
values := make([]ListWorkRequestLogsSortOrderEnum, 0)
for _, v := range mappingListWorkRequestLogsSortOrder {
values = append(values, v)
}
return values
}

View File

@ -0,0 +1,92 @@
// Copyright (c) 2016, 2018, 2021, Oracle and/or its affiliates. All rights reserved.
// This software is dual-licensed to you under the Universal Permissive License (UPL) 1.0 as shown at https://oss.oracle.com/licenses/upl or Apache License 2.0 as shown at http://www.apache.org/licenses/LICENSE-2.0. You may choose either license.
// Code generated. DO NOT EDIT.
package workrequests
import (
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider/oci/oci-go-sdk/v43/common"
"net/http"
)
// ListWorkRequestsRequest wrapper for the ListWorkRequests operation
//
// # See also
//
// Click https://docs.cloud.oracle.com/en-us/iaas/tools/go-sdk-examples/latest/workrequests/ListWorkRequests.go.html to see an example of how to use ListWorkRequestsRequest.
type ListWorkRequestsRequest struct {
// The OCID (https://docs.cloud.oracle.com/iaas/Content/General/Concepts/identifiers.htm) of the compartment.
CompartmentId *string `mandatory:"true" contributesTo:"query" name:"compartmentId"`
// The OCID (https://docs.cloud.oracle.com/iaas/Content/General/Concepts/identifiers.htm) of the resource.
ResourceId *string `mandatory:"false" contributesTo:"query" name:"resourceId"`
// For list pagination. The maximum number of results per page, or items to return in a
// paginated "List" call. For important details about how pagination works, see
// List Pagination (https://docs.cloud.oracle.com/iaas/Content/API/Concepts/usingapi.htm#nine).
Limit *int `mandatory:"false" contributesTo:"query" name:"limit"`
// For list pagination. The value of the `opc-next-page` response header from the
// previous "List" call. For important details about how pagination works, see
// List Pagination (https://docs.cloud.oracle.com/iaas/Content/API/Concepts/usingapi.htm#nine).
Page *string `mandatory:"false" contributesTo:"query" name:"page"`
// Unique Oracle-assigned identifier for the request. If you need to contact Oracle about a
// particular request, please provide the request ID.
OpcRequestId *string `mandatory:"false" contributesTo:"header" name:"opc-request-id"`
// Metadata about the request. This information will not be transmitted to the service, but
// represents information that the SDK will consume to drive retry behavior.
RequestMetadata common.RequestMetadata
}
func (request ListWorkRequestsRequest) String() string {
return common.PointerString(request)
}
// HTTPRequest implements the OCIRequest interface
func (request ListWorkRequestsRequest) HTTPRequest(method, path string, binaryRequestBody *common.OCIReadSeekCloser) (http.Request, error) {
return common.MakeDefaultHTTPRequestWithTaggedStruct(method, path, request)
}
// BinaryRequestBody implements the OCIRequest interface
func (request ListWorkRequestsRequest) BinaryRequestBody() (*common.OCIReadSeekCloser, bool) {
return nil, false
}
// RetryPolicy implements the OCIRetryableRequest interface. This retrieves the specified retry policy.
func (request ListWorkRequestsRequest) RetryPolicy() *common.RetryPolicy {
return request.RequestMetadata.RetryPolicy
}
// ListWorkRequestsResponse wrapper for the ListWorkRequests operation
type ListWorkRequestsResponse struct {
// The underlying http response
RawResponse *http.Response
// A list of []WorkRequestSummary instances
Items []WorkRequestSummary `presentIn:"body"`
// For list pagination. When this header appears in the response, additional pages of
// results remain. For important details about how pagination works, see
// List Pagination (https://docs.cloud.oracle.com/iaas/Content/API/Concepts/usingapi.htm#nine).
OpcNextPage *string `presentIn:"header" name:"opc-next-page"`
// Unique Oracle-assigned identifier for the request. If you need to contact Oracle about a
// particular request, please provide the request ID.
OpcRequestId *string `presentIn:"header" name:"opc-request-id"`
}
func (response ListWorkRequestsResponse) String() string {
return common.PointerString(response)
}
// HTTPResponse implements the OCIResponse interface
func (response ListWorkRequestsResponse) HTTPResponse() *http.Response {
return response.RawResponse
}
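A minimal pagination sketch for the ListWorkRequests operation added here, assuming a configured WorkRequestClient (introduced later in this change) and the vendored import path used by this package. listAllWorkRequests is a hypothetical helper, not part of the generated SDK, that follows the opc-next-page token until the service stops returning one.

package ociexample

import (
	"context"

	// Assumed import path for the vendored workrequests package in this change.
	"k8s.io/autoscaler/cluster-autoscaler/cloudprovider/oci/oci-go-sdk/v43/workrequests"
)

// listAllWorkRequests pages through ListWorkRequests for one compartment.
func listAllWorkRequests(ctx context.Context, client workrequests.WorkRequestClient, compartmentID string) ([]workrequests.WorkRequestSummary, error) {
	var all []workrequests.WorkRequestSummary
	var page *string
	for {
		resp, err := client.ListWorkRequests(ctx, workrequests.ListWorkRequestsRequest{
			CompartmentId: &compartmentID,
			Page:          page,
		})
		if err != nil {
			return nil, err
		}
		all = append(all, resp.Items...)
		if resp.OpcNextPage == nil {
			// No opc-next-page header means this was the last page.
			return all, nil
		}
		page = resp.OpcNextPage
	}
}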

View File

@ -0,0 +1,87 @@
// Copyright (c) 2016, 2018, 2021, Oracle and/or its affiliates. All rights reserved.
// This software is dual-licensed to you under the Universal Permissive License (UPL) 1.0 as shown at https://oss.oracle.com/licenses/upl or Apache License 2.0 as shown at http://www.apache.org/licenses/LICENSE-2.0. You may choose either license.
// Code generated. DO NOT EDIT.
// Work Requests API
//
// Many of the API operations that you use to create and configure Compute resources do not take effect
// immediately. In these cases, the operation spawns an asynchronous workflow to fulfill the request.
// Work requests provide visibility into the status of these in-progress, long-running workflows.
// For more information about work requests and the operations that spawn work requests, see
// Viewing the State of a Compute Work Request (https://docs.cloud.oracle.com/iaas/Content/Compute/Tasks/viewingworkrequestcompute.htm).
//
package workrequests
import (
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider/oci/oci-go-sdk/v43/common"
)
// WorkRequest An asynchronous work request.
type WorkRequest struct {
// The asynchronous operation tracked by this work request.
OperationType *string `mandatory:"true" json:"operationType"`
// The status of the work request.
Status WorkRequestStatusEnum `mandatory:"true" json:"status"`
// The OCID (https://docs.cloud.oracle.com/iaas/Content/General/Concepts/identifiers.htm) of the work request.
Id *string `mandatory:"true" json:"id"`
// The OCID (https://docs.cloud.oracle.com/iaas/Content/General/Concepts/identifiers.htm) of the compartment
// that contains the work request.
CompartmentId *string `mandatory:"true" json:"compartmentId"`
// The resources that are affected by this work request.
Resources []WorkRequestResource `mandatory:"true" json:"resources"`
// The percentage complete of the operation tracked by this work request.
PercentComplete *float32 `mandatory:"true" json:"percentComplete"`
// The date and time the work request was created, in the format defined by RFC3339.
TimeAccepted *common.SDKTime `mandatory:"true" json:"timeAccepted"`
// The date and time the work request transitioned from `ACCEPTED` to `IN_PROGRESS`,
// in the format defined by RFC3339.
TimeStarted *common.SDKTime `mandatory:"false" json:"timeStarted"`
// The date and time the work request reached a terminal state, either `FAILED` or `SUCCEEDED`.
// Format is defined by RFC3339.
TimeFinished *common.SDKTime `mandatory:"false" json:"timeFinished"`
}
func (m WorkRequest) String() string {
return common.PointerString(m)
}
// WorkRequestStatusEnum Enum with underlying type: string
type WorkRequestStatusEnum string
// Set of constants representing the allowable values for WorkRequestStatusEnum
const (
WorkRequestStatusAccepted WorkRequestStatusEnum = "ACCEPTED"
WorkRequestStatusInProgress WorkRequestStatusEnum = "IN_PROGRESS"
WorkRequestStatusFailed WorkRequestStatusEnum = "FAILED"
WorkRequestStatusSucceeded WorkRequestStatusEnum = "SUCCEEDED"
WorkRequestStatusCanceling WorkRequestStatusEnum = "CANCELING"
WorkRequestStatusCanceled WorkRequestStatusEnum = "CANCELED"
)
var mappingWorkRequestStatus = map[string]WorkRequestStatusEnum{
"ACCEPTED": WorkRequestStatusAccepted,
"IN_PROGRESS": WorkRequestStatusInProgress,
"FAILED": WorkRequestStatusFailed,
"SUCCEEDED": WorkRequestStatusSucceeded,
"CANCELING": WorkRequestStatusCanceling,
"CANCELED": WorkRequestStatusCanceled,
}
// GetWorkRequestStatusEnumValues Enumerates the set of values for WorkRequestStatusEnum
func GetWorkRequestStatusEnumValues() []WorkRequestStatusEnum {
values := make([]WorkRequestStatusEnum, 0)
for _, v := range mappingWorkRequestStatus {
values = append(values, v)
}
return values
}
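A minimal polling sketch built on GetWorkRequest and the status constants above, assuming a configured WorkRequestClient from this package. waitForWorkRequest is a hypothetical helper, not part of the generated SDK; it waits until the work request reaches a terminal status (SUCCEEDED, FAILED, or CANCELED) or the context is cancelled.

package ociexample

import (
	"context"
	"time"

	// Assumed import path for the vendored workrequests package in this change.
	"k8s.io/autoscaler/cluster-autoscaler/cloudprovider/oci/oci-go-sdk/v43/workrequests"
)

// waitForWorkRequest polls GetWorkRequest until a terminal status is reached.
func waitForWorkRequest(ctx context.Context, client workrequests.WorkRequestClient, workRequestID string) (workrequests.WorkRequestStatusEnum, error) {
	for {
		resp, err := client.GetWorkRequest(ctx, workrequests.GetWorkRequestRequest{WorkRequestId: &workRequestID})
		if err != nil {
			return "", err
		}
		// Status is promoted from the WorkRequest embedded in the response.
		switch resp.Status {
		case workrequests.WorkRequestStatusSucceeded,
			workrequests.WorkRequestStatusFailed,
			workrequests.WorkRequestStatusCanceled:
			return resp.Status, nil
		}
		select {
		case <-ctx.Done():
			return resp.Status, ctx.Err()
		case <-time.After(10 * time.Second):
		}
	}
}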

View File

@ -0,0 +1,35 @@
// Copyright (c) 2016, 2018, 2021, Oracle and/or its affiliates. All rights reserved.
// This software is dual-licensed to you under the Universal Permissive License (UPL) 1.0 as shown at https://oss.oracle.com/licenses/upl or Apache License 2.0 as shown at http://www.apache.org/licenses/LICENSE-2.0. You may choose either license.
// Code generated. DO NOT EDIT.
// Work Requests API
//
// Many of the API operations that you use to create and configure Compute resources do not take effect
// immediately. In these cases, the operation spawns an asynchronous workflow to fulfill the request.
// Work requests provide visibility into the status of these in-progress, long-running workflows.
// For more information about work requests and the operations that spawn work requests, see
// Viewing the State of a Compute Work Request (https://docs.cloud.oracle.com/iaas/Content/Compute/Tasks/viewingworkrequestcompute.htm).
//
package workrequests
import (
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider/oci/oci-go-sdk/v43/common"
)
// WorkRequestError An error encountered while executing an operation that is tracked by a work request.
type WorkRequestError struct {
// A machine-usable code for the error that occurred.
Code *string `mandatory:"true" json:"code"`
// A human-readable error string.
Message *string `mandatory:"true" json:"message"`
// The date and time the error occurred.
Timestamp *common.SDKTime `mandatory:"true" json:"timestamp"`
}
func (m WorkRequestError) String() string {
return common.PointerString(m)
}

View File

@ -0,0 +1,32 @@
// Copyright (c) 2016, 2018, 2021, Oracle and/or its affiliates. All rights reserved.
// This software is dual-licensed to you under the Universal Permissive License (UPL) 1.0 as shown at https://oss.oracle.com/licenses/upl or Apache License 2.0 as shown at http://www.apache.org/licenses/LICENSE-2.0. You may choose either license.
// Code generated. DO NOT EDIT.
// Work Requests API
//
// Many of the API operations that you use to create and configure Compute resources do not take effect
// immediately. In these cases, the operation spawns an asynchronous workflow to fulfill the request.
// Work requests provide visibility into the status of these in-progress, long-running workflows.
// For more information about work requests and the operations that spawn work requests, see
// Viewing the State of a Compute Work Request (https://docs.cloud.oracle.com/iaas/Content/Compute/Tasks/viewingworkrequestcompute.htm).
//
package workrequests
import (
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider/oci/oci-go-sdk/v43/common"
)
// WorkRequestLogEntry A log message from executing an operation that is tracked by a work request.
type WorkRequestLogEntry struct {
// A human-readable log message.
Message *string `mandatory:"true" json:"message"`
// The date and time the log message was written.
Timestamp *common.SDKTime `mandatory:"true" json:"timestamp"`
}
func (m WorkRequestLogEntry) String() string {
return common.PointerString(m)
}

View File

@ -0,0 +1,70 @@
// Copyright (c) 2016, 2018, 2021, Oracle and/or its affiliates. All rights reserved.
// This software is dual-licensed to you under the Universal Permissive License (UPL) 1.0 as shown at https://oss.oracle.com/licenses/upl or Apache License 2.0 as shown at http://www.apache.org/licenses/LICENSE-2.0. You may choose either license.
// Code generated. DO NOT EDIT.
// Work Requests API
//
// Many of the API operations that you use to create and configure Compute resources do not take effect
// immediately. In these cases, the operation spawns an asynchronous workflow to fulfill the request.
// Work requests provide visibility into the status of these in-progress, long-running workflows.
// For more information about work requests and the operations that spawn work requests, see
// Viewing the State of a Compute Work Request (https://docs.cloud.oracle.com/iaas/Content/Compute/Tasks/viewingworkrequestcompute.htm).
//
package workrequests
import (
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider/oci/oci-go-sdk/v43/common"
)
// WorkRequestResource A resource that is created or operated on by an asynchronous operation that is tracked by
// a work request.
type WorkRequestResource struct {
// The way in which this resource was affected by the operation that spawned the work
// request.
ActionType WorkRequestResourceActionTypeEnum `mandatory:"true" json:"actionType"`
// The resource type the work request affects.
EntityType *string `mandatory:"true" json:"entityType"`
// An OCID (https://docs.cloud.oracle.com/iaas/Content/General/Concepts/identifiers.htm) or other unique identifier for the
// resource.
Identifier *string `mandatory:"true" json:"identifier"`
// The URI path that you can use for a GET request to access the resource metadata.
EntityUri *string `mandatory:"false" json:"entityUri"`
}
func (m WorkRequestResource) String() string {
return common.PointerString(m)
}
// WorkRequestResourceActionTypeEnum Enum with underlying type: string
type WorkRequestResourceActionTypeEnum string
// Set of constants representing the allowable values for WorkRequestResourceActionTypeEnum
const (
WorkRequestResourceActionTypeCreated WorkRequestResourceActionTypeEnum = "CREATED"
WorkRequestResourceActionTypeUpdated WorkRequestResourceActionTypeEnum = "UPDATED"
WorkRequestResourceActionTypeDeleted WorkRequestResourceActionTypeEnum = "DELETED"
WorkRequestResourceActionTypeRelated WorkRequestResourceActionTypeEnum = "RELATED"
WorkRequestResourceActionTypeInProgress WorkRequestResourceActionTypeEnum = "IN_PROGRESS"
)
var mappingWorkRequestResourceActionType = map[string]WorkRequestResourceActionTypeEnum{
"CREATED": WorkRequestResourceActionTypeCreated,
"UPDATED": WorkRequestResourceActionTypeUpdated,
"DELETED": WorkRequestResourceActionTypeDeleted,
"RELATED": WorkRequestResourceActionTypeRelated,
"IN_PROGRESS": WorkRequestResourceActionTypeInProgress,
}
// GetWorkRequestResourceActionTypeEnumValues Enumerates the set of values for WorkRequestResourceActionTypeEnum
func GetWorkRequestResourceActionTypeEnumValues() []WorkRequestResourceActionTypeEnum {
values := make([]WorkRequestResourceActionTypeEnum, 0)
for _, v := range mappingWorkRequestResourceActionType {
values = append(values, v)
}
return values
}

View File

@ -0,0 +1,84 @@
// Copyright (c) 2016, 2018, 2021, Oracle and/or its affiliates. All rights reserved.
// This software is dual-licensed to you under the Universal Permissive License (UPL) 1.0 as shown at https://oss.oracle.com/licenses/upl or Apache License 2.0 as shown at http://www.apache.org/licenses/LICENSE-2.0. You may choose either license.
// Code generated. DO NOT EDIT.
// Work Requests API
//
// Many of the API operations that you use to create and configure Compute resources do not take effect
// immediately. In these cases, the operation spawns an asynchronous workflow to fulfill the request.
// Work requests provide visibility into the status of these in-progress, long-running workflows.
// For more information about work requests and the operations that spawn work requests, see
// Viewing the State of a Compute Work Request (https://docs.cloud.oracle.com/iaas/Content/Compute/Tasks/viewingworkrequestcompute.htm).
//
package workrequests
import (
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider/oci/oci-go-sdk/v43/common"
)
// WorkRequestSummary A summary of the status of a work request.
type WorkRequestSummary struct {
// The asynchronous operation tracked by this work request.
OperationType *string `mandatory:"true" json:"operationType"`
// The status of the work request.
Status WorkRequestSummaryStatusEnum `mandatory:"true" json:"status"`
// The OCID (https://docs.cloud.oracle.com/iaas/Content/General/Concepts/identifiers.htm) of the work request.
Id *string `mandatory:"true" json:"id"`
// The OCID (https://docs.cloud.oracle.com/iaas/Content/General/Concepts/identifiers.htm) of the compartment containing
// this work request.
CompartmentId *string `mandatory:"true" json:"compartmentId"`
// The percentage complete of the operation tracked by this work request.
PercentComplete *float32 `mandatory:"true" json:"percentComplete"`
// The date and time the work request was created, in the format defined by RFC3339.
TimeAccepted *common.SDKTime `mandatory:"true" json:"timeAccepted"`
// The date and time the work request transitioned from `ACCEPTED` to `IN_PROGRESS`, in
// the format defined by RFC3339.
TimeStarted *common.SDKTime `mandatory:"false" json:"timeStarted"`
// The date and time the work request reached a terminal state, either `FAILED` or `SUCCEEDED`.
// Format is defined by RFC3339.
TimeFinished *common.SDKTime `mandatory:"false" json:"timeFinished"`
}
func (m WorkRequestSummary) String() string {
return common.PointerString(m)
}
// WorkRequestSummaryStatusEnum Enum with underlying type: string
type WorkRequestSummaryStatusEnum string
// Set of constants representing the allowable values for WorkRequestSummaryStatusEnum
const (
WorkRequestSummaryStatusAccepted WorkRequestSummaryStatusEnum = "ACCEPTED"
WorkRequestSummaryStatusInProgress WorkRequestSummaryStatusEnum = "IN_PROGRESS"
WorkRequestSummaryStatusFailed WorkRequestSummaryStatusEnum = "FAILED"
WorkRequestSummaryStatusSucceeded WorkRequestSummaryStatusEnum = "SUCCEEDED"
WorkRequestSummaryStatusCanceling WorkRequestSummaryStatusEnum = "CANCELING"
WorkRequestSummaryStatusCanceled WorkRequestSummaryStatusEnum = "CANCELED"
)
var mappingWorkRequestSummaryStatus = map[string]WorkRequestSummaryStatusEnum{
"ACCEPTED": WorkRequestSummaryStatusAccepted,
"IN_PROGRESS": WorkRequestSummaryStatusInProgress,
"FAILED": WorkRequestSummaryStatusFailed,
"SUCCEEDED": WorkRequestSummaryStatusSucceeded,
"CANCELING": WorkRequestSummaryStatusCanceling,
"CANCELED": WorkRequestSummaryStatusCanceled,
}
// GetWorkRequestSummaryStatusEnumValues Enumerates the set of values for WorkRequestSummaryStatusEnum
func GetWorkRequestSummaryStatusEnumValues() []WorkRequestSummaryStatusEnum {
values := make([]WorkRequestSummaryStatusEnum, 0)
for _, v := range mappingWorkRequestSummaryStatus {
values = append(values, v)
}
return values
}

View File

@ -0,0 +1,301 @@
// Copyright (c) 2016, 2018, 2021, Oracle and/or its affiliates. All rights reserved.
// This software is dual-licensed to you under the Universal Permissive License (UPL) 1.0 as shown at https://oss.oracle.com/licenses/upl or Apache License 2.0 as shown at http://www.apache.org/licenses/LICENSE-2.0. You may choose either license.
// Code generated. DO NOT EDIT.
// Work Requests API
//
// Many of the API operations that you use to create and configure Compute resources do not take effect
// immediately. In these cases, the operation spawns an asynchronous workflow to fulfill the request.
// Work requests provide visibility into the status of these in-progress, long-running workflows.
// For more information about work requests and the operations that spawn work requests, see
// Viewing the State of a Compute Work Request (https://docs.cloud.oracle.com/iaas/Content/Compute/Tasks/viewingworkrequestcompute.htm).
//
package workrequests
import (
"context"
"fmt"
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider/oci/oci-go-sdk/v43/common"
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider/oci/oci-go-sdk/v43/common/auth"
"net/http"
)
// WorkRequestClient a client for WorkRequest
type WorkRequestClient struct {
common.BaseClient
config *common.ConfigurationProvider
}
// NewWorkRequestClientWithConfigurationProvider Creates a new default WorkRequest client with the given configuration provider.
// the configuration provider will be used for the default signer as well as reading the region
func NewWorkRequestClientWithConfigurationProvider(configProvider common.ConfigurationProvider) (client WorkRequestClient, err error) {
provider, err := auth.GetGenericConfigurationProvider(configProvider)
if err != nil {
return client, err
}
baseClient, e := common.NewClientWithConfig(provider)
if e != nil {
return client, e
}
return newWorkRequestClientFromBaseClient(baseClient, provider)
}
// NewWorkRequestClientWithOboToken Creates a new default WorkRequest client with the given configuration provider.
// The obotoken will be added to default headers and signed; the configuration provider will be used for the signer
// as well as reading the region.
func NewWorkRequestClientWithOboToken(configProvider common.ConfigurationProvider, oboToken string) (client WorkRequestClient, err error) {
baseClient, err := common.NewClientWithOboToken(configProvider, oboToken)
if err != nil {
return client, err
}
return newWorkRequestClientFromBaseClient(baseClient, configProvider)
}
func newWorkRequestClientFromBaseClient(baseClient common.BaseClient, configProvider common.ConfigurationProvider) (client WorkRequestClient, err error) {
client = WorkRequestClient{BaseClient: baseClient}
client.BasePath = "20160918"
err = client.setConfigurationProvider(configProvider)
return
}
// SetRegion overrides the region of this client.
func (client *WorkRequestClient) SetRegion(region string) {
client.Host = common.StringToRegion(region).EndpointForTemplate("workrequests", "https://iaas.{region}.{secondLevelDomain}")
}
// SetConfigurationProvider sets the configuration provider including the region, returns an error if is not valid
func (client *WorkRequestClient) setConfigurationProvider(configProvider common.ConfigurationProvider) error {
if ok, err := common.IsConfigurationProviderValid(configProvider); !ok {
return err
}
// Error has been checked already
region, _ := configProvider.Region()
client.SetRegion(region)
client.config = &configProvider
return nil
}
// ConfigurationProvider the ConfigurationProvider used in this client, or null if none set
func (client *WorkRequestClient) ConfigurationProvider() *common.ConfigurationProvider {
return client.config
}
// GetWorkRequest Gets the details of a work request.
//
// # See also
//
// Click https://docs.cloud.oracle.com/en-us/iaas/tools/go-sdk-examples/latest/workrequests/GetWorkRequest.go.html to see an example of how to use GetWorkRequest API.
func (client WorkRequestClient) GetWorkRequest(ctx context.Context, request GetWorkRequestRequest) (response GetWorkRequestResponse, err error) {
var ociResponse common.OCIResponse
policy := common.NoRetryPolicy()
if client.RetryPolicy() != nil {
policy = *client.RetryPolicy()
}
if request.RetryPolicy() != nil {
policy = *request.RetryPolicy()
}
ociResponse, err = common.Retry(ctx, request, client.getWorkRequest, policy)
if err != nil {
if ociResponse != nil {
if httpResponse := ociResponse.HTTPResponse(); httpResponse != nil {
opcRequestId := httpResponse.Header.Get("opc-request-id")
response = GetWorkRequestResponse{RawResponse: httpResponse, OpcRequestId: &opcRequestId}
} else {
response = GetWorkRequestResponse{}
}
}
return
}
if convertedResponse, ok := ociResponse.(GetWorkRequestResponse); ok {
response = convertedResponse
} else {
err = fmt.Errorf("failed to convert OCIResponse into GetWorkRequestResponse")
}
return
}
// getWorkRequest implements the OCIOperation interface (enables retrying operations)
func (client WorkRequestClient) getWorkRequest(ctx context.Context, request common.OCIRequest, binaryReqBody *common.OCIReadSeekCloser) (common.OCIResponse, error) {
httpRequest, err := request.HTTPRequest(http.MethodGet, "/workRequests/{workRequestId}", binaryReqBody)
if err != nil {
return nil, err
}
var response GetWorkRequestResponse
var httpResponse *http.Response
httpResponse, err = client.Call(ctx, &httpRequest)
defer common.CloseBodyIfValid(httpResponse)
response.RawResponse = httpResponse
if err != nil {
return response, err
}
err = common.UnmarshalResponse(httpResponse, &response)
return response, err
}
// ListWorkRequestErrors Gets the errors for a work request.
//
// # See also
//
// Click https://docs.cloud.oracle.com/en-us/iaas/tools/go-sdk-examples/latest/workrequests/ListWorkRequestErrors.go.html to see an example of how to use ListWorkRequestErrors API.
func (client WorkRequestClient) ListWorkRequestErrors(ctx context.Context, request ListWorkRequestErrorsRequest) (response ListWorkRequestErrorsResponse, err error) {
var ociResponse common.OCIResponse
policy := common.NoRetryPolicy()
if client.RetryPolicy() != nil {
policy = *client.RetryPolicy()
}
if request.RetryPolicy() != nil {
policy = *request.RetryPolicy()
}
ociResponse, err = common.Retry(ctx, request, client.listWorkRequestErrors, policy)
if err != nil {
if ociResponse != nil {
if httpResponse := ociResponse.HTTPResponse(); httpResponse != nil {
opcRequestId := httpResponse.Header.Get("opc-request-id")
response = ListWorkRequestErrorsResponse{RawResponse: httpResponse, OpcRequestId: &opcRequestId}
} else {
response = ListWorkRequestErrorsResponse{}
}
}
return
}
if convertedResponse, ok := ociResponse.(ListWorkRequestErrorsResponse); ok {
response = convertedResponse
} else {
err = fmt.Errorf("failed to convert OCIResponse into ListWorkRequestErrorsResponse")
}
return
}
// listWorkRequestErrors implements the OCIOperation interface (enables retrying operations)
func (client WorkRequestClient) listWorkRequestErrors(ctx context.Context, request common.OCIRequest, binaryReqBody *common.OCIReadSeekCloser) (common.OCIResponse, error) {
httpRequest, err := request.HTTPRequest(http.MethodGet, "/workRequests/{workRequestId}/errors", binaryReqBody)
if err != nil {
return nil, err
}
var response ListWorkRequestErrorsResponse
var httpResponse *http.Response
httpResponse, err = client.Call(ctx, &httpRequest)
defer common.CloseBodyIfValid(httpResponse)
response.RawResponse = httpResponse
if err != nil {
return response, err
}
err = common.UnmarshalResponse(httpResponse, &response)
return response, err
}
// ListWorkRequestLogs Gets the logs for a work request.
//
// # See also
//
// Click https://docs.cloud.oracle.com/en-us/iaas/tools/go-sdk-examples/latest/workrequests/ListWorkRequestLogs.go.html to see an example of how to use ListWorkRequestLogs API.
func (client WorkRequestClient) ListWorkRequestLogs(ctx context.Context, request ListWorkRequestLogsRequest) (response ListWorkRequestLogsResponse, err error) {
var ociResponse common.OCIResponse
policy := common.NoRetryPolicy()
if client.RetryPolicy() != nil {
policy = *client.RetryPolicy()
}
if request.RetryPolicy() != nil {
policy = *request.RetryPolicy()
}
ociResponse, err = common.Retry(ctx, request, client.listWorkRequestLogs, policy)
if err != nil {
if ociResponse != nil {
if httpResponse := ociResponse.HTTPResponse(); httpResponse != nil {
opcRequestId := httpResponse.Header.Get("opc-request-id")
response = ListWorkRequestLogsResponse{RawResponse: httpResponse, OpcRequestId: &opcRequestId}
} else {
response = ListWorkRequestLogsResponse{}
}
}
return
}
if convertedResponse, ok := ociResponse.(ListWorkRequestLogsResponse); ok {
response = convertedResponse
} else {
err = fmt.Errorf("failed to convert OCIResponse into ListWorkRequestLogsResponse")
}
return
}
// listWorkRequestLogs implements the OCIOperation interface (enables retrying operations)
func (client WorkRequestClient) listWorkRequestLogs(ctx context.Context, request common.OCIRequest, binaryReqBody *common.OCIReadSeekCloser) (common.OCIResponse, error) {
httpRequest, err := request.HTTPRequest(http.MethodGet, "/workRequests/{workRequestId}/logs", binaryReqBody)
if err != nil {
return nil, err
}
var response ListWorkRequestLogsResponse
var httpResponse *http.Response
httpResponse, err = client.Call(ctx, &httpRequest)
defer common.CloseBodyIfValid(httpResponse)
response.RawResponse = httpResponse
if err != nil {
return response, err
}
err = common.UnmarshalResponse(httpResponse, &response)
return response, err
}
// ListWorkRequests Lists the work requests in a compartment or for a specified resource.
//
// # See also
//
// Click https://docs.cloud.oracle.com/en-us/iaas/tools/go-sdk-examples/latest/workrequests/ListWorkRequests.go.html to see an example of how to use ListWorkRequests API.
func (client WorkRequestClient) ListWorkRequests(ctx context.Context, request ListWorkRequestsRequest) (response ListWorkRequestsResponse, err error) {
var ociResponse common.OCIResponse
policy := common.NoRetryPolicy()
if client.RetryPolicy() != nil {
policy = *client.RetryPolicy()
}
if request.RetryPolicy() != nil {
policy = *request.RetryPolicy()
}
ociResponse, err = common.Retry(ctx, request, client.listWorkRequests, policy)
if err != nil {
if ociResponse != nil {
if httpResponse := ociResponse.HTTPResponse(); httpResponse != nil {
opcRequestId := httpResponse.Header.Get("opc-request-id")
response = ListWorkRequestsResponse{RawResponse: httpResponse, OpcRequestId: &opcRequestId}
} else {
response = ListWorkRequestsResponse{}
}
}
return
}
if convertedResponse, ok := ociResponse.(ListWorkRequestsResponse); ok {
response = convertedResponse
} else {
err = fmt.Errorf("failed to convert OCIResponse into ListWorkRequestsResponse")
}
return
}
// listWorkRequests implements the OCIOperation interface (enables retrying operations)
func (client WorkRequestClient) listWorkRequests(ctx context.Context, request common.OCIRequest, binaryReqBody *common.OCIReadSeekCloser) (common.OCIResponse, error) {
httpRequest, err := request.HTTPRequest(http.MethodGet, "/workRequests", binaryReqBody)
if err != nil {
return nil, err
}
var response ListWorkRequestsResponse
var httpResponse *http.Response
httpResponse, err = client.Call(ctx, &httpRequest)
defer common.CloseBodyIfValid(httpResponse)
response.RawResponse = httpResponse
if err != nil {
return response, err
}
err = common.UnmarshalResponse(httpResponse, &response)
return response, err
}
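
// exampleGetWorkRequest is an illustrative sketch only, not part of this change: it shows how a
// caller might use this client to fetch a single work request. The function name, the caller-supplied
// work-request OCID, and the minimal error handling are assumptions made for the example.
func exampleGetWorkRequest(ctx context.Context, client WorkRequestClient, workRequestID string) (*GetWorkRequestResponse, error) {
	// Build the request; WorkRequestId is the OCID of the work request to look up.
	request := GetWorkRequestRequest{WorkRequestId: common.String(workRequestID)}
	// GetWorkRequest applies the client's (or the request's) retry policy before issuing the call.
	response, err := client.GetWorkRequest(ctx, request)
	if err != nil {
		return nil, err
	}
	return &response, nil
}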

View File

@ -96,6 +96,11 @@ func (ocp *OciCloudProvider) NodeGroupForNode(n *apiv1.Node) (cloudprovider.Node
return ng, err
}
// HasInstance returns whether a given node has a corresponding instance in this cloud provider
func (ocp *OciCloudProvider) HasInstance(n *apiv1.Node) (bool, error) {
return true, cloudprovider.ErrNotImplemented
}
// Pricing returns pricing model for this cloud provider or error if not available.
// Implementation optional.
func (ocp *OciCloudProvider) Pricing() (cloudprovider.PricingModel, caerrors.AutoscalerError) {

View File

@ -35,6 +35,9 @@ const (
instanceIDLabelSuffix = "instance-id_suffix"
ociInstancePoolIDAnnotation = "oci.oraclecloud.com/instancepool-id"
ociInstancePoolResourceIdent = "instancepool"
ociInstancePoolLaunchOp = "LaunchInstancesInPool"
instanceStateUnfulfilled = "Unfulfilled"
instanceIDUnfulfilled = "instance_placeholder"
// Overload ociInstancePoolIDAnnotation to indicate a kubernetes node doesn't belong to any OCI Instance Pool.
ociInstancePoolIDNonPoolMember = "non_pool_member"

View File

@ -18,10 +18,12 @@ package oci
import (
"context"
"fmt"
"github.com/pkg/errors"
"k8s.io/apimachinery/pkg/util/wait"
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider/oci/oci-go-sdk/v43/common"
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider/oci/oci-go-sdk/v43/core"
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider/oci/oci-go-sdk/v43/workrequests"
"k8s.io/klog/v2"
"math"
"strings"
@ -48,6 +50,13 @@ type VirtualNetworkClient interface {
GetVnic(context.Context, core.GetVnicRequest) (core.GetVnicResponse, error)
}
// WorkRequestClient wraps workrequests.WorkRequestClient exposing the functions we actually require.
type WorkRequestClient interface {
GetWorkRequest(context.Context, workrequests.GetWorkRequestRequest) (workrequests.GetWorkRequestResponse, error)
ListWorkRequests(context.Context, workrequests.ListWorkRequestsRequest) (workrequests.ListWorkRequestsResponse, error)
ListWorkRequestErrors(context.Context, workrequests.ListWorkRequestErrorsRequest) (workrequests.ListWorkRequestErrorsResponse, error)
}
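// Note: both the concrete workrequests.WorkRequestClient and the mockWorkRequestClient defined in the
// unit tests satisfy this interface, which lets the instance pool cache be exercised without real OCI calls.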
type instancePoolCache struct {
mu sync.Mutex
poolCache map[string]*core.InstancePool
@ -57,9 +66,10 @@ type instancePoolCache struct {
computeManagementClient ComputeMgmtClient
computeClient ComputeClient
virtualNetworkClient VirtualNetworkClient
workRequestsClient WorkRequestClient
}
func newInstancePoolCache(computeManagementClient ComputeMgmtClient, computeClient ComputeClient, virtualNetworkClient VirtualNetworkClient) *instancePoolCache {
func newInstancePoolCache(computeManagementClient ComputeMgmtClient, computeClient ComputeClient, virtualNetworkClient VirtualNetworkClient, workRequestsClient WorkRequestClient) *instancePoolCache {
return &instancePoolCache{
poolCache: map[string]*core.InstancePool{},
instanceSummaryCache: map[string]*[]core.InstanceSummary{},
@ -67,6 +77,7 @@ func newInstancePoolCache(computeManagementClient ComputeMgmtClient, computeClie
computeManagementClient: computeManagementClient,
computeClient: computeClient,
virtualNetworkClient: virtualNetworkClient,
workRequestsClient: workRequestsClient,
}
}
@ -82,16 +93,16 @@ func (c *instancePoolCache) rebuild(staticInstancePools map[string]*InstancePool
// Since we only support static instance-pools we don't need to worry about pruning.
for id := range staticInstancePools {
resp, err := c.computeManagementClient.GetInstancePool(context.Background(), core.GetInstancePoolRequest{
getInstancePoolResp, err := c.computeManagementClient.GetInstancePool(context.Background(), core.GetInstancePoolRequest{
InstancePoolId: common.String(id),
})
if err != nil {
klog.Errorf("get instance pool %s failed: %v", id, err)
return err
}
klog.V(6).Infof("GetInstancePool() response %v", resp.InstancePool)
klog.V(6).Infof("GetInstancePool() response %v", getInstancePoolResp.InstancePool)
c.setInstancePool(&resp.InstancePool)
c.setInstancePool(&getInstancePoolResp.InstancePool)
var instanceSummaries []core.InstanceSummary
var page *string
@ -112,7 +123,32 @@ func (c *instancePoolCache) rebuild(staticInstancePools map[string]*InstancePool
break
}
}
c.setInstanceSummaries(*resp.InstancePool.Id, &instanceSummaries)
c.setInstanceSummaries(id, &instanceSummaries)
// Compare the instance pool's target size with the latest number of InstanceSummaries. If the pool has fewer
// instances than requested, look for unrecoverable errors such as quota or capacity issues that blocked scaling.
if len(*c.instanceSummaryCache[id]) < *c.poolCache[id].Size {
klog.V(4).Infof("Instance pool %s has only %d instances created while requested count is %d. ",
*getInstancePoolResp.InstancePool.DisplayName, len(*c.instanceSummaryCache[id]), *c.poolCache[id].Size)
if getInstancePoolResp.LifecycleState != core.InstancePoolLifecycleStateRunning {
lastWorkRequest, err := c.lastStartedWorkRequest(*getInstancePoolResp.CompartmentId, id)
// The last started work request may be many minutes old depending on the sync interval
// and the exponential backoff time of retried OCI operations.
if err == nil && *lastWorkRequest.OperationType == ociInstancePoolLaunchOp &&
lastWorkRequest.Status == workrequests.WorkRequestSummaryStatusFailed {
unrecoverableErrorMsg := c.firstUnrecoverableErrorForWorkRequest(*lastWorkRequest.Id)
if unrecoverableErrorMsg != "" {
klog.V(4).Infof("Creating placeholder instances for %s.", *getInstancePoolResp.InstancePool.DisplayName)
for i := len(*c.instanceSummaryCache[id]); i < *c.poolCache[id].Size; i++ {
c.addUnfulfilledInstanceToCache(id, fmt.Sprintf("%s%s-%d", instanceIDUnfulfilled,
*getInstancePoolResp.InstancePool.Id, i), *getInstancePoolResp.InstancePool.CompartmentId,
fmt.Sprintf("%s-%d", *getInstancePoolResp.InstancePool.DisplayName, i))
}
}
}
}
}
}
// Reset unowned instances cache.
@ -121,6 +157,15 @@ func (c *instancePoolCache) rebuild(staticInstancePools map[string]*InstancePool
return nil
}
func (c *instancePoolCache) addUnfulfilledInstanceToCache(instancePoolID, instanceID, compartmentID, name string) {
*c.instanceSummaryCache[instancePoolID] = append(*c.instanceSummaryCache[instancePoolID], core.InstanceSummary{
Id: common.String(instanceID),
CompartmentId: common.String(compartmentID),
State: common.String(instanceStateUnfulfilled),
DisplayName: common.String(name),
})
}
// removeInstance tries to remove the instance from the specified instance pool. If the instance isn't in the array,
// it does nothing. removeInstance returns true if it actually removed the instance and reduced the size of
// the instance pool.
@ -131,7 +176,12 @@ func (c *instancePoolCache) removeInstance(instancePool InstancePoolNodeGroup, i
return false
}
_, err := c.computeManagementClient.DetachInstancePoolInstance(context.Background(), core.DetachInstancePoolInstanceRequest{
var err error
if strings.Contains(instanceID, instanceIDUnfulfilled) {
// For an unfulfilled instance, reduce the target size of the instance pool and remove the placeholder instance from cache.
err = c.setSize(instancePool.Id(), *c.poolCache[instancePool.Id()].Size-1)
} else {
_, err = c.computeManagementClient.DetachInstancePoolInstance(context.Background(), core.DetachInstancePoolInstanceRequest{
InstancePoolId: common.String(instancePool.Id()),
DetachInstancePoolInstanceDetails: core.DetachInstancePoolInstanceDetails{
InstanceId: common.String(instanceID),
@ -139,11 +189,14 @@ func (c *instancePoolCache) removeInstance(instancePool InstancePoolNodeGroup, i
IsAutoTerminate: common.Bool(true),
},
})
}
if err == nil {
c.mu.Lock()
// Decrease pool size in cache since IsDecrementSize was true
// Decrease pool size in cache
c.poolCache[instancePool.Id()].Size = common.Int(*c.poolCache[instancePool.Id()].Size - 1)
// Since we're removing the instance from cache, we don't need to expire the pool cache
c.removeInstanceSummaryFromCache(instancePool.Id(), instanceID)
c.mu.Unlock()
return true
}
@ -156,6 +209,12 @@ func (c *instancePoolCache) removeInstance(instancePool InstancePoolNodeGroup, i
// through the configured instance-pools (ListInstancePoolInstances) for a match.
func (c *instancePoolCache) findInstanceByDetails(ociInstance OciRef) (*OciRef, error) {
// Unfulfilled instance placeholder
if strings.Contains(ociInstance.Name, instanceIDUnfulfilled) {
instIndex := strings.LastIndex(ociInstance.Name, "-")
ociInstance.PoolID = strings.Replace(ociInstance.Name[:instIndex], instanceIDUnfulfilled, "", 1)
return &ociInstance, nil
}
// Minimum amount of information we need to make a positive match
if ociInstance.InstanceID == "" && ociInstance.PrivateIPAddress == "" && ociInstance.PublicIPAddress == "" {
return nil, errors.New("instance id or an IP address is required to resolve details")
@ -321,6 +380,7 @@ func (c *instancePoolCache) setSize(instancePoolID string, size int) error {
return err
}
isScaleUp := size > *getInstancePoolResp.Size
scaleDelta := int(math.Abs(float64(*getInstancePoolResp.Size - size)))
updateDetails := core.UpdateInstancePoolDetails{
@ -336,17 +396,25 @@ func (c *instancePoolCache) setSize(instancePoolID string, size int) error {
return err
}
c.mu.Lock()
c.poolCache[instancePoolID].Size = common.Int(size)
c.mu.Unlock()
// Just return immediately if this was a scale down, to be consistent with DetachInstancePoolInstance
if !isScaleUp {
return nil
}
// Only wait for scale up (not scale down)
ctx := context.Background()
ctx, cancelFunc := context.WithTimeout(ctx, maxScalingWaitTime(scaleDelta, 20, 10*time.Minute))
// Ensure this context is always canceled so channels, go routines, etc. always complete.
defer cancelFunc()
// Wait for the number of Running instances in this pool to reach size
err = c.waitForRunningInstanceCount(ctx, size, instancePoolID, *getInstancePoolResp.CompartmentId)
if err != nil {
return err
}
// Allow an additional time for the pool State to reach Running
ctx, _ = context.WithTimeout(ctx, 10*time.Minute)
err = c.waitForState(ctx, instancePoolID, core.InstancePoolLifecycleStateRunning)
@ -354,10 +422,6 @@ func (c *instancePoolCache) setSize(instancePoolID string, size int) error {
return err
}
c.mu.Lock()
c.poolCache[instancePoolID].Size = common.Int(size)
c.mu.Unlock()
return nil
}
@ -446,10 +510,21 @@ func (c *instancePoolCache) monitorScalingProgress(ctx context.Context, target i
return
}
// Fail scale (up) operation fast by watching for unrecoverable errors such as quota or capacity issues
lastWorkRequest, err := c.lastStartedWorkRequest(compartmentID, instancePoolID)
if err == nil && *lastWorkRequest.OperationType == ociInstancePoolLaunchOp &&
lastWorkRequest.Status == workrequests.WorkRequestSummaryStatusInProgress {
unrecoverableErrorMsg := c.firstUnrecoverableErrorForWorkRequest(*lastWorkRequest.Id)
if unrecoverableErrorMsg != "" {
errCh <- errors.New(unrecoverableErrorMsg)
return
}
}
var page *string
numRunningInstances := 0
for {
// List instances in the pool
// Next, wait until the number of instances in the pool reaches the target
listInstancePoolInstances, err := c.computeManagementClient.ListInstancePoolInstances(context.Background(), core.ListInstancePoolInstancesRequest{
InstancePoolId: common.String(instancePoolID),
CompartmentId: common.String(compartmentID),
@ -509,6 +584,21 @@ func (c *instancePoolCache) getSize(id string) (int, error) {
return *pool.Size, nil
}
// removeInstanceSummaryFromCache looks through the instance summary cache for an InstanceSummary with the specified ID and
// removes it if found.
func (c *instancePoolCache) removeInstanceSummaryFromCache(instancePoolID, instanceID string) {
var instanceSummaries []core.InstanceSummary
if instanceSummaryCache, found := c.instanceSummaryCache[instancePoolID]; found {
for _, instanceSummary := range *instanceSummaryCache {
if instanceSummary.Id != nil && *instanceSummary.Id != instanceID {
instanceSummaries = append(instanceSummaries, instanceSummary)
}
}
c.instanceSummaryCache[instancePoolID] = &instanceSummaries
}
}
// maxScalingWaitTime estimates the maximum amount of time, as a duration, needed to scale size instances.
// Note, larger scale operations are broken up internally into smaller batches. This is an internal detail
// and can be overridden on a tenancy basis. 20 is a good default.
@ -528,3 +618,57 @@ func maxScalingWaitTime(size, batchSize int, timePerBatch time.Duration) time.Du
return maxScalingWaitTime + buffer
}
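
// estimatedMaxScalingWaitTime is an illustrative sketch only, not necessarily the exact calculation
// performed above: one plausible way to derive the wait time is ceil(size/batchSize) batches, each
// allowed timePerBatch, plus a fixed safety buffer. The helper name and the buffer value are assumptions.
func estimatedMaxScalingWaitTime(size, batchSize int, timePerBatch time.Duration) time.Duration {
	batches := int(math.Ceil(float64(size) / float64(batchSize)))
	buffer := 60 * time.Second
	return time.Duration(batches)*timePerBatch + buffer
}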
// lastStartedWorkRequest returns the *last started* work request for the specified resource or an error if none are found
func (c *instancePoolCache) lastStartedWorkRequest(compartmentID, resourceID string) (workrequests.WorkRequestSummary, error) {
klog.V(6).Infof("Looking for the last started work request for resource %s.", resourceID)
listWorkRequests, err := c.workRequestsClient.ListWorkRequests(context.Background(), workrequests.ListWorkRequestsRequest{
CompartmentId: common.String(compartmentID),
Limit: common.Int(100),
ResourceId: common.String(resourceID),
})
if err != nil {
klog.Errorf("list work requests for %s failed: %v", resourceID, err)
return workrequests.WorkRequestSummary{}, err
}
var lastStartedWorkRequest = workrequests.WorkRequestSummary{}
for _, nextWorkRequest := range listWorkRequests.Items {
	// Skip entries that never started; otherwise keep the most recently started one.
	if nextWorkRequest.TimeStarted == nil {
		continue
	}
	if lastStartedWorkRequest.TimeStarted == nil || nextWorkRequest.TimeStarted.After(lastStartedWorkRequest.TimeStarted.Time) {
		lastStartedWorkRequest = nextWorkRequest
	}
}
if lastStartedWorkRequest.TimeStarted != nil {
return lastStartedWorkRequest, nil
}
return workrequests.WorkRequestSummary{}, errors.New("no work requests found")
}
// firstUnrecoverableErrorForWorkRequest returns the first non-recoverable error message associated with the specified
// work-request ID, or the empty string if none are found.
func (c *instancePoolCache) firstUnrecoverableErrorForWorkRequest(workRequestID string) string {
klog.V(6).Infof("Looking for non-recoverable errors for work request %s.", workRequestID)
// Look through the error logs looking for known unrecoverable error message(s)
workRequestErrors, _ := c.workRequestsClient.ListWorkRequestErrors(context.Background(),
workrequests.ListWorkRequestErrorsRequest{WorkRequestId: common.String(workRequestID),
SortOrder: workrequests.ListWorkRequestErrorsSortOrderDesc})
for _, nextErr := range workRequestErrors.Items {
// Abort wait for certain unrecoverable errors such as capacity and quota issues
if strings.Contains(strings.ToLower(*nextErr.Message), strings.ToLower("QuotaExceeded")) ||
strings.Contains(strings.ToLower(*nextErr.Message), strings.ToLower("LimitExceeded")) ||
strings.Contains(strings.ToLower(*nextErr.Message), strings.ToLower("OutOfCapacity")) {
klog.V(4).Infof("Found unrecoverable error(s) in work request %s.", workRequestID)
return *nextErr.Message
}
}
klog.V(6).Infof("No non-recoverable errors for work request %s found.", workRequestID)
return ""
}

View File

@ -18,19 +18,19 @@ package oci
import (
"fmt"
"gopkg.in/gcfg.v1"
"os"
"strconv"
"strings"
"time"
"gopkg.in/gcfg.v1"
apiv1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/util/wait"
"k8s.io/client-go/kubernetes"
"k8s.io/klog/v2"
kubeletapis "k8s.io/kubelet/pkg/apis"
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider"
@ -38,6 +38,7 @@ import (
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider/oci/oci-go-sdk/v43/common"
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider/oci/oci-go-sdk/v43/common/auth"
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider/oci/oci-go-sdk/v43/core"
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider/oci/oci-go-sdk/v43/workrequests"
)
var (
@ -163,6 +164,12 @@ func CreateInstancePoolManager(cloudConfigPath string, discoveryOpts cloudprovid
}
networkClient.SetCustomClientConfiguration(clientConfig)
workRequestClient, err := workrequests.NewWorkRequestClientWithConfigurationProvider(configProvider)
if err != nil {
return nil, errors.Wrap(err, "unable to create work request client")
}
workRequestClient.SetCustomClientConfiguration(clientConfig)
cloudConfig.Global.CompartmentID = os.Getenv(ociCompartmentEnvVar)
// Not passed by --cloud-config or environment variable, attempt to use the tenancy ID as the compartment ID
@ -178,7 +185,7 @@ func CreateInstancePoolManager(cloudConfigPath string, discoveryOpts cloudprovid
cfg: cloudConfig,
staticInstancePools: map[string]*InstancePoolNodeGroup{},
shapeGetter: createShapeGetter(ShapeClientImpl{computeMgmtClient: computeMgmtClient, computeClient: computeClient}),
instancePoolCache: newInstancePoolCache(&computeMgmtClient, &computeClient, &networkClient),
instancePoolCache: newInstancePoolCache(&computeMgmtClient, &computeClient, &networkClient, &workRequestClient),
kubeClient: kubeClient,
}
@ -270,6 +277,18 @@ func (m *InstancePoolManagerImpl) forceRefresh() error {
return nil
}
func (m *InstancePoolManagerImpl) forceRefreshInstancePool(instancePoolID string) error {
if m.cfg == nil {
return errors.New("instance pool manager does have a required config")
}
if instancePoolCache, found := m.staticInstancePools[instancePoolID]; found {
return m.instancePoolCache.rebuild(map[string]*InstancePoolNodeGroup{instancePoolID: instancePoolCache}, *m.cfg)
}
return errors.New("instance pool not found")
}
// Cleanup cleans up open resources before the cloud provider is destroyed, i.e. go routines etc.
func (m *InstancePoolManagerImpl) Cleanup() error {
return nil
@ -287,7 +306,7 @@ func (m *InstancePoolManagerImpl) GetInstancePools() []*InstancePoolNodeGroup {
// GetInstancePoolNodes returns InstancePool nodes that are not in a terminal state.
func (m *InstancePoolManagerImpl) GetInstancePoolNodes(ip InstancePoolNodeGroup) ([]cloudprovider.Instance, error) {
klog.V(4).Infof("getting instances for node pool: %q", ip.Id())
klog.V(4).Infof("getting (cached) instances for node pool: %q", ip.Id())
instanceSummaries, err := m.instancePoolCache.getInstanceSummaries(ip.Id())
if err != nil {
@ -312,6 +331,13 @@ func (m *InstancePoolManagerImpl) GetInstancePoolNodes(ip InstancePoolNodeGroup)
status.State = cloudprovider.InstanceDeleting
case string(core.InstanceLifecycleStateStopping):
status.State = cloudprovider.InstanceDeleting
case instanceStateUnfulfilled:
status.State = cloudprovider.InstanceCreating
status.ErrorInfo = &cloudprovider.InstanceErrorInfo{
ErrorClass: cloudprovider.OutOfResourcesErrorClass,
ErrorCode: instanceStateUnfulfilled,
ErrorMessage: "OCI cannot provision additional instances for this instance pool. Review quota and/or capacity.",
}
}
// Instance not in a terminal or unknown state, ok to add.
@ -390,10 +416,14 @@ func (m *InstancePoolManagerImpl) GetInstancePoolSize(ip InstancePoolNodeGroup)
// SetInstancePoolSize sets instance-pool size.
func (m *InstancePoolManagerImpl) SetInstancePoolSize(np InstancePoolNodeGroup, size int) error {
klog.Infof("SetInstancePoolSize (%d) called on instance pool %s", size, np.Id())
err := m.instancePoolCache.setSize(np.Id(), size)
if err != nil {
return err
setSizeErr := m.instancePoolCache.setSize(np.Id(), size)
klog.V(5).Infof("SetInstancePoolSize was called: refreshing instance pool cache")
// refresh instance pool cache after update (regardless if there was an error or not)
_ = m.forceRefreshInstancePool(np.Id())
if setSizeErr != nil {
return setSizeErr
}
// Interface says this function should wait until node group size is updated.
@ -496,10 +526,8 @@ func getInstancePoolAvailabilityDomain(ip *core.InstancePool) (string, error) {
func buildGenericLabelsForInstancePool(instancePool *core.InstancePool, nodeName, shape, availabilityDomain string) map[string]string {
result := make(map[string]string)
result[kubeletapis.LabelArch] = cloudprovider.DefaultArch
result[apiv1.LabelArchStable] = cloudprovider.DefaultArch
result[kubeletapis.LabelOS] = cloudprovider.DefaultOS
result[apiv1.LabelOSStable] = cloudprovider.DefaultOS
parts := strings.Split(*instancePool.Id, ".")

View File

@ -3,6 +3,7 @@ package oci
import (
"context"
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider/oci/oci-go-sdk/v43/core"
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider/oci/oci-go-sdk/v43/workrequests"
"reflect"
"testing"
@ -29,6 +30,22 @@ type mockComputeClient struct {
listVnicAttachmentsResponse core.ListVnicAttachmentsResponse
}
type mockWorkRequestClient struct {
err error
}
func (m *mockWorkRequestClient) GetWorkRequest(ctx context.Context, request workrequests.GetWorkRequestRequest) (workrequests.GetWorkRequestResponse, error) {
return workrequests.GetWorkRequestResponse{}, m.err
}
func (m *mockWorkRequestClient) ListWorkRequests(ctx context.Context, request workrequests.ListWorkRequestsRequest) (workrequests.ListWorkRequestsResponse, error) {
return workrequests.ListWorkRequestsResponse{}, m.err
}
func (m *mockWorkRequestClient) ListWorkRequestErrors(ctx context.Context, request workrequests.ListWorkRequestErrorsRequest) (workrequests.ListWorkRequestErrorsResponse, error) {
return workrequests.ListWorkRequestErrorsResponse{}, m.err
}
func (m *mockComputeClient) ListVnicAttachments(ctx context.Context, request core.ListVnicAttachmentsRequest) (core.ListVnicAttachmentsResponse, error) {
return m.listVnicAttachmentsResponse, m.err
}
@ -111,6 +128,10 @@ var virtualNetworkClient = &mockVirtualNetworkClient{
},
}
var workRequestsClient = &mockWorkRequestClient{
err: nil,
}
func TestInstancePoolFromArgs(t *testing.T) {
value := `1:5:ocid1.instancepool.oc1.phx.aaaaaaaah`
@ -146,7 +167,7 @@ func TestInstancePoolFromArgs(t *testing.T) {
func TestGetSetInstancePoolSize(t *testing.T) {
nodePoolCache := newInstancePoolCache(computeManagementClient, computeClient, virtualNetworkClient)
nodePoolCache := newInstancePoolCache(computeManagementClient, computeClient, virtualNetworkClient, workRequestsClient)
nodePoolCache.poolCache["ocid1.instancepool.oc1.phx.aaaaaaaai"] = &core.InstancePool{Size: common.Int(2)}
manager := &InstancePoolManagerImpl{instancePoolCache: nodePoolCache}
@ -183,7 +204,7 @@ func TestGetSetInstancePoolSize(t *testing.T) {
func TestGetInstancePoolForInstance(t *testing.T) {
nodePoolCache := newInstancePoolCache(computeManagementClient, computeClient, virtualNetworkClient)
nodePoolCache := newInstancePoolCache(computeManagementClient, computeClient, virtualNetworkClient, workRequestsClient)
nodePoolCache.poolCache["ocid1.instancepool.oc1.phx.aaaaaaaa1"] = &core.InstancePool{
Id: common.String("ocid1.instancepool.oc1.phx.aaaaaaaa1"),
Size: common.Int(1),
@ -267,7 +288,7 @@ func TestGetInstancePoolForInstance(t *testing.T) {
func TestGetInstancePoolNodes(t *testing.T) {
nodePoolCache := newInstancePoolCache(computeManagementClient, computeClient, virtualNetworkClient)
nodePoolCache := newInstancePoolCache(computeManagementClient, computeClient, virtualNetworkClient, workRequestsClient)
nodePoolCache.poolCache["ocid1.instancepool.oc1.phx.aaaaaaaa1"] = &core.InstancePool{
Id: common.String("ocid1.instancepool.oc1.phx.aaaaaaaa1"),
CompartmentId: common.String("ocid1.compartment.oc1..aaaaaaaa1"),
@ -406,7 +427,7 @@ func TestGetInstancePoolsAndInstances(t *testing.T) {
staticInstancePools: map[string]*InstancePoolNodeGroup{
"ocid1.instancepool.oc1.phx.aaaaaaaa1": {id: "ocid1.instancepool.oc1.phx.aaaaaaaa1"},
},
instancePoolCache: newInstancePoolCache(computeManagementClient, computeClient, virtualNetworkClient),
instancePoolCache: newInstancePoolCache(computeManagementClient, computeClient, virtualNetworkClient, workRequestsClient),
}
// Populate cache(s) (twice to increase code coverage).
@ -481,7 +502,7 @@ func TestDeleteInstances(t *testing.T) {
staticInstancePools: map[string]*InstancePoolNodeGroup{
"ocid1.instancepool.oc1.phx.aaaaaaaa1": {id: "ocid1.instancepool.oc1.phx.aaaaaaaa1"},
},
instancePoolCache: newInstancePoolCache(computeManagementClient, computeClient, virtualNetworkClient),
instancePoolCache: newInstancePoolCache(computeManagementClient, computeClient, virtualNetworkClient, workRequestsClient),
}
manager.shapeGetter = createShapeGetter(shapeClient)
// Populate cache(s).

View File

@ -18,6 +18,7 @@ package oci
import (
apiv1 "k8s.io/api/core/v1"
"strings"
)
// OciRef contains a reference to some entity in the OCI world.
@ -33,6 +34,7 @@ type OciRef struct {
}
func nodeToOciRef(n *apiv1.Node) (OciRef, error) {
return OciRef{
Name: n.ObjectMeta.Name,
AvailabilityDomain: getNodeAZ(n),
@ -90,6 +92,12 @@ func getNodeExternalAddress(node *apiv1.Node) string {
// getNodeInstancePoolID returns the instance pool ID if set as a label or annotation, or an empty string if it is not found.
func getNodeInstancePoolID(node *apiv1.Node) string {
// Handle unfulfilled instance placeholders (instances that have yet to be created)
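// A placeholder name is built as fmt.Sprintf("%s%s-%d", instanceIDUnfulfilled, <instance pool OCID>, <index>)
// during the cache rebuild, so stripping the trailing "-<index>" and the prefix yields the pool OCID.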
if strings.Contains(node.Name, instanceIDUnfulfilled) {
instIndex := strings.LastIndex(node.Name, "-")
return strings.Replace(node.Name[:instIndex], instanceIDUnfulfilled, "", 1)
}
poolIDPrefixLabel, _ := node.Labels[instancePoolIDLabelPrefix]
poolIDSuffixLabel, _ := node.Labels[instancePoolIDLabelSuffix]
@ -104,6 +112,11 @@ func getNodeInstancePoolID(node *apiv1.Node) string {
// getNodeInstanceID returns the instance ID if set as a label or annotation, or an empty string if it is not found.
func getNodeInstanceID(node *apiv1.Node) string {
// Handle unfulfilled instance placeholders (instances that have yet to be created)
if strings.Contains(node.Name, instanceIDUnfulfilled) {
return node.Name
}
instancePrefixLabel, _ := node.Labels[instanceIDLabelPrefix]
instanceSuffixLabel, _ := node.Labels[instanceIDLabelSuffix]

View File

@ -19,6 +19,7 @@ package oci
import (
"context"
"fmt"
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider/oci/oci-go-sdk/v43/common"
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider/oci/oci-go-sdk/v43/core"
"k8s.io/klog/v2"
)
@ -117,10 +118,29 @@ func (osf *shapeGetterImpl) GetInstancePoolShape(ip *core.InstancePool) (*Shape,
shape.MemoryInBytes = *instanceDetails.LaunchDetails.ShapeConfig.MemoryInGBs * 1024 * 1024 * 1024
}
} else {
allShapes, _ := osf.shapeClient.ListShapes(context.Background(), core.ListShapesRequest{
CompartmentId: instanceConfig.CompartmentId,
})
for _, nextShape := range allShapes.Items {
// Fetch the shape object by name
var page *string
var everyShape []core.Shape
for {
// List all available shapes
listShapesReq := core.ListShapesRequest{
	CompartmentId: instanceConfig.CompartmentId,
	Page:          page,
	Limit:         common.Int(50),
}
listShapes, err := osf.shapeClient.ListShapes(context.Background(), listShapesReq)
if err != nil {
return nil, err
}
everyShape = append(everyShape, listShapes.Items...)
if page = listShapes.OpcNextPage; listShapes.OpcNextPage == nil {
break
}
}
for _, nextShape := range everyShape {
if *nextShape.Shape == *instanceDetails.LaunchDetails.Shape {
shape.Name = *nextShape.Shape
if nextShape.Ocpus != nil {

View File

@ -4,7 +4,6 @@ import (
"context"
apiv1 "k8s.io/api/core/v1"
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider"
kubeletapis "k8s.io/kubelet/pkg/apis"
"reflect"
"strings"
"testing"
@ -117,7 +116,7 @@ func TestGetShape(t *testing.T) {
}
func TestGetInstancePoolTemplateNode(t *testing.T) {
instancePoolCache := newInstancePoolCache(computeManagementClient, computeClient, virtualNetworkClient)
instancePoolCache := newInstancePoolCache(computeManagementClient, computeClient, virtualNetworkClient, workRequestsClient)
instancePoolCache.poolCache["ocid1.instancepool.oc1.phx.aaaaaaaa1"] = &core.InstancePool{
Id: common.String("ocid1.instancepool.oc1.phx.aaaaaaaa1"),
CompartmentId: common.String("ocid1.compartment.oc1..aaaaaaaa1"),
@ -182,9 +181,7 @@ func TestBuildGenericLabels(t *testing.T) {
availabilityDomain := "US-ASHBURN-1"
expected := map[string]string{
kubeletapis.LabelArch: cloudprovider.DefaultArch,
apiv1.LabelArchStable: cloudprovider.DefaultArch,
kubeletapis.LabelOS: cloudprovider.DefaultOS,
apiv1.LabelOSStable: cloudprovider.DefaultOS,
apiv1.LabelZoneRegion: "phx",
apiv1.LabelZoneRegionStable: "phx",

View File

@ -151,6 +151,11 @@ func (provider *OVHCloudProvider) NodeGroupForNode(node *apiv1.Node) (cloudprovi
return ng, err
}
// HasInstance returns whether a given node has a corresponding instance in this cloud provider
func (provider *OVHCloudProvider) HasInstance(node *apiv1.Node) (bool, error) {
return true, cloudprovider.ErrNotImplemented
}
// findNodeGroupFromCache tries to retrieve the associated node group from an already built mapping in cache
func (provider *OVHCloudProvider) findNodeGroupFromCache(providerID string) cloudprovider.NodeGroup {
if ng, ok := provider.manager.NodeGroupPerProviderID[providerID]; ok {

Some files were not shown because too many files have changed in this diff.