OCI provider based on instance-pools and instance-configurations.

This commit is contained in:
jesse.millan 2021-08-02 16:46:52 -07:00
parent d3784072e4
commit 65ebd8db83
No known key found for this signature in database
GPG Key ID: 9DD407A06EAC48A6
22 changed files with 3101 additions and 4 deletions

View File

@ -25,6 +25,7 @@ You should also take a look at the notes and "gotchas" for your specific cloud p
* [IonosCloud](./cloudprovider/ionoscloud/README.md)
* [OVHcloud](./cloudprovider/ovhcloud/README.md)
* [Linode](./cloudprovider/linode/README.md)
* [OracleCloud](./cloudprovider/oci/README.md)
* [ClusterAPI](./cloudprovider/clusterapi/README.md)
* [BizflyCloud](./cloudprovider/bizflycloud/README.md)
@ -165,5 +166,6 @@ Supported cloud providers:
* Equinix Metal https://github.com/kubernetes/autoscaler/blob/master/cluster-autoscaler/cloudprovider/packet/README.md
* OVHcloud https://github.com/kubernetes/autoscaler/blob/master/cluster-autoscaler/cloudprovider/ovhcloud/README.md
* Linode https://github.com/kubernetes/autoscaler/blob/master/cluster-autoscaler/cloudprovider/linode/README.md
* OCI https://github.com/kubernetes/autoscaler/blob/master/cluster-autoscaler/cloudprovider/oci/README.md
* Hetzner https://github.com/kubernetes/autoscaler/blob/master/cluster-autoscaler/cloudprovider/hetzner/README.md
* Cluster API https://github.com/kubernetes/autoscaler/blob/master/cluster-autoscaler/cloudprovider/clusterapi/README.md

View File

@ -1,5 +1,5 @@
//go:build !gce && !aws && !azure && !kubemark && !alicloud && !magnum && !digitalocean && !clusterapi && !huaweicloud && !ionoscloud && !linode && !hetzner && !bizflycloud && !brightbox && !packet
// +build !gce,!aws,!azure,!kubemark,!alicloud,!magnum,!digitalocean,!clusterapi,!huaweicloud,!ionoscloud,!linode,!hetzner,!bizflycloud,!brightbox,!packet
//go:build !gce && !aws && !azure && !kubemark && !alicloud && !magnum && !digitalocean && !clusterapi && !huaweicloud && !ionoscloud && !linode && !hetzner && !bizflycloud && !brightbox && !packet && !oci
// +build !gce,!aws,!azure,!kubemark,!alicloud,!magnum,!digitalocean,!clusterapi,!huaweicloud,!ionoscloud,!linode,!hetzner,!bizflycloud,!brightbox,!packet,!oci
/*
Copyright 2018 The Kubernetes Authors.
@ -37,6 +37,7 @@ import (
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider/ionoscloud"
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider/linode"
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider/magnum"
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider/oci"
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider/ovhcloud"
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider/packet"
"k8s.io/autoscaler/cluster-autoscaler/config"
@ -55,6 +56,7 @@ var AvailableCloudProviders = []string{
cloudprovider.ExoscaleProviderName,
cloudprovider.HuaweicloudProviderName,
cloudprovider.HetznerProviderName,
cloudprovider.OracleCloudProviderName,
cloudprovider.OVHcloudProviderName,
cloudprovider.ClusterAPIProviderName,
cloudprovider.IonoscloudProviderName,
@ -105,6 +107,8 @@ func buildCloudProvider(opts config.AutoscalingOptions, do cloudprovider.NodeGro
return ionoscloud.BuildIonosCloud(opts, do, rl)
case cloudprovider.LinodeProviderName:
return linode.BuildLinode(opts, do, rl)
case cloudprovider.OracleCloudProviderName:
return oci.BuildOCI(opts, do, rl)
}
return nil
}

View File

@ -0,0 +1,43 @@
//go:build oci
// +build oci
/*
Copyright 2020 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package builder
import (
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider"
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider/oci"
"k8s.io/autoscaler/cluster-autoscaler/config"
)
// AvailableCloudProviders supported by the cloud provider builder.
var AvailableCloudProviders = []string{
cloudprovider.OracleCloudProviderName,
}
// DefaultCloudProvider for oci-only build is oci.
const DefaultCloudProvider = cloudprovider.OracleCloudProviderName
func buildCloudProvider(opts config.AutoscalingOptions, do cloudprovider.NodeGroupDiscoveryOptions, rl *cloudprovider.ResourceLimiter) cloudprovider.CloudProvider {
switch opts.CloudProviderName {
case cloudprovider.OracleCloudProviderName:
return oci.BuildOCI(opts, do, rl)
}
return nil
}

View File

@ -60,6 +60,8 @@ const (
HuaweicloudProviderName = "huaweicloud"
// IonoscloudProviderName gets the provider name of ionoscloud
IonoscloudProviderName = "ionoscloud"
// OracleCloudProviderName gets the provider name of oci
OracleCloudProviderName = "oci"
// OVHcloudProviderName gets the provider name of ovhcloud
OVHcloudProviderName = "ovhcloud"
// LinodeProviderName gets the provider name of linode

View File

@ -0,0 +1,5 @@
approvers:
#- jlamillan
reviewers:
#- jlamillan
#- ericrrath

View File

@ -0,0 +1,220 @@
# Cluster Autoscaler for Oracle Cloud Infrastructure (OCI)
On OCI, the cluster-autoscaler utilizes [Instance Pools](https://docs.oracle.com/en-us/iaas/Content/Compute/Tasks/creatinginstancepool.htm)
combined with [Instance Configurations](https://docs.oracle.com/en-us/iaas/Content/Compute/Tasks/creatinginstanceconfig.htm) to
automatically resize a cluster's nodes based on application workload demands by:
- adding nodes to static instance-pool(s) when pods fail to schedule because the cluster has insufficient resources.
- removing nodes from instance-pool(s) when they have been underutilized for an extended time and their pods can be placed on other existing nodes.
The cluster-autoscaler works on a per-instance pool basis. You configure the cluster-autoscaler to tell it which instance pools to target
for expansion and contraction, the minimum and maximum sizes for each pool, and how you want the autoscaling to take place.
Instance pools not referenced in the configuration file are not managed by the cluster-autoscaler.
## Create Required OCI Resources
### IAM Policy (if using Instance Principals)
We recommend setting up and configuring the cluster-autoscaler to use
[Instance Principals](https://docs.oracle.com/en-us/iaas/Content/Identity/Tasks/callingservicesfrominstances.htm)
to authenticate to the OCI APIs.
The following policy provides the minimum privileges necessary for Cluster Autoscaler to run:
1: Create a compartment-level dynamic group containing the nodes (compute instances) in the cluster:
```
All {instance.compartment.id = 'ocid1.compartment.oc1..aaaaaaaa7ey4sg3a6b5wnv5hlkjlkjadslkfjalskfjalsadfadsf'}
```
2: Create a *tenancy-level* policy to allow nodes to manage instance-pools:
```
Allow dynamic-group acme-oci-cluster-autoscaler-dyn-grp to manage instance-pools in compartment <compartment-name>
Allow dynamic-group acme-oci-cluster-autoscaler-dyn-grp to manage instance-configurations in compartment <compartment-name>
Allow dynamic-group acme-oci-cluster-autoscaler-dyn-grp to manage instance-family in compartment <compartment-name>
Allow dynamic-group acme-oci-cluster-autoscaler-dyn-grp to use subnets in compartment <compartment-name>
Allow dynamic-group acme-oci-cluster-autoscaler-dyn-grp to read virtual-network-family in compartment <compartment-name>
Allow dynamic-group acme-oci-cluster-autoscaler-dyn-grp to use vnics in compartment <compartment-name>
Allow dynamic-group acme-oci-cluster-autoscaler-dyn-grp to inspect compartments in compartment <compartment-name>
```
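If you prefer the OCI CLI to the Console for these IAM steps, a minimal sketch might look like the following; the dynamic-group and policy names are placeholders taken from the statements above, and `<tenancy-ocid>` / `<compartment-name>` must be replaced with your own values:
```bash
# Create the dynamic group matching the cluster's compute instances (rule from step 1).
oci iam dynamic-group create \
  --name acme-oci-cluster-autoscaler-dyn-grp \
  --description "cluster-autoscaler nodes" \
  --matching-rule "All {instance.compartment.id = 'ocid1.compartment.oc1..aaaaaaaa7ey4sg3a6b5wnv5hlkjlkjadslkfjalskfjalsadfadsf'}"

# Create the tenancy-level policy (extend the statements list with every statement from step 2).
oci iam policy create \
  --compartment-id <tenancy-ocid> \
  --name acme-oci-cluster-autoscaler-policy \
  --description "Allow cluster-autoscaler nodes to manage instance-pools" \
  --statements '["Allow dynamic-group acme-oci-cluster-autoscaler-dyn-grp to manage instance-pools in compartment <compartment-name>"]'
```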
### Instance Pool and Instance Configurations
Before you deploy the cluster-autoscaler on OCI, you need to create one or more static Instance Pools and an Instance
Configuration with `cloud-init` specified in the launch details so that new nodes automatically join the existing cluster on
startup.
Advanced Instance Pool and Instance Configuration setup is out of scope for this document. However, a
working [instance-details.json](./examples/instance-details.json) and [placement-config.json](./examples/placement-config.json)
(based on Rancher [RKE](https://rancher.com/products/rke/) and using [cloud-init](https://cloudinit.readthedocs.io/en/latest/)) are
included in the examples and can be applied using the [OCI CLI](https://docs.oracle.com/en-us/iaas/Content/API/SDKDocs/cliinstall.htm).
Modify the `user_data` in the example [instance-details.json](./examples/instance-details.json) to suit your needs, re-encode it as base64, and apply:
```bash
# e.g. cloud-init. Modify, re-encode, and update user_data in instance-details.json to suit your needs:
$ echo IyEvYmluL2Jhc2gKdG91hci9saWIvYXB0L....1yZXRyeSAzIGhG91Y2ggL3RtcC9jbG91ZC1pbml0LWZpbmlzaGVkCg== | base64 -D
#!/bin/bash
groupadd docker
usermod -aG docker ubuntu
curl --retry 3 https://releases.rancher.com/install-docker/20.10.sh | sh
docker run -d --privileged --restart=unless-stopped --net=host -v /etc/kubernetes:/etc/kubernetes -v /var/run:/var/run rancher/rancher-agent:v2.5.5 --server https://my-rancher.com --token xxxxxx --worker
```
```bash
$ oci compute-management instance-configuration create --instance-details file://./cluster-autoscaler/cloudprovider/oci/examples/instance-details.json --compartment-id ocid1.compartment.oc1..aaaaaaaa7ey4sg3a6b5wnv5hlkjlkjadslkfjalskfjalsadfadsf --query 'data.id' --raw-output
ocid1.instanceconfiguration.oc1.phx.aaaaaaaa3neul67zb3goz43lybosc2o3fv67gj3zazexbb3vfcbypmpznhtq
$ oci compute-management instance-pool create --compartment-id ocid1.compartment.oc1..aaaaaaaa7ey4sg3a6b5wnv5hlkjlkjadslkfjalskfjalsadfadsf --instance-configuration-id ocid1.instanceconfiguration.oc1.phx.aaaaaaaa3neul67zb3goz43lybosc2o3fv67gj3zazexbb3vfcbypmpznhtq --placement-configurations file://./cluster-autoscaler/cloudprovider/oci/examples/placement-config.json --size 0 --wait-for-state RUNNING --query 'data.id' --raw-output
Action completed. Waiting until the resource has entered state: ('RUNNING',)
ocid1.instancepool.oc1.phx.aaaaaaaayd5bxwrzomzr2b2enchm4mof7uhw7do5hc2afkhks576syikk2ca
```
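Optionally, you can confirm the new (empty) instance pool is in the `RUNNING` state before handing it to the autoscaler; a sketch using the same OCI CLI setup and the pool OCID returned above:
```bash
oci compute-management instance-pool get \
  --instance-pool-id ocid1.instancepool.oc1.phx.aaaaaaaayd5bxwrzomzr2b2enchm4mof7uhw7do5hc2afkhks576syikk2ca \
  --query 'data."lifecycle-state"' --raw-output
```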
## Configure Autoscaler
Use the `--nodes=<min-nodes>:<max-nodes>:<instancepool-ocid>` parameter (shown in the example below the list) to specify which pre-existing instance
pools to target for automatic expansion and contraction, along with the minimum and maximum size of each pool, where:
- `<min-nodes>` is the minimum number of nodes allowed in the instance-pool.
- `<max-nodes>` is the maximum number of nodes allowed in the instance-pool. Make sure the maximum number of nodes you specify does not exceed the tenancy limits for the shape defined in the instance-pool.
- `<instancepool-ocid>` is the OCID of a pre-existing instance-pool.
Instance pools not referenced by a `--nodes` parameter are not managed by the autoscaler.
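For example, the following invocation registers two of the instance pools used elsewhere in this document, allowing the first to scale between 1 and 10 nodes and the second between 0 and 20 (substitute the OCIDs of your own pools):
```bash
./cluster-autoscaler \
  --cloud-provider=oci \
  --nodes=1:10:ocid1.instancepool.oc1.phx.aaaaaaaaqdxy35acq32zjfvkybjmvlbdgj6q3m55qkwwctxhsprmz633k62q \
  --nodes=0:20:ocid1.instancepool.oc1.phx.aaaaaaaal3jhoc32ljsfaeif4x2ssfa2a63oehjgqryiueivieee6yaqbkia
```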
If you are authenticating via instance principals, be sure the `OCI_REGION` environment variable is set to the correct
value in the deployment e.g.:
```yaml
env:
- name: OCI_REGION
value: "us-phoenix-1"
```
### Optional cloud-config file
An _optional_ cloud-config file can be mounted at the path specified by `--cloud-config`.
Below is an example of passing optional configuration via a `cloud-config` file that configures the cluster-autoscaler to
authenticate via instance principals and to only manage the configured instance-pools in a single compartment:
```ini
[Global]
compartment-id = ocid1.compartment.oc1..aaaaaaaa7ey4sg3a6b5wnv5hlkjlkjadslkfjalskfjalsadfadsf
region = uk-london-1
use-instance-principals = true
```
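The file itself is passed to the autoscaler through its standard `--cloud-config` flag; a sketch, assuming the file is mounted at the hypothetical path `/etc/oci/cloud-config`:
```bash
./cluster-autoscaler \
  --cloud-provider=oci \
  --cloud-config=/etc/oci/cloud-config \
  --nodes=1:10:ocid1.instancepool.oc1.phx.aaaaaaaaqdxy35acq32zjfvkybjmvlbdgj6q3m55qkwwctxhsprmz633k62q
```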
### Environment variables
Configuration via environment-variables:
- `OCI_USE_INSTANCE_PRINCIPAL` - Whether to use Instance Principals for authentication rather than expecting an OCI config file to be mounted in the container. Defaults to false.
- `OCI_REGION` - **Required** when using Instance Principals. e.g. `OCI_REGION=us-phoenix-1`. See [region list](https://docs.oracle.com/en-us/iaas/Content/General/Concepts/regions.htm) for identifiers.
- `OCI_COMPARTMENT_ID` - Restrict the cluster-autoscaler to instance-pools in a single compartment. When unset, the cluster-autoscaler manages each specified instance-pool regardless of the compartment it is in.
- `OCI_REFRESH_INTERVAL` - Optional interval at which the internal cache is synced with the OCI API. Defaults to `5m`.
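These variables are normally set in the deployment manifest (see the examples below), but as a quick sketch you could also patch an already-deployed autoscaler with `kubectl set env`, assuming the deployment name and namespace used in the example manifests:
```bash
kubectl -n kube-system set env deployment/cluster-autoscaler \
  OCI_USE_INSTANCE_PRINCIPAL=true \
  OCI_REGION=us-phoenix-1 \
  OCI_REFRESH_INTERVAL=5m
```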
## Deployment
### Create OCI config secret (only if _not_ using Instance Principals)
If you are opting for a file based OCI configuration (as opposed to instance principals), the OCI config file and private key need to be mounted into the container filesystem using a secret volume.
The following policy is required to run the cluster-autoscaler when the specified user is not an administrator:
```
Allow group acme-oci-cluster-autoscaler-user-grp to manage instance-pools in compartment <compartment-name>
Allow group acme-oci-cluster-autoscaler-user-grp to manage instance-configurations in compartment <compartment-name>
Allow group acme-oci-cluster-autoscaler-user-grp to manage instance-family in compartment <compartment-name>
Allow group acme-oci-cluster-autoscaler-user-grp to use subnets in compartment <compartment-name>
Allow group acme-oci-cluster-autoscaler-user-grp to read virtual-network-family in compartment <compartment-name>
Allow group acme-oci-cluster-autoscaler-user-grp to use vnics in compartment <compartment-name>
Allow group acme-oci-cluster-autoscaler-user-grp to inspect compartments in compartment <compartment-name>
```
Example OCI config file (note `key_file` is the expected path and filename of the OCI API private-key from the perspective of the container):
```bash
$ cat ~/.oci/config
[DEFAULT]
user=ocid1.user.oc1..aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
fingerprint=xx:xx:xx:xx:xx:xx:xx:xx:xx:xx:xx:xx:xx:xx:xx:xx
key_file=/root/.oci/api_key.pem
tenancy=ocid1.tenancy.oc1..aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
pass_phrase=
region=us-phoenix-1
```
Create the secret (`api_key.pem` key name is required):
```bash
kubectl create secret generic oci-config -n kube-system --from-file=/Users/me/.oci/config --from-file=api_key.pem=/Users/me/.oci/my_api_key.pem
```
### Example Deployment
Two example deployments of the cluster-autoscaler that manage instance-pools are located in the [examples](./examples/) directory.
[oci-ip-cluster-autoscaler-w-principals.yaml](./examples/oci-ip-cluster-autoscaler-w-principals.yaml) uses
instance principals, and [oci-ip-cluster-autoscaler-w-config.yaml](./examples/oci-ip-cluster-autoscaler-w-config.yaml) uses file-based
authentication.
Note that the three specified instance-pools are intended to correspond to different availability domains in the Phoenix (`us-phoenix-1`) region:
```yaml
...
containers:
- image: docker.io/jlamillan/autoscaler:oci-pr-rc6
name: cluster-autoscaler
command:
- ./cluster-autoscaler
- --cloud-provider=oci
- --nodes=1:10:ocid1.instancepool.oc1.phx.aaaaaaaaqdxy35acq32zjfvkybjmvlbdgj6q3m55qkwwctxhsprmz633k62q
- --nodes=0:10:ocid1.instancepool.oc1.phx.aaaaaaaazldzcu4mi5spz56upbtwnsynz2nk6jvmx7zi4hsta4uggxbulbua
- --nodes=0:20:ocid1.instancepool.oc1.phx.aaaaaaaal3jhoc32ljsfaeif4x2ssfa2a63oehjgqryiueivieee6yaqbkia
```
Instance principal based authentication deployment:
Substitute the OCIDs of _your_ instance pool(s) and set the `OCI_REGION` environment variable to the region where your
instance pool(s) reside before applying the deployment:
```
kubectl apply -f ./cloudprovider/oci/examples/oci-ip-cluster-autoscaler-w-principals.yaml
```
OCI config file based authentication deployment:
```
kubectl apply -f ./cloudprovider/oci/examples/oci-ip-cluster-autoscaler-w-config.yaml
```
## Common Notes and Gotchas:
- You must configure the Instance Configuration of new compute instances so that they join the existing cluster when they start. This can
be accomplished with `cloud-init` / `user-data` in the instance launch configuration [example](./examples/instance-details.json).
- If opting for a file based OCI configuration (as opposed to instance principals), ensure the OCI config and private-key
PEM files are mounted into the container filesystem at the [expected path](https://docs.oracle.com/en-us/iaas/Content/API/Concepts/sdkconfig.htm). Note the `key_file` option in the example `~/.oci/config` above references a private-key file mounted into the container by the example [volumeMount](./examples/oci-ip-cluster-autoscaler-w-config.yaml#L165).
- Make sure the maximum number of nodes you specify does not exceed the limit for the instance-pool or the tenancy.
- We recommend creating multiple instance-pools with one availability domain specified so new nodes can be created to meet
affinity requirements across availability domains.
- If you are authenticating via instance principals, be sure the `OCI_REGION` environment variable is set to the correct
value in the deployment.
- The cluster-autoscaler will not automatically remove scaled down (terminated) `Node` objects from the Kubernetes API
without assistance from the [OCI Cloud Controller Manager](https://github.com/oracle/oci-cloud-controller-manager) (CCM).
If scaled down nodes are lingering in your cluster in the `NotReady` status, ensure the OCI CCM is installed and running
correctly (`oci-cloud-controller-manager`).
- Avoid manually changing node pools that are managed by the cluster-autoscaler. For example, do not add or remove nodes
using kubectl, or using the Console (or the Oracle Cloud Infrastructure CLI or API).
- `--node-group-auto-discovery` and `--node-autoprovisioning-enabled=true` are not supported.
- We set a `nvidia.com/gpu:NoSchedule` taint on nodes in a GPU-enabled instance-pool.
## Helpful links
- [Oracle Cloud Infrastructure home](https://cloud.oracle.com)
- [OCI instance configuration documentation](https://docs.oracle.com/en-us/iaas/Content/Compute/Tasks/creatinginstanceconfig.htm)
- [instance principals](https://docs.oracle.com/en-us/iaas/Content/Identity/Tasks/callingservicesfrominstances.htm)
- [OCI Cloud Controller Manager](https://github.com/oracle/oci-cloud-controller-manager)
- [OCI Container Storage Interface driver](https://github.com/oracle/oci-cloud-controller-manager/blob/master/container-storage-interface.md)

View File

@ -0,0 +1,19 @@
{
"instanceType": "compute",
"launchDetails": {
"compartmentId": "ocid1.compartment.oc1..aaaaaaaa7ey4sg3a6b5wnv5hlkjlkjadslkfjalskfjalsadfadsf",
"shape": "VM.Standard2.8",
"sourceDetails":
{
"imageId": "ocid1.image.oc1.phx.aaaaaaaa55tzajot4gbiw2p7gquwjnvfzrasosbrq4h6wywkff4zjosp2fia",
"sourceType": "image",
"bootVolumeSizeInGBs": 100
},
"metadata": {
"user_data": "IyEvYmluL2Jhc2gKdG91Y2ggL3RtcC9jbG91ZC1pbml0LXN0YXJ0ZWQKaXB0YWJsZXMgLUYKZ3JvdXBhZGQgZG9ja2VyCnVzZXJtb2QgLWFHIGRvY2tlciB1YnVudHUKcm0gL3Zhci9saWIvYXB0L2xpc3RzL2xvY2sKcGtpbGwgLTkgLWYgYXB0CmN1cmwgLS1yZXRyeSAzIGh0dHBzOi8vcmVsZWFzZXMucmFuY2hlci5jb20vaW5zdGFsbC1kb2NrZXIvMjAuMTAuc2ggfCBzaApkb2NrZXIgcnVuIC1kIC0tcHJpdmlsZWdlZCAtLXJlc3RhcnQ9dW5sZXNzLXN0b3BwZWQgLS1uZXQ9aG9zdCAtdiAvZXRjL2t1YmVybmV0ZXM6L2V0Yy9rdWJlcm5ldGVzIC12IC92YXIvcnVuOi92YXIvcnVuIHJhbmNoZXIvcmFuY2hlci1hZ2VudDp2Mi41LjUgLS1zZXJ2ZXIgaHR0cHM6Ly9teS1yYW5jaGVyLmNvbSAtLXRva2VuIHh4eHh4eCAgLS13b3JrZXIKdG91Y2ggL3RtcC9jbG91ZC1pbml0LWZpbmlzaGVkCg=="
},
"createVnicDetails": {
"assignPublicIp": true
}
}
}

View File

@ -0,0 +1,174 @@
apiVersion: v1
kind: ServiceAccount
metadata:
labels:
k8s-addon: cluster-autoscaler.addons.k8s.io
k8s-app: cluster-autoscaler
name: cluster-autoscaler
namespace: kube-system
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
name: cluster-autoscaler
labels:
k8s-addon: cluster-autoscaler.addons.k8s.io
k8s-app: cluster-autoscaler
rules:
- apiGroups: [""]
resources: ["events", "endpoints"]
verbs: ["create", "patch"]
- apiGroups: [""]
resources: ["pods/eviction"]
verbs: ["create"]
- apiGroups: [""]
resources: ["pods/status"]
verbs: ["update"]
- apiGroups: [""]
resources: ["endpoints"]
resourceNames: ["cluster-autoscaler"]
verbs: ["get", "update"]
- apiGroups: [""]
resources: ["nodes"]
verbs: ["watch", "list", "get", "update"]
- apiGroups: [""]
resources: ["namepaces"]
verbs: ["list"]
- apiGroups: [""]
resources:
- "pods"
- "services"
- "replicationcontrollers"
- "persistentvolumeclaims"
- "persistentvolumes"
verbs: ["watch", "list", "get"]
- apiGroups: ["extensions"]
resources: ["replicasets", "daemonsets"]
verbs: ["watch", "list", "get"]
- apiGroups: ["policy"]
resources: ["poddisruptionbudgets"]
verbs: ["watch", "list"]
- apiGroups: ["apps"]
resources: ["statefulsets", "replicasets", "daemonsets"]
verbs: ["watch", "list", "get"]
- apiGroups: ["storage.k8s.io"]
resources: ["storageclasses", "csinodes"]
verbs: ["get", "list", "watch"]
- apiGroups: ["batch"]
resources: ["jobs", "cronjobs"]
verbs: ["watch", "list", "get"]
- apiGroups: ["coordination.k8s.io"]
resources: ["leases"]
verbs: ["create"]
- apiGroups: ["coordination.k8s.io"]
resourceNames: ["cluster-autoscaler"]
resources: ["leases"]
verbs: ["get", "update"]
- apiGroups: ["storage.k8s.io"]
resources: ["csidrivers", "csistoragecapacities"]
verbs: ["get", "list"]
---
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
name: cluster-autoscaler
namespace: kube-system
labels:
k8s-addon: cluster-autoscaler.addons.k8s.io
k8s-app: cluster-autoscaler
rules:
- apiGroups: [""]
resources: ["configmaps"]
verbs: ["create","list","watch"]
- apiGroups: [""]
resources: ["configmaps"]
resourceNames:
- "cluster-autoscaler-status"
- "cluster-autoscaler-priority-expander"
verbs: ["delete", "get", "update", "watch"]
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
name: cluster-autoscaler
labels:
k8s-addon: cluster-autoscaler.addons.k8s.io
k8s-app: cluster-autoscaler
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: ClusterRole
name: cluster-autoscaler
subjects:
- kind: ServiceAccount
name: cluster-autoscaler
namespace: kube-system
---
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
name: cluster-autoscaler
namespace: kube-system
labels:
k8s-addon: cluster-autoscaler.addons.k8s.io
k8s-app: cluster-autoscaler
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: Role
name: cluster-autoscaler
subjects:
- kind: ServiceAccount
name: cluster-autoscaler
namespace: kube-system
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: cluster-autoscaler
namespace: kube-system
labels:
app: cluster-autoscaler
spec:
replicas: 1
selector:
matchLabels:
app: cluster-autoscaler
template:
metadata:
labels:
app: cluster-autoscaler
spec:
serviceAccountName: cluster-autoscaler
containers:
- image: docker.io/jlamillan/autoscaler:oci-pr-rc6
name: cluster-autoscaler
command:
- ./cluster-autoscaler
- --v=5
- --logtostderr=true
- --cloud-provider=oci
- --nodes=0:10:ocid1.instancepool.oc1.phx.aaaaaaaaqdxy35acq32zjfvkybjmvlbdgj6q3m55qkwwctxhsprmz633k62q
- --nodes=0:10:ocid1.instancepool.oc1.phx.aaaaaaaazldzcu4mi5spz56upbtwnsynz2nk6jvmx7zi4hsta4uggxbulbua
- --nodes=0:10:ocid1.instancepool.oc1.phx.aaaaaaaal3jhoc32ljsfaeif4x2ssfa2a63oehjgqryiueivieee6yaqbkia
- --scale-down-delay-after-add=1m
- --scale-down-unneeded-time=1m
- --namespace=kube-system
imagePullPolicy: "Always"
env:
- name: OCI_USE_INSTANCE_PRINCIPAL
value: "false"
volumeMounts:
- name: oci-config-vol
mountPath: "/root/.oci"
readOnly: true
volumes:
- name: oci-config-vol
secret:
secretName: oci-config
items:
- key: config
path: config
- key: api_key.pem
path: api_key.pem

View File

@ -0,0 +1,163 @@
apiVersion: v1
kind: ServiceAccount
metadata:
labels:
k8s-addon: cluster-autoscaler.addons.k8s.io
k8s-app: cluster-autoscaler
name: cluster-autoscaler
namespace: kube-system
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
name: cluster-autoscaler
labels:
k8s-addon: cluster-autoscaler.addons.k8s.io
k8s-app: cluster-autoscaler
rules:
- apiGroups: [""]
resources: ["events", "endpoints"]
verbs: ["create", "patch"]
- apiGroups: [""]
resources: ["pods/eviction"]
verbs: ["create"]
- apiGroups: [""]
resources: ["pods/status"]
verbs: ["update"]
- apiGroups: [""]
resources: ["endpoints"]
resourceNames: ["cluster-autoscaler"]
verbs: ["get", "update"]
- apiGroups: [""]
resources: ["nodes"]
verbs: ["watch", "list", "get", "update"]
- apiGroups: [""]
resources: ["namespaces"]
verbs: ["list"]
- apiGroups: [""]
resources:
- "pods"
- "services"
- "replicationcontrollers"
- "persistentvolumeclaims"
- "persistentvolumes"
verbs: ["watch", "list", "get"]
- apiGroups: ["extensions"]
resources: ["replicasets", "daemonsets"]
verbs: ["watch", "list", "get"]
- apiGroups: ["policy"]
resources: ["poddisruptionbudgets"]
verbs: ["watch", "list"]
- apiGroups: ["apps"]
resources: ["statefulsets", "replicasets", "daemonsets"]
verbs: ["watch", "list", "get"]
- apiGroups: ["storage.k8s.io"]
resources: ["storageclasses", "csinodes"]
verbs: ["get", "list", "watch"]
- apiGroups: ["batch"]
resources: ["jobs", "cronjobs"]
verbs: ["watch", "list", "get"]
- apiGroups: ["coordination.k8s.io"]
resources: ["leases"]
verbs: ["create"]
- apiGroups: ["coordination.k8s.io"]
resourceNames: ["cluster-autoscaler"]
resources: ["leases"]
verbs: ["get", "update"]
- apiGroups: ["storage.k8s.io"]
resources: ["csidrivers", "csistoragecapacities"]
verbs: ["get", "list"]
---
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
name: cluster-autoscaler
namespace: kube-system
labels:
k8s-addon: cluster-autoscaler.addons.k8s.io
k8s-app: cluster-autoscaler
rules:
- apiGroups: [""]
resources: ["configmaps"]
verbs: ["create","list","watch"]
- apiGroups: [""]
resources: ["configmaps"]
resourceNames:
- "cluster-autoscaler-status"
- "cluster-autoscaler-priority-expander"
verbs: ["delete", "get", "update", "watch"]
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
name: cluster-autoscaler
labels:
k8s-addon: cluster-autoscaler.addons.k8s.io
k8s-app: cluster-autoscaler
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: ClusterRole
name: cluster-autoscaler
subjects:
- kind: ServiceAccount
name: cluster-autoscaler
namespace: kube-system
---
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
name: cluster-autoscaler
namespace: kube-system
labels:
k8s-addon: cluster-autoscaler.addons.k8s.io
k8s-app: cluster-autoscaler
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: Role
name: cluster-autoscaler
subjects:
- kind: ServiceAccount
name: cluster-autoscaler
namespace: kube-system
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: cluster-autoscaler
namespace: kube-system
labels:
app: cluster-autoscaler
spec:
replicas: 1
selector:
matchLabels:
app: cluster-autoscaler
template:
metadata:
labels:
app: cluster-autoscaler
spec:
serviceAccountName: cluster-autoscaler
containers:
- image: docker.io/jlamillan/autoscaler:oci-pr-rc6
name: cluster-autoscaler
command:
- ./cluster-autoscaler
- --v=5
- --logtostderr=true
- --cloud-provider=oci
- --nodes=0:10:ocid1.instancepool.oc1.phx.aaaaaaaaqdxy35acq32zjfvkybjmvlbdgj6q3m55qkwwctxhsprmz633k62q
- --nodes=0:10:ocid1.instancepool.oc1.phx.aaaaaaaazldzcu4mi5spz56upbtwnsynz2nk6jvmx7zi4hsta4uggxbulbua
- --nodes=1:10:ocid1.instancepool.oc1.phx.aaaaaaaal3jhoc32ljsfaeif4x2ssfa2a63oehjgqryiueivieee6yaqbkia
- --scale-down-delay-after-add=10m
- --scale-down-unneeded-time=10m
- --namespace=kube-system
imagePullPolicy: "Always"
env:
- name: OCI_USE_INSTANCE_PRINCIPAL
value: "true"
- name: OCI_REGION
value: "us-phoenix-1"

View File

@ -0,0 +1,6 @@
[
{
"availabilityDomain": "hXgQ:PHX-AD-2",
"primarySubnetId": "ocid1.subnet.oc1.phx.aaaaaaaaouihv645dp2xaee6w4uvx6emjwuscsrxcn3miwa6vmijtpdnqdeq"
}
]

View File

@ -0,0 +1,175 @@
/*
Copyright 2021 Oracle and/or its affiliates.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package oci
import (
"github.com/pkg/errors"
apiv1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider"
"k8s.io/autoscaler/cluster-autoscaler/config"
caerrors "k8s.io/autoscaler/cluster-autoscaler/utils/errors"
"k8s.io/client-go/kubernetes"
"k8s.io/client-go/rest"
"k8s.io/client-go/tools/clientcmd"
"k8s.io/klog/v2"
"time"
)
const (
ociUseInstancePrincipalEnvVar = "OCI_USE_INSTANCE_PRINCIPAL"
ociCompartmentEnvVar = "OCI_COMPARTMENT_ID"
ociRegionEnvVar = "OCI_REGION"
ociRefreshInterval = "OCI_REFRESH_INTERVAL"
ociAnnotationCompartmentID = "oci.oraclecloud.com/compartment-id"
// ResourceGPU is the GPU resource type
ResourceGPU apiv1.ResourceName = "nvidia.com/gpu"
defaultRefreshInterval = 5 * time.Minute
)
// OciCloudProvider implements the CloudProvider interface for OCI. It contains an
// instance pool manager to interact with OCI instance pools.
type OciCloudProvider struct {
rl *cloudprovider.ResourceLimiter
poolManager InstancePoolManager
}
// CloudConfig holds the cloud config for OCI provider.
type CloudConfig struct {
Global struct {
RefreshInterval time.Duration `gcfg:"refresh-interval"`
CompartmentID string `gcfg:"compartment-id"`
Region string `gcfg:"region"`
UseInstancePrinciples bool `gcfg:"use-instance-principals"`
}
}
// Name returns name of the cloud provider.
func (ocp *OciCloudProvider) Name() string {
return cloudprovider.OracleCloudProviderName
}
// NodeGroups returns all node groups configured for this cloud provider.
func (ocp *OciCloudProvider) NodeGroups() []cloudprovider.NodeGroup {
nodePools := ocp.poolManager.GetInstancePools()
result := make([]cloudprovider.NodeGroup, 0, len(nodePools))
for _, nodePool := range nodePools {
result = append(result, nodePool)
}
return result
}
// NodeGroupForNode returns the node group for the given node, nil if the node
// should not be processed by cluster autoscaler, or non-nil error if such
// occurred. Must be implemented.
func (ocp *OciCloudProvider) NodeGroupForNode(n *apiv1.Node) (cloudprovider.NodeGroup, error) {
ociRef, err := nodeToOciRef(n)
if err != nil {
return nil, err
}
ng, err := ocp.poolManager.GetInstancePoolForInstance(ociRef)
// this instance may not be a part of an instance pool, or it may be part of an instance pool that the autoscaler does not manage
if errors.Cause(err) == errInstanceInstancePoolNotFound {
// should not be processed by cluster autoscaler
return nil, nil
}
return ng, err
}
// Pricing returns pricing model for this cloud provider or error if not available.
// Implementation optional.
func (ocp *OciCloudProvider) Pricing() (cloudprovider.PricingModel, caerrors.AutoscalerError) {
klog.Info("Pricing called")
return nil, cloudprovider.ErrNotImplemented
}
// GetAvailableMachineTypes getInstancePool all machine types that can be requested from the cloud provider.
// Implementation optional.
func (ocp *OciCloudProvider) GetAvailableMachineTypes() ([]string, error) {
klog.Info("GetAvailableMachineTypes called")
return nil, cloudprovider.ErrNotImplemented
}
// NewNodeGroup builds a theoretical node group based on the node definition provided. The node group is not automatically
// created on the cloud provider side. The node group is not returned by NodeGroups() until it is created.
// Implementation optional.
func (ocp *OciCloudProvider) NewNodeGroup(machineType string,
labels map[string]string,
systemLabels map[string]string,
taints []apiv1.Taint,
extraResources map[string]resource.Quantity,
) (cloudprovider.NodeGroup, error) {
return nil, cloudprovider.ErrNotImplemented
}
// GetResourceLimiter returns struct containing limits (max, min) for resources (cores, memory etc.).
func (ocp *OciCloudProvider) GetResourceLimiter() (*cloudprovider.ResourceLimiter, error) {
return ocp.rl, nil
}
// GPULabel returns the label added to nodes with GPU resource.
func (ocp *OciCloudProvider) GPULabel() string {
// No labels, only taint: nvidia.com/gpu:NoSchedule
return ""
}
// GetAvailableGPUTypes return all available GPU types cloud provider supports.
func (ocp *OciCloudProvider) GetAvailableGPUTypes() map[string]struct{} {
return map[string]struct{}{}
}
// Cleanup cleans up open resources before the cloud provider is destroyed, i.e. go routines etc.
func (ocp *OciCloudProvider) Cleanup() error {
return ocp.poolManager.Cleanup()
}
// Refresh is called before every main loop and can be used to dynamically update cloud provider state.
// In particular the list of node groups returned by NodeGroups can change as a result of CloudProvider.Refresh().
func (ocp *OciCloudProvider) Refresh() error {
return ocp.poolManager.Refresh()
}
// BuildOCI constructs the OciCloudProvider object that implements the cloud provider interface (InstancePoolManager).
func BuildOCI(opts config.AutoscalingOptions, do cloudprovider.NodeGroupDiscoveryOptions, rl *cloudprovider.ResourceLimiter) *OciCloudProvider {
ipManager, err := CreateInstancePoolManager(opts.CloudConfig, do, createKubeClient(opts))
if err != nil {
klog.Fatalf("Could not create OCI cloud provider: %v", err)
}
return &OciCloudProvider{
poolManager: ipManager,
rl: rl,
}
}
func getKubeConfig(opts config.AutoscalingOptions) *rest.Config {
klog.V(1).Infof("Using kubeconfig file: %s", opts.KubeConfigPath)
kubeConfig, err := clientcmd.BuildConfigFromFlags("", opts.KubeConfigPath)
if err != nil {
klog.Fatalf("Failed to build kubeConfig: %v", err)
}
return kubeConfig
}
func createKubeClient(opts config.AutoscalingOptions) kubernetes.Interface {
return kubernetes.NewForConfigOrDie(getKubeConfig(opts))
}

View File

@ -0,0 +1,234 @@
/*
Copyright 2021 Oracle and/or its affiliates.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package oci
import (
"fmt"
"github.com/pkg/errors"
apiv1 "k8s.io/api/core/v1"
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider"
"k8s.io/autoscaler/cluster-autoscaler/config"
"k8s.io/client-go/kubernetes"
"k8s.io/klog/v2"
schedulerframework "k8s.io/kubernetes/pkg/scheduler/framework"
)
const (
ociInstanceIDAnnotation = "oci.oraclecloud.com/instance-id"
ociInstancePoolIDAnnotation = "oci.oraclecloud.com/instancepool-id"
ociInstancePoolResourceIdent = "instancepool"
)
// InstancePoolNodeGroup implements the NodeGroup interface using OCI instance pools.
type InstancePoolNodeGroup struct {
manager InstancePoolManager
kubeClient kubernetes.Interface
id string
minSize int
maxSize int
}
// MaxSize returns maximum size of the instance-pool based node group.
func (ip *InstancePoolNodeGroup) MaxSize() int {
return ip.maxSize
}
// MinSize returns minimum size of the instance-pool based node group.
func (ip *InstancePoolNodeGroup) MinSize() int {
return ip.minSize
}
// TargetSize returns the current target size of the instance-pool based node group. It is possible that the
// number of nodes in Kubernetes is different at the moment but should be equal
// to Size() once everything stabilizes (new nodes finish startup and registration or
// removed nodes are deleted completely). Implementation required.
func (ip *InstancePoolNodeGroup) TargetSize() (int, error) {
return ip.manager.GetInstancePoolSize(*ip)
}
// IncreaseSize increases the size of the instance-pool based node group. To delete a node you need
// to explicitly name it and use DeleteNode. This function should wait until
// instance-pool size is updated. Implementation required.
func (ip *InstancePoolNodeGroup) IncreaseSize(delta int) error {
if delta <= 0 {
return fmt.Errorf("size increase must be positive")
}
size, err := ip.manager.GetInstancePoolSize(*ip)
if err != nil {
return err
}
if size+delta > ip.MaxSize() {
return fmt.Errorf("size increase too large - desired:%d max:%d", size+delta, ip.MaxSize())
}
return ip.manager.SetInstancePoolSize(*ip, size+delta)
}
// DeleteNodes deletes nodes from this instance-pool. Error is returned either on
// failure or if the given node doesn't belong to this instance-pool. This function
// should wait until instance-pool size is updated. Implementation required.
func (ip *InstancePoolNodeGroup) DeleteNodes(nodes []*apiv1.Node) error {
// FYI, unregistered nodes come in as the provider id as node name.
klog.Infof("DeleteNodes called with %d node(s)", len(nodes))
size, err := ip.manager.GetInstancePoolSize(*ip)
if err != nil {
return err
}
if size <= ip.MinSize() {
return fmt.Errorf("min size reached, nodes will not be deleted")
}
refs := make([]OciRef, 0, len(nodes))
for _, node := range nodes {
belongs, err := ip.Belongs(node)
if err != nil {
return err
}
if !belongs {
return fmt.Errorf("%s belong to a different instance-pool than %s", node.Name, ip.Id())
}
ociRef, err := nodeToOciRef(node)
if err != nil {
return err
}
refs = append(refs, ociRef)
}
return ip.manager.DeleteInstances(*ip, refs)
}
// DecreaseTargetSize decreases the target size of the instance-pool based node group. This function
// doesn't permit to delete any existing node and can be used only to reduce the
// request for new nodes that have not been yet fulfilled. Delta should be negative.
// It is assumed that cloud provider will not delete the existing nodes when there
// is an option to just decrease the target. Implementation required.
func (ip *InstancePoolNodeGroup) DecreaseTargetSize(delta int) error {
if delta >= 0 {
return fmt.Errorf("size decrease must be negative")
}
size, err := ip.manager.GetInstancePoolSize(*ip)
if err != nil {
return err
}
nodes, err := ip.manager.GetInstancePoolNodes(*ip)
if err != nil {
return err
}
if size+delta < len(nodes) {
return fmt.Errorf("attempt to delete existing nodes targetSize:%d delta:%d existingNodes: %d",
size, delta, len(nodes))
}
return ip.manager.SetInstancePoolSize(*ip, size+delta)
}
// Belongs returns true if the given node belongs to the InstancePoolNodeGroup.
func (ip *InstancePoolNodeGroup) Belongs(node *apiv1.Node) (bool, error) {
ref, err := nodeToOciRef(node)
if err != nil {
return false, err
}
targetInstancePool, err := ip.manager.GetInstancePoolForInstance(ref)
if err != nil {
return false, err
}
if targetInstancePool == nil {
return false, fmt.Errorf("%s doesn't belong to a known instance-pool", node.Name)
}
return targetInstancePool.Id() == ip.Id(), nil
}
// Id returns an unique identifier of the instance-pool based node group.
func (ip *InstancePoolNodeGroup) Id() string {
return ip.id
}
// Debug returns a string containing all information regarding this instance-pool.
func (ip *InstancePoolNodeGroup) Debug() string {
return fmt.Sprintf("%s (%d:%d)", ip.Id(), ip.MinSize(), ip.MaxSize())
}
// Nodes returns a list of all nodes that belong to this instance-pool.
// It is required that Instance objects returned by this method have Id field set.
// Other fields are optional.
// This list should include also instances that might have not become a kubernetes node yet.
func (ip *InstancePoolNodeGroup) Nodes() ([]cloudprovider.Instance, error) {
return ip.manager.GetInstancePoolNodes(*ip)
}
// TemplateNodeInfo returns a schedulerframework.NodeInfo structure of an empty
// (as if just started) node. This will be used in scale-up simulations to
// predict what a new node would look like if an instance-pool was expanded. The returned
// NodeInfo is expected to have a fully populated Node object, with all of the labels,
// capacity and allocatable information as well as all pods that are started on
// the node by default, using manifest (most likely only kube-proxy). Implementation optional.
func (ip *InstancePoolNodeGroup) TemplateNodeInfo() (*schedulerframework.NodeInfo, error) {
node, err := ip.manager.GetInstancePoolTemplateNode(*ip)
if err != nil {
return nil, errors.Wrap(err, "unable to build node info template")
}
nodeInfo := schedulerframework.NewNodeInfo(
cloudprovider.BuildKubeProxy(ip.id),
buildCSINodePod(),
)
nodeInfo.SetNode(node)
return nodeInfo, nil
}
// Exist checks if the instance-pool based node group really exists on the cloud provider side. Allows to tell the
// theoretical instance-pool from the real one. Implementation required.
func (ip *InstancePoolNodeGroup) Exist() bool {
return true
}
// Create creates the instance-pool based node group on the cloud provider side. Implementation optional.
func (ip *InstancePoolNodeGroup) Create() (cloudprovider.NodeGroup, error) {
return nil, cloudprovider.ErrNotImplemented
}
// Delete deletes the instance-pool based node group on the cloud provider side.
// This will be executed only for autoprovisioned instance-pools, once their size drops to 0.
// Implementation optional.
func (ip *InstancePoolNodeGroup) Delete() error {
return cloudprovider.ErrNotImplemented
}
// GetOptions returns NodeGroupAutoscalingOptions that should be used for this particular
// InstancePoolNodeGroup. Returning a nil will result in using default options.
func (ip *InstancePoolNodeGroup) GetOptions(defaults config.NodeGroupAutoscalingOptions) (*config.NodeGroupAutoscalingOptions, error) {
return nil, cloudprovider.ErrNotImplemented
}
// Autoprovisioned returns true if the instance-pool based node group is autoprovisioned. An autoprovisioned group
// was created by CA and can be deleted when scaled to 0.
func (ip *InstancePoolNodeGroup) Autoprovisioned() bool {
return false
}

View File

@ -0,0 +1,363 @@
/*
Copyright 2021 Oracle and/or its affiliates.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package oci
import (
"context"
"github.com/pkg/errors"
"k8s.io/apimachinery/pkg/util/wait"
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider/oci/oci-go-sdk/v43/common"
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider/oci/oci-go-sdk/v43/core"
"k8s.io/klog/v2"
"strings"
"sync"
"time"
)
// ComputeMgmtClient wraps core.ComputeManagementClient exposing the functions we actually require.
type ComputeMgmtClient interface {
GetInstancePool(context.Context, core.GetInstancePoolRequest) (core.GetInstancePoolResponse, error)
UpdateInstancePool(context.Context, core.UpdateInstancePoolRequest) (core.UpdateInstancePoolResponse, error)
GetInstancePoolInstance(context.Context, core.GetInstancePoolInstanceRequest) (core.GetInstancePoolInstanceResponse, error)
ListInstancePoolInstances(context.Context, core.ListInstancePoolInstancesRequest) (core.ListInstancePoolInstancesResponse, error)
DetachInstancePoolInstance(context.Context, core.DetachInstancePoolInstanceRequest) (core.DetachInstancePoolInstanceResponse, error)
}
// ComputeClient wraps core.ComputeClient exposing the functions we actually require.
type ComputeClient interface {
ListVnicAttachments(ctx context.Context, request core.ListVnicAttachmentsRequest) (core.ListVnicAttachmentsResponse, error)
}
// VirtualNetworkClient wraps core.VirtualNetworkClient exposing the functions we actually require.
type VirtualNetworkClient interface {
GetVnic(context.Context, core.GetVnicRequest) (core.GetVnicResponse, error)
}
type instancePoolCache struct {
mu sync.Mutex
poolCache map[string]*core.InstancePool
instanceSummaryCache map[string]*[]core.InstanceSummary
targetSize map[string]int
unownedInstances map[OciRef]bool
computeManagementClient ComputeMgmtClient
computeClient ComputeClient
virtualNetworkClient VirtualNetworkClient
}
func newInstancePoolCache(computeManagementClient ComputeMgmtClient, computeClient ComputeClient, virtualNetworkClient VirtualNetworkClient) *instancePoolCache {
return &instancePoolCache{
poolCache: map[string]*core.InstancePool{},
instanceSummaryCache: map[string]*[]core.InstanceSummary{},
targetSize: map[string]int{},
unownedInstances: map[OciRef]bool{},
computeManagementClient: computeManagementClient,
computeClient: computeClient,
virtualNetworkClient: virtualNetworkClient,
}
}
func (c *instancePoolCache) InstancePools() map[string]*core.InstancePool {
result := map[string]*core.InstancePool{}
for k, v := range c.poolCache {
result[k] = v
}
return result
}
func (c *instancePoolCache) rebuild(staticInstancePools map[string]*InstancePoolNodeGroup, cfg CloudConfig) error {
// Since we only support static instance-pools we don't need to worry about pruning.
for id := range staticInstancePools {
resp, err := c.computeManagementClient.GetInstancePool(context.Background(), core.GetInstancePoolRequest{
InstancePoolId: common.String(id),
})
if err != nil {
klog.Errorf("get instance pool %s failed: %v", id, err)
return err
}
klog.V(5).Infof("GetInstancePool() response %v", resp.InstancePool)
c.setInstancePool(&resp.InstancePool)
// OCI instance-pools do not contain individual instance objects so they must be fetched separately.
listInstancesResponse, err := c.computeManagementClient.ListInstancePoolInstances(context.Background(), core.ListInstancePoolInstancesRequest{
InstancePoolId: common.String(id),
CompartmentId: common.String(cfg.Global.CompartmentID),
})
if err != nil {
return err
}
klog.V(5).Infof("ListInstancePoolInstances() response %v", listInstancesResponse.Items)
c.setInstanceSummaries(*resp.InstancePool.Id, &listInstancesResponse.Items)
}
// Reset unowned instances cache.
c.unownedInstances = make(map[OciRef]bool)
return nil
}
// removeInstance tries to remove the instance from the specified instance pool. If the instance isn't in the array,
// then it won't do anything. removeInstance returns true if it actually removed the instance and reduced the size of
// the instance pool.
func (c *instancePoolCache) removeInstance(instancePool InstancePoolNodeGroup, instanceID string) bool {
c.mu.Lock()
defer c.mu.Unlock()
if instanceID == "" {
klog.Warning("instanceID is not set - skipping removal.")
return false
}
// This instance pool must be in state RUNNING in order to detach a particular instance.
err := c.waitForInstancePoolState(context.Background(), instancePool.Id(), core.InstancePoolLifecycleStateRunning)
if err != nil {
return false
}
_, err = c.computeManagementClient.DetachInstancePoolInstance(context.Background(), core.DetachInstancePoolInstanceRequest{
InstancePoolId: common.String(instancePool.Id()),
DetachInstancePoolInstanceDetails: core.DetachInstancePoolInstanceDetails{
InstanceId: common.String(instanceID),
IsDecrementSize: common.Bool(true),
IsAutoTerminate: common.Bool(true),
},
})
if err == nil {
// Decrease pool size in cache since IsDecrementSize was true
c.targetSize[instancePool.Id()] -= 1
return true
}
return false
}
// findInstanceByDetails attempts to find the given instance by details by searching
// through the configured instance-pools (ListInstancePoolInstances) for a match.
func (c *instancePoolCache) findInstanceByDetails(ociInstance OciRef) (*OciRef, error) {
c.mu.Lock()
defer c.mu.Unlock()
// Minimum amount of information we need to make a positive match
if ociInstance.InstanceID == "" && ociInstance.PrivateIPAddress == "" && ociInstance.PublicIPAddress == "" {
return nil, errors.New("instance id or an IP address is required to resolve details")
}
if c.unownedInstances[ociInstance] {
// We already know this instance is not part of a configured pool. Return early and avoid additional API calls.
klog.V(4).Infof("Node " + ociInstance.Name + " is known to not be a member of any of the specified instance pool(s)")
return nil, errInstanceInstancePoolNotFound
}
// Look for the instance in each of the specified pool(s)
for _, nextInstancePool := range c.poolCache {
// Skip searching the instance pool if its instance count is 0.
if *nextInstancePool.Size == 0 {
klog.V(4).Infof("skipping over instance pool %s since it is empty", *nextInstancePool.Id)
continue
}
// List instances in the next pool
listInstancePoolInstances, err := c.computeManagementClient.ListInstancePoolInstances(context.Background(), core.ListInstancePoolInstancesRequest{
CompartmentId: common.String(ociInstance.CompartmentID),
InstancePoolId: nextInstancePool.Id,
})
if err != nil {
return nil, err
}
for _, poolMember := range listInstancePoolInstances.Items {
// Skip comparing this instance if it is not in the Running state
if strings.ToLower(*poolMember.State) != strings.ToLower(string(core.InstanceLifecycleStateRunning)) {
klog.V(4).Infof("skipping over instance %s: since it is not in the running state: %s", *poolMember.Id, *poolMember.State)
continue
}
listVnicAttachments, err := c.computeClient.ListVnicAttachments(context.Background(), core.ListVnicAttachmentsRequest{
CompartmentId: common.String(*poolMember.CompartmentId),
InstanceId: poolMember.Id,
})
if err != nil {
klog.Errorf("list vNIC attachments for %s failed: %v", *poolMember.Id, err)
return nil, err
}
klog.V(5).Infof("ListVnicAttachments() response for %s: %v", *poolMember.Id, listVnicAttachments.Items)
for _, vnicAttachment := range listVnicAttachments.Items {
// Skip this attachment if the vNIC is not live
if core.VnicAttachmentLifecycleStateAttached != vnicAttachment.LifecycleState {
klog.V(4).Infof("skipping vNIC on instance %s: since it is not active", *poolMember.Id)
continue
}
getVnicResp, err := c.virtualNetworkClient.GetVnic(context.Background(), core.GetVnicRequest{
VnicId: vnicAttachment.VnicId,
})
if err != nil {
klog.Errorf("get vNIC for %s failed: %v", *poolMember.Id, err)
return nil, err
}
klog.V(5).Infof("GetVnic() response for vNIC %s: %v", *vnicAttachment.Id, getVnicResp.Vnic)
// Preferably we match by instanceID, but we can match by private or public IP
if *poolMember.Id == ociInstance.InstanceID ||
(getVnicResp.Vnic.PrivateIp != nil && *getVnicResp.Vnic.PrivateIp == ociInstance.PrivateIPAddress) ||
(getVnicResp.Vnic.PublicIp != nil && *getVnicResp.Vnic.PublicIp == ociInstance.PublicIPAddress) {
klog.V(4).Info(*poolMember.DisplayName, " is a member of "+*nextInstancePool.Id)
// Return a complete instance details.
if ociInstance.Name == "" {
ociInstance.Name = *poolMember.DisplayName
}
ociInstance.InstanceID = *poolMember.Id
ociInstance.PoolID = *nextInstancePool.Id
ociInstance.CompartmentID = *poolMember.CompartmentId
ociInstance.AvailabilityDomain = strings.Split(*poolMember.AvailabilityDomain, ":")[1]
ociInstance.Shape = *poolMember.Shape
ociInstance.PrivateIPAddress = *getVnicResp.Vnic.PrivateIp
// Public IP is optional
if getVnicResp.Vnic.PublicIp != nil {
ociInstance.PublicIPAddress = *getVnicResp.Vnic.PublicIp
}
return &ociInstance, nil
}
}
}
}
c.unownedInstances[ociInstance] = true
klog.V(4).Infof(ociInstance.Name + " is not a member of any of the specified instance pool(s)")
return nil, errInstanceInstancePoolNotFound
}
func (c *instancePoolCache) getInstancePool(id string) (*core.InstancePool, error) {
c.mu.Lock()
defer c.mu.Unlock()
return c.getInstancePoolWithoutLock(id)
}
func (c *instancePoolCache) getInstancePoolWithoutLock(id string) (*core.InstancePool, error) {
instancePool := c.poolCache[id]
if instancePool == nil {
return nil, errors.New("instance pool was not found in the cache")
}
return instancePool, nil
}
func (c *instancePoolCache) setInstancePool(np *core.InstancePool) {
c.mu.Lock()
defer c.mu.Unlock()
c.poolCache[*np.Id] = np
c.targetSize[*np.Id] = *np.Size
}
func (c *instancePoolCache) getInstanceSummaries(poolID string) (*[]core.InstanceSummary, error) {
c.mu.Lock()
defer c.mu.Unlock()
return c.getInstanceSummariesWithoutLock(poolID)
}
func (c *instancePoolCache) getInstanceSummariesWithoutLock(poolID string) (*[]core.InstanceSummary, error) {
instanceSummaries := c.instanceSummaryCache[poolID]
if instanceSummaries == nil {
return nil, errors.New("instance summaries for instance pool id " + poolID + " were not found in cache")
}
return instanceSummaries, nil
}
func (c *instancePoolCache) setInstanceSummaries(instancePoolID string, is *[]core.InstanceSummary) {
c.mu.Lock()
defer c.mu.Unlock()
c.instanceSummaryCache[instancePoolID] = is
}
func (c *instancePoolCache) setSize(instancePoolID string, size int) error {
if instancePoolID == "" {
return errors.New("instance-pool is required")
}
getInstancePoolResp, err := c.computeManagementClient.GetInstancePool(context.Background(), core.GetInstancePoolRequest{
InstancePoolId: common.String(instancePoolID),
})
if err != nil {
return err
}
updateDetails := core.UpdateInstancePoolDetails{
Size: common.Int(size),
InstanceConfigurationId: getInstancePoolResp.InstanceConfigurationId,
}
_, err = c.computeManagementClient.UpdateInstancePool(context.Background(), core.UpdateInstancePoolRequest{
InstancePoolId: common.String(instancePoolID),
UpdateInstancePoolDetails: updateDetails,
})
if err != nil {
return err
}
c.mu.Lock()
defer c.mu.Unlock()
c.targetSize[instancePoolID] = size
return c.waitForInstancePoolState(context.Background(), instancePoolID, core.InstancePoolLifecycleStateRunning)
}
func (c *instancePoolCache) waitForInstancePoolState(ctx context.Context, instancePoolID string, desiredState core.InstancePoolLifecycleStateEnum) error {
timeoutCtx, cancel := context.WithTimeout(ctx, 5*time.Minute)
defer cancel()
err := wait.PollImmediateUntil(
// TODO we need a better implementation of this function
internalPollInterval,
func() (bool, error) {
getInstancePoolResp, err := c.computeManagementClient.GetInstancePool(context.Background(), core.GetInstancePoolRequest{
InstancePoolId: common.String(instancePoolID),
})
if err != nil {
klog.Errorf("getInstancePool failed. Retrying: %+v", err)
return false, err
} else if getInstancePoolResp.LifecycleState != desiredState {
klog.V(4).Infof("waiting for instance-pool %s to enter state: %s (current state: %s)", instancePoolID,
desiredState, getInstancePoolResp.LifecycleState)
return false, nil
}
klog.V(3).Infof("instance pool %s is in desired state: %s", instancePoolID, desiredState)
return true, nil
}, timeoutCtx.Done())
if err != nil {
// may be wait.ErrWaitTimeout
return err
}
return nil
}
func (c *instancePoolCache) getSize(id string) (int, error) {
c.mu.Lock()
defer c.mu.Unlock()
size, ok := c.targetSize[id]
if !ok {
return -1, errors.New("target size not found")
}
return size, nil
}

View File

@ -0,0 +1,513 @@
/*
Copyright 2021 Oracle and/or its affiliates.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package oci
import (
"fmt"
"gopkg.in/gcfg.v1"
"os"
"strconv"
"strings"
"time"
apiv1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/util/wait"
"k8s.io/client-go/kubernetes"
"k8s.io/klog/v2"
kubeletapis "k8s.io/kubelet/pkg/apis"
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider"
"github.com/pkg/errors"
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider/oci/oci-go-sdk/v43/common"
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider/oci/oci-go-sdk/v43/common/auth"
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider/oci/oci-go-sdk/v43/core"
)
var (
internalPollInterval = 1 * time.Minute
errInstanceInstancePoolNotFound = errors.New("instance-pool not found for instance")
)
// InstancePoolManager defines the operations required for an *instance-pool based* autoscaler.
type InstancePoolManager interface {
// Refresh triggers refresh of cached resources.
Refresh() error
// Cleanup cleans up open resources before the cloud provider is destroyed, i.e. go routines etc.
Cleanup() error
// GetInstancePools returns list of registered InstancePools.
GetInstancePools() []*InstancePoolNodeGroup
// GetInstancePoolNodes returns InstancePool nodes.
GetInstancePoolNodes(ip InstancePoolNodeGroup) ([]cloudprovider.Instance, error)
// GetInstancePoolForInstance returns InstancePool to which the given instance belongs.
GetInstancePoolForInstance(instance OciRef) (*InstancePoolNodeGroup, error)
// GetInstancePoolTemplateNode returns a template node for InstancePool.
GetInstancePoolTemplateNode(ip InstancePoolNodeGroup) (*apiv1.Node, error)
// GetInstancePoolSize gets the InstancePool size.
GetInstancePoolSize(ip InstancePoolNodeGroup) (int, error)
// SetInstancePoolSize sets the InstancePool size.
SetInstancePoolSize(ip InstancePoolNodeGroup, size int) error
// DeleteInstances deletes the given instances. All instances must be controlled by the same InstancePool.
DeleteInstances(ip InstancePoolNodeGroup, instances []OciRef) error
}
// InstancePoolManagerImpl is the implementation of an instance-pool based autoscaler on OCI.
type InstancePoolManagerImpl struct {
cfg *CloudConfig
shapeGetter ShapeGetter
staticInstancePools map[string]*InstancePoolNodeGroup
lastRefresh time.Time
// caches the instance pool and instance summary objects received from OCI.
// All interactions with OCI's API should go through the poolCache.
instancePoolCache *instancePoolCache
}
// CreateInstancePoolManager constructs the InstancePoolManager object.
func CreateInstancePoolManager(cloudConfigPath string, discoveryOpts cloudprovider.NodeGroupDiscoveryOptions, kubeClient kubernetes.Interface) (InstancePoolManager, error) {
var err error
var configProvider common.ConfigurationProvider
var cloudConfig = &CloudConfig{}
// cloudConfigPath is the optional file of variables passed in with the --cloud-config flag, which takes precedence over environment variables
if cloudConfigPath != "" {
config, fileErr := os.Open(cloudConfigPath)
if fileErr != nil {
klog.Fatalf("could not open cloud provider configuration %s: %#v", cloudConfigPath, fileErr)
}
defer config.Close()
if config != nil {
if err := gcfg.ReadInto(cloudConfig, config); err != nil {
klog.Errorf("could not read config: %v", err)
return nil, err
}
}
}
// Fall back to environment variables
if cloudConfig.Global.CompartmentID == "" {
cloudConfig.Global.CompartmentID = os.Getenv(ociCompartmentEnvVar)
} else if !cloudConfig.Global.UseInstancePrinciples {
if os.Getenv(ociUseInstancePrincipalEnvVar) == "true" {
cloudConfig.Global.UseInstancePrinciples = true
}
if os.Getenv(ociRegionEnvVar) != "" {
cloudConfig.Global.Region = os.Getenv(ociRegionEnvVar)
}
}
if cloudConfig.Global.RefreshInterval == 0 {
if os.Getenv(ociRefreshInterval) != "" {
klog.V(4).Info("using a custom cache refresh interval %v...", os.Getenv(ociRefreshInterval))
cloudConfig.Global.RefreshInterval, _ = time.ParseDuration(os.Getenv(ociRefreshInterval))
} else {
cloudConfig.Global.RefreshInterval = defaultRefreshInterval
}
}
clientConfig := common.CustomClientConfiguration{
RetryPolicy: newRetryPolicy(),
}
if os.Getenv(ociUseInstancePrincipalEnvVar) == "true" {
klog.V(4).Info("using instance principals...")
region := os.Getenv(ociRegionEnvVar)
if region == "" {
klog.Fatalf("OCI_REGION is required when OCI_USE_INSTANCE_PRINCIPAL is set to true")
}
configProvider, err = auth.InstancePrincipalConfigurationProviderForRegion(common.StringToRegion(region))
if err != nil {
return nil, err
}
} else {
klog.Info("using default configuration provider")
configProvider = common.DefaultConfigProvider()
}
providerRegion, _ := configProvider.Region()
klog.Infof("OCI provider region: %s ", providerRegion)
computeMgmtClient, err := core.NewComputeManagementClientWithConfigurationProvider(configProvider)
if err != nil {
return nil, errors.Wrap(err, "unable to create compute management client")
}
computeMgmtClient.SetCustomClientConfiguration(clientConfig)
computeClient, err := core.NewComputeClientWithConfigurationProvider(configProvider)
if err != nil {
return nil, errors.Wrap(err, "unable to create compute client")
}
computeClient.SetCustomClientConfiguration(clientConfig)
networkClient, err := core.NewVirtualNetworkClientWithConfigurationProvider(configProvider)
if err != nil {
return nil, errors.Wrap(err, "unable to create virtual network client")
}
networkClient.SetCustomClientConfiguration(clientConfig)
cloudConfig.Global.CompartmentID = os.Getenv(ociCompartmentEnvVar)
// Not passed by --cloud-config or environment variable, attempt to use the tenancy ID as the compartment ID
if cloudConfig.Global.CompartmentID == "" {
tenancyID, err := configProvider.TenancyOCID()
if err != nil {
return nil, errors.Wrap(err, "unable to retrieve tenancy ID")
}
cloudConfig.Global.CompartmentID = tenancyID
}
ipManager := &InstancePoolManagerImpl{
cfg: cloudConfig,
staticInstancePools: map[string]*InstancePoolNodeGroup{},
shapeGetter: createShapeGetter(ShapeClientImpl{computeMgmtClient: computeMgmtClient, computeClient: computeClient}),
instancePoolCache: newInstancePoolCache(&computeMgmtClient, &computeClient, &networkClient),
}
// Contains all the specs from the args that give us the pools.
for _, arg := range discoveryOpts.NodeGroupSpecs {
ip, err := instancePoolFromArg(arg)
if err != nil {
return nil, fmt.Errorf("unable to construct instance pool from argument: %v", err)
}
ip.manager = ipManager
ip.kubeClient = kubeClient
ipManager.staticInstancePools[ip.Id()] = ip
}
// wait until we have an initial full poolCache.
err = wait.PollImmediateInfinite(
10*time.Second,
func() (bool, error) {
err := ipManager.Refresh()
if err != nil {
klog.Errorf("unable to fill cache on startup. Retrying: %+v", err)
return false, nil
}
return true, nil
})
if err != nil {
return nil, err
}
return ipManager, nil
}
// instancePoolFromArg parses an instance-pool spec represented in the form of `<minSize>:<maxSize>:<ocid>` and produces an instance-pool wrapper spec object.
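// For example, the (hypothetical) argument "1:10:ocid1.instancepool.oc1.phx.aaaaaaaa1" yields a
// wrapper with minSize 1, maxSize 10, and id "ocid1.instancepool.oc1.phx.aaaaaaaa1".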
func instancePoolFromArg(value string) (*InstancePoolNodeGroup, error) {
if !strings.Contains(value, ociInstancePoolResourceIdent) {
return nil, fmt.Errorf("instance pool manager does not work with resources of type: %s", value)
}
tokens := strings.SplitN(value, ":", 3)
if len(tokens) != 3 || !strings.HasPrefix(tokens[2], "ocid") {
return nil, fmt.Errorf("incorrect instance configuration: %s", value)
}
spec := &InstancePoolNodeGroup{}
if size, err := strconv.Atoi(tokens[0]); err == nil {
spec.minSize = size
} else {
return nil, fmt.Errorf("failed to set pool min size: %s %v", tokens[0], err)
}
if size, err := strconv.Atoi(tokens[1]); err == nil {
spec.maxSize = size
} else {
return nil, fmt.Errorf("failed to set pool max size: %s %v", tokens[1], err)
}
spec.id = tokens[2]
klog.Infof("static instance pool wrapper spec constructed: %+v", spec)
return spec, nil
}
// Refresh triggers refresh of cached resources.
func (m *InstancePoolManagerImpl) Refresh() error {
if m.lastRefresh.Add(m.cfg.Global.RefreshInterval).After(time.Now()) {
return nil
}
return m.forceRefresh()
}
func (m *InstancePoolManagerImpl) forceRefresh() error {
if m.cfg == nil {
return errors.New("instance pool manager does have a required config")
}
err := m.instancePoolCache.rebuild(m.staticInstancePools, *m.cfg)
if err != nil {
return err
}
m.lastRefresh = time.Now()
klog.Infof("Refreshed instance-pool list, next refresh after %v", m.lastRefresh.Add(m.cfg.Global.RefreshInterval))
return nil
}
// Cleanup cleans up open resources before the cloud provider is destroyed, i.e. go routines etc.
func (m *InstancePoolManagerImpl) Cleanup() error {
return nil
}
// GetInstancePools returns list of registered InstancePools.
func (m *InstancePoolManagerImpl) GetInstancePools() []*InstancePoolNodeGroup {
var instancePools []*InstancePoolNodeGroup
for _, np := range m.staticInstancePools {
instancePools = append(instancePools, np)
}
return instancePools
}
// GetInstancePoolNodes returns InstancePool nodes that are not in a terminal state.
func (m *InstancePoolManagerImpl) GetInstancePoolNodes(ip InstancePoolNodeGroup) ([]cloudprovider.Instance, error) {
klog.V(4).Infof("getting instances for node pool: %q", ip.Id())
instanceSummaries, err := m.instancePoolCache.getInstanceSummaries(ip.Id())
if err != nil {
return nil, err
}
var providerInstances []cloudprovider.Instance
for _, instance := range *instanceSummaries {
status := &cloudprovider.InstanceStatus{}
switch *instance.State {
case string(core.InstanceLifecycleStateRunning):
status.State = cloudprovider.InstanceRunning
case string(core.InstanceLifecycleStateCreatingImage):
status.State = cloudprovider.InstanceCreating
case string(core.InstanceLifecycleStateStarting):
status.State = cloudprovider.InstanceCreating
case string(core.InstanceLifecycleStateMoving):
status.State = cloudprovider.InstanceCreating
case string(core.InstanceLifecycleStateProvisioning):
status.State = cloudprovider.InstanceCreating
case string(core.InstanceLifecycleStateTerminating):
status.State = cloudprovider.InstanceDeleting
case string(core.InstanceLifecycleStateStopping):
status.State = cloudprovider.InstanceDeleting
}
// Instance not in a terminal or unknown state, ok to add.
if status.State != 0 {
providerInstances = append(providerInstances, cloudprovider.Instance{
Id: *instance.Id,
Status: status,
})
}
}
return providerInstances, nil
}
// GetInstancePoolForInstance returns InstancePool to which the given instance belongs. If
// PoolID is not set on the specified OciRef, we will look for a match.
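// Resolution falls back to matching the instance's OCID or private IP address (when present on
// the OciRef) against the cached instance-pool membership.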
func (m *InstancePoolManagerImpl) GetInstancePoolForInstance(instanceDetails OciRef) (*InstancePoolNodeGroup, error) {
if instanceDetails.PoolID != "" {
// It's possible that this instance belongs to an instance pool that was not specified via --nodes argument.
return m.staticInstancePools[instanceDetails.PoolID], nil
}
if instanceDetails.CompartmentID == "" {
// cfg.Global.CompartmentID would be set to tenancy OCID at runtime if compartment was not set.
instanceDetails.CompartmentID = m.cfg.Global.CompartmentID
}
// Details are missing from this instance - including the pool ID.
// Try to resolve them, though it may not be a member of an instance-pool we manage.
resolvedInstanceDetails, err := m.instancePoolCache.findInstanceByDetails(instanceDetails)
if err != nil {
return nil, err
} else if resolvedInstanceDetails == nil {
return nil, nil
}
kubeClient := m.staticInstancePools[resolvedInstanceDetails.PoolID].kubeClient
// Optionally annotate & label the node so that it does not need to be searched for in subsequent iterations.
_ = annotateNode(kubeClient, resolvedInstanceDetails.Name, ociInstanceIDAnnotation, resolvedInstanceDetails.InstanceID)
_ = annotateNode(kubeClient, resolvedInstanceDetails.Name, ociInstancePoolIDAnnotation, resolvedInstanceDetails.PoolID)
_ = annotateNode(kubeClient, resolvedInstanceDetails.Name, ociAnnotationCompartmentID, resolvedInstanceDetails.CompartmentID)
_ = labelNode(kubeClient, resolvedInstanceDetails.Name, apiv1.LabelTopologyZone, resolvedInstanceDetails.AvailabilityDomain)
_ = labelNode(kubeClient, resolvedInstanceDetails.Name, apiv1.LabelFailureDomainBetaZone, resolvedInstanceDetails.AvailabilityDomain)
_ = labelNode(kubeClient, resolvedInstanceDetails.Name, apiv1.LabelInstanceType, resolvedInstanceDetails.Shape)
_ = labelNode(kubeClient, resolvedInstanceDetails.Name, apiv1.LabelInstanceTypeStable, resolvedInstanceDetails.Shape)
_ = setNodeProviderID(kubeClient, resolvedInstanceDetails.Name, resolvedInstanceDetails.InstanceID)
return m.staticInstancePools[resolvedInstanceDetails.PoolID], nil
}
// GetInstancePoolTemplateNode returns a template node for the InstancePool.
func (m *InstancePoolManagerImpl) GetInstancePoolTemplateNode(ip InstancePoolNodeGroup) (*apiv1.Node, error) {
instancePool, err := m.instancePoolCache.getInstancePool(ip.Id())
if err != nil {
return nil, err
}
node, err := m.buildNodeFromTemplate(instancePool)
if err != nil {
return nil, err
}
return node, nil
}
// GetInstancePoolSize gets the instance-pool size.
func (m *InstancePoolManagerImpl) GetInstancePoolSize(ip InstancePoolNodeGroup) (int, error) {
return m.instancePoolCache.getSize(ip.Id())
}
// SetInstancePoolSize sets instance-pool size.
func (m *InstancePoolManagerImpl) SetInstancePoolSize(np InstancePoolNodeGroup, size int) error {
err := m.instancePoolCache.setSize(np.Id(), size)
if err != nil {
return err
}
// Interface says this function should wait until node group size is updated.
// We do not wait for the work request to finish or nodes become active on purpose. This allows
// the autoscaler to make decisions quicker especially since the autoscaler is aware of
// unregistered nodes in addition to registered nodes.
return nil
}
// DeleteInstances deletes the given instances. All instances must be controlled by the same instance-pool.
func (m *InstancePoolManagerImpl) DeleteInstances(instancePool InstancePoolNodeGroup, instances []OciRef) error {
klog.Infof("DeleteInstances called on instance pool %s", instancePool.Id())
for _, instance := range instances {
// removeInstance auto decrements instance pool size.
detached := m.instancePoolCache.removeInstance(instancePool, instance.InstanceID)
if !detached {
return fmt.Errorf("could not delete instance %s from instance pool %s", instance.InstanceID, instancePool.Id())
}
}
return nil
}
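// buildNodeFromTemplate constructs a template node for the instance-pool from its shape
// resources (CPU, memory, GPU), availability domain, and generic labels.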
func (m *InstancePoolManagerImpl) buildNodeFromTemplate(instancePool *core.InstancePool) (*apiv1.Node, error) {
node := apiv1.Node{}
nodeName := fmt.Sprintf("%s-%d", "inst", 555555)
ocidParts := strings.Split(*instancePool.Id, ".")
instanceIDPlaceholder := ocidParts[0] + "." + "instance" + "." + ocidParts[2] + "." + ocidParts[3] + ".tbd"
annotations := make(map[string]string)
annotations[ociAnnotationCompartmentID] = *instancePool.CompartmentId
annotations[ociInstancePoolIDAnnotation] = *instancePool.Id
annotations[ociInstanceIDAnnotation] = instanceIDPlaceholder
node.ObjectMeta = metav1.ObjectMeta{
Name: nodeName,
Labels: map[string]string{},
Annotations: annotations,
}
node.Status = apiv1.NodeStatus{
Capacity: apiv1.ResourceList{},
}
shape, err := m.shapeGetter.GetInstancePoolShape(instancePool)
if err != nil {
return nil, err
}
if shape.GPU > 0 {
node.Spec.Taints = append(node.Spec.Taints, apiv1.Taint{
Key: "nvidia.com/gpu",
Value: "",
Effect: "NoSchedule",
})
}
node.Status.Capacity[apiv1.ResourcePods] = *resource.NewQuantity(110, resource.DecimalSI)
node.Status.Capacity[apiv1.ResourceCPU] = *resource.NewQuantity(int64(shape.CPU), resource.DecimalSI)
node.Status.Capacity[apiv1.ResourceMemory] = *resource.NewQuantity(int64(shape.MemoryInBytes), resource.DecimalSI)
node.Status.Capacity[ResourceGPU] = *resource.NewQuantity(int64(shape.GPU), resource.DecimalSI)
node.Status.Allocatable = node.Status.Capacity
availabilityDomain, err := getInstancePoolAvailabilityDomain(instancePool)
if err != nil {
return nil, err
}
node.Labels = cloudprovider.JoinStringMaps(node.Labels, buildGenericLabelsForInstancePool(instancePool, nodeName, shape.Name, availabilityDomain))
node.Status.Conditions = cloudprovider.BuildReadyConditions()
return &node, nil
}
// getInstancePoolAvailabilityDomain determines the availability domain of the instance pool.
// This breaks down if the customer specifies more than one placement configuration,
// so the best practice is one node pool per AD if customers care about it during scheduling.
// If more than one AD is defined, we always return the first one.
func getInstancePoolAvailabilityDomain(ip *core.InstancePool) (string, error) {
if len(ip.PlacementConfigurations) == 0 {
// At least one placement configuration is required for an instance pool, so we should not get here.
return "", fmt.Errorf("instance-pool %q does not have the required placement configurations", *ip.Id)
}
if len(ip.PlacementConfigurations) > 1 {
klog.Warningf("instance-pool %q has more than 1 placement config so picking first availability domain", *ip.Id)
}
// Get the availability domain which is by default in the format of `Uocm:PHX-AD-1`
// and remove the hash prefix.
availabilityDomain := strings.Split(*ip.PlacementConfigurations[0].AvailabilityDomain, ":")[1]
return availabilityDomain, nil
}
func buildGenericLabelsForInstancePool(instancePool *core.InstancePool, nodeName, shape, availabilityDomain string) map[string]string {
result := make(map[string]string)
result[kubeletapis.LabelArch] = cloudprovider.DefaultArch
result[apiv1.LabelArchStable] = cloudprovider.DefaultArch
result[kubeletapis.LabelOS] = cloudprovider.DefaultOS
result[apiv1.LabelOSStable] = cloudprovider.DefaultOS
parts := strings.Split(*instancePool.Id, ".")
if len(parts) == 5 {
// backward compatibility with older pod labels
result[apiv1.LabelZoneRegion] = parts[3]
result[apiv1.LabelZoneRegionStable] = parts[3]
}
result[apiv1.LabelInstanceType] = shape
result[apiv1.LabelInstanceTypeStable] = shape
result[apiv1.LabelZoneFailureDomain] = availabilityDomain
// backward compatibility with older pod labels
result[apiv1.LabelZoneFailureDomainStable] = availabilityDomain
result[apiv1.LabelHostname] = nodeName
return result
}


@ -0,0 +1,511 @@
package oci
import (
"context"
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider/oci/oci-go-sdk/v43/core"
"reflect"
"testing"
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider"
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider/oci/oci-go-sdk/v43/common"
)
type mockComputeManagementClient struct {
err error
getInstancePoolResponse core.GetInstancePoolResponse
getInstancePoolInstanceResponse core.GetInstancePoolInstanceResponse
listInstancePoolInstancesResponse core.ListInstancePoolInstancesResponse
updateInstancePoolResponse core.UpdateInstancePoolResponse
detachInstancePoolInstanceResponse core.DetachInstancePoolInstanceResponse
}
type mockVirtualNetworkClient struct {
err error
getVnicResponse core.GetVnicResponse
}
type mockComputeClient struct {
err error
listVnicAttachmentsResponse core.ListVnicAttachmentsResponse
}
func (m *mockComputeClient) ListVnicAttachments(ctx context.Context, request core.ListVnicAttachmentsRequest) (core.ListVnicAttachmentsResponse, error) {
return m.listVnicAttachmentsResponse, m.err
}
func (m *mockVirtualNetworkClient) GetVnic(context.Context, core.GetVnicRequest) (core.GetVnicResponse, error) {
return m.getVnicResponse, m.err
}
func (m *mockComputeManagementClient) ListInstancePoolInstances(_ context.Context, _ core.ListInstancePoolInstancesRequest) (core.ListInstancePoolInstancesResponse, error) {
return m.listInstancePoolInstancesResponse, m.err
}
func (m *mockComputeManagementClient) GetInstancePool(context.Context, core.GetInstancePoolRequest) (core.GetInstancePoolResponse, error) {
return m.getInstancePoolResponse, m.err
}
func (m *mockComputeManagementClient) UpdateInstancePool(context.Context, core.UpdateInstancePoolRequest) (core.UpdateInstancePoolResponse, error) {
return m.updateInstancePoolResponse, m.err
}
func (m *mockComputeManagementClient) GetInstancePoolInstance(context.Context, core.GetInstancePoolInstanceRequest) (core.GetInstancePoolInstanceResponse, error) {
return m.getInstancePoolInstanceResponse, m.err
}
func (m *mockComputeManagementClient) DetachInstancePoolInstance(context.Context, core.DetachInstancePoolInstanceRequest) (core.DetachInstancePoolInstanceResponse, error) {
return m.detachInstancePoolInstanceResponse, m.err
}
var computeClient = &mockComputeClient{
err: nil,
listVnicAttachmentsResponse: core.ListVnicAttachmentsResponse{
RawResponse: nil,
Items: []core.VnicAttachment{{
Id: common.String("ocid1.vnic.oc1.phx.abc"),
LifecycleState: core.VnicAttachmentLifecycleStateAttached,
}},
},
}
var computeManagementClient = &mockComputeManagementClient{
err: nil,
getInstancePoolResponse: core.GetInstancePoolResponse{
InstancePool: core.InstancePool{
Id: common.String("ocid1.instancepool.oc1.phx.aaaaaaaa1"),
CompartmentId: common.String("ocid1.compartment.oc1..aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
InstanceConfigurationId: common.String("ocid1.instanceconfiguration.oc1.phx.aaaaaaaa1"),
LifecycleState: core.InstancePoolLifecycleStateRunning,
Size: common.Int(2),
},
},
listInstancePoolInstancesResponse: core.ListInstancePoolInstancesResponse{
RawResponse: nil,
Items: []core.InstanceSummary{{
Id: common.String("ocid1.instance.oc1.phx.aaa1"),
AvailabilityDomain: common.String("Uocm:PHX-AD-2"),
CompartmentId: common.String("ocid1.compartment.oc1..aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
DisplayName: common.String("inst-1ncvn-ociinstancepool"),
Shape: common.String("VM.Standard2.8"),
State: common.String(string(core.InstanceLifecycleStateRunning)),
}, {
Id: common.String("ocid1.instance.oc1.phx.aaacachemiss"),
AvailabilityDomain: common.String("Uocm:PHX-AD-2"),
CompartmentId: common.String("ocid1.compartment.oc1..aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
DisplayName: common.String("inst-2ncvn-ociinstancepool"),
Shape: common.String("VM.Standard2.8"),
State: common.String(string(core.InstanceLifecycleStateRunning)),
}},
},
}
var virtualNetworkClient = &mockVirtualNetworkClient{
err: nil,
getVnicResponse: core.GetVnicResponse{
RawResponse: nil,
Vnic: core.Vnic{
Id: common.String("ocid1.vnic.oc1.phx.abyhqljsxigued23s7ywgcqlbpqfiysgnhxj672awzjluhoopzf7l7wvm6rq"),
PrivateIp: common.String("10.0.20.59"),
PublicIp: common.String("129.146.58.250"),
},
},
}
func TestInstancePoolFromArgs(t *testing.T) {
value := `1:5:ocid1.instancepool.oc1.phx.aaaaaaaah`
instanceNodePool, err := instancePoolFromArg(value)
if err != nil {
t.Fatalf("unexpected error: %+v", err)
}
if instanceNodePool.minSize != 1 {
t.Errorf("got minSize %d ; wanted minSize 1", instanceNodePool.minSize)
}
if instanceNodePool.maxSize != 5 {
t.Errorf("got maxSize %d ; wanted maxSize 1", instanceNodePool.maxSize)
}
if instanceNodePool.id != "ocid1.instancepool.oc1.phx.aaaaaaaah" {
t.Errorf("got ocid %q ; wanted id \"ocid1.instancepool.oc1.phx.aaaaaaaah\"", instanceNodePool.id)
}
value = `1:5:ocid1.nodepool.oc1.phx.aaaaaaaah`
_, err = instancePoolFromArg(value)
if err == nil {
t.Fatal("expected error processing an oke based node-pool")
}
value = `1:5:incorrect:ocid1.instancepool.oc1.phx.aaaaaaaah`
_, err = instancePoolFromArg(value)
if err == nil {
t.Fatal("expected error of an invalid instance pool")
}
}
func TestGetSetInstancePoolSize(t *testing.T) {
nodePoolCache := newInstancePoolCache(computeManagementClient, computeClient, virtualNetworkClient)
nodePoolCache.targetSize["ocid1.instancepool.oc1.phx.aaaaaaaai"] = 5
manager := &InstancePoolManagerImpl{instancePoolCache: nodePoolCache}
size, err := manager.GetInstancePoolSize(InstancePoolNodeGroup{id: "ocid1.instancepool.oc1.phx.aaaaaaaai"})
if err != nil {
t.Fatalf("unexpected error: %+v", err)
}
if size != 5 {
t.Errorf("got size %d ; wanted size 5", size)
}
err = manager.SetInstancePoolSize(InstancePoolNodeGroup{id: "ocid1.instancepool.oc1.phx.aaaaaaaai"}, 6)
if err != nil {
t.Fatalf("unexpected error: %+v", err)
}
size, err = manager.GetInstancePoolSize(InstancePoolNodeGroup{id: "ocid1.instancepool.oc1.phx.aaaaaaaai"})
if err != nil {
t.Fatalf("unexpected error: %+v", err)
}
if size != 6 {
t.Errorf("got size %d ; wanted size 6", size)
}
}
func TestGetInstancePoolForInstance(t *testing.T) {
nodePoolCache := newInstancePoolCache(computeManagementClient, computeClient, virtualNetworkClient)
nodePoolCache.poolCache["ocid1.instancepool.oc1.phx.aaaaaaaa1"] = &core.InstancePool{
Id: common.String("ocid1.instancepool.oc1.phx.aaaaaaaa1"),
Size: common.Int(1),
}
var cloudConfig = CloudConfig{}
cloudConfig.Global.CompartmentID = "compartment.oc1..aaaaaaaa7ey4sg3a6b5wnv5hlkjlkjadslkfjalskfjalsadfadsf"
manager := &InstancePoolManagerImpl{
cfg: &cloudConfig,
staticInstancePools: map[string]*InstancePoolNodeGroup{
"ocid1.instancepool.oc1.phx.aaaaaaaa1": {id: "ocid1.instancepool.oc1.phx.aaaaaaaa1"},
"ocid1.instancepool.oc1.phx.aaaaaaaa2": {id: "ocid1.instancepool.oc1.phx.aaaaaaaa2"},
},
instancePoolCache: nodePoolCache,
}
// first verify instance pool can be found when only the instance id is specified.
np, err := manager.GetInstancePoolForInstance(OciRef{InstanceID: "ocid1.instance.oc1.phx.aaa1"})
if err != nil {
t.Fatalf("unexpected error: %+v", err)
}
if np.Id() != "ocid1.instancepool.oc1.phx.aaaaaaaa1" {
t.Fatalf("got unexpected ocid %q ; wanted \"ocid1.instancepool.oc1.phx.aaaaaaaa1\"", np.Id())
}
// next, verify a valid instance can be found if it is currently missing from the cache.
np, err = manager.GetInstancePoolForInstance(OciRef{InstanceID: "ocid1.instance.oc1.phx.aaacachemiss"})
if err != nil {
t.Fatalf("unexpected error: %+v", err)
}
if np.Id() != "ocid1.instancepool.oc1.phx.aaaaaaaa1" {
t.Fatalf("got unexpected ocid %q ; wanted \"ocid1.instancepool.oc1.phx.aaaaaaaa1s\"", np.Id())
}
// next, verify an invalid instance cant be found if it is missing from the cache and pool.
_, err = manager.GetInstancePoolForInstance(OciRef{InstanceID: "ocid1.instance.oc1.phx.aaadne"})
if err != errInstanceInstancePoolNotFound {
t.Fatalf("epected error looking for an invalid instance")
}
// verify an invalid instance pool produces an error.
ip, err := manager.GetInstancePoolForInstance(OciRef{InstanceID: "ocid1.instance.oc1.phx.aaadne", PoolID: "ocid1.instancepool.oc1.phx.aaaaaaaadne"})
if err != nil {
t.Fatalf("unexpected error: %+v", err)
}
if ip != nil {
t.Fatalf("expected nil looking for an instance with invalid instance & pool ids")
}
// next verify instance pool can be found when the instance pool id is specified directly.
_, err = manager.GetInstancePoolForInstance(OciRef{PoolID: "ocid1.instancepool.oc1.phx.aaaaaaaa1"})
if err != nil {
t.Fatalf("unexpected error: %+v", err)
}
// next verify instance pool can be found when only the private IP is specified.
np, err = manager.GetInstancePoolForInstance(OciRef{
PrivateIPAddress: "10.0.20.59"})
if err != nil {
t.Fatalf("unexpected error: %+v", err)
}
if np.Id() != "ocid1.instancepool.oc1.phx.aaaaaaaa1" {
t.Fatalf("got unexpected ocid %q ; wanted \"ocid1.instancepool.oc1.phx.aaaaaaaa1\"", np.Id())
}
// now verify the node pool can be found via lookup by instance id in the poolCache
np, err = manager.GetInstancePoolForInstance(OciRef{InstanceID: "ocid1.instance.oc1.phx.aaa1"})
if err != nil {
t.Fatalf("unexpected error: %+v", err)
}
if np.Id() != "ocid1.instancepool.oc1.phx.aaaaaaaa1" {
t.Fatalf("got unexpected ocid %q ; wanted \"ocid1.instancepool.oc1.phx.aaaaaaaa1\"", np.Id())
}
}
func TestGetInstancePoolNodes(t *testing.T) {
nodePoolCache := newInstancePoolCache(computeManagementClient, computeClient, virtualNetworkClient)
nodePoolCache.poolCache["ocid1.instancepool.oc1.phx.aaaaaaaa1"] = &core.InstancePool{
Id: common.String("ocid1.instancepool.oc1.phx.aaaaaaaa1"),
CompartmentId: common.String("ocid1.compartment.oc1..aaaaaaaa1"),
LifecycleState: core.InstancePoolLifecycleStateRunning,
}
nodePoolCache.instanceSummaryCache["ocid1.instancepool.oc1.phx.aaaaaaaa1"] = &[]core.InstanceSummary{{
Id: common.String("ocid1.instance.oc1.phx.aaa1"),
AvailabilityDomain: common.String("PHX-AD-2"),
State: common.String(string(core.InstanceLifecycleStateRunning)),
}, {
Id: common.String("ocid1.instance.oc1.phx.aaa2"),
AvailabilityDomain: common.String("PHX-AD-1"),
State: common.String(string(core.InstanceLifecycleStateTerminating)),
},
}
expected := []cloudprovider.Instance{
{
Id: "ocid1.instance.oc1.phx.aaa1",
Status: &cloudprovider.InstanceStatus{
State: cloudprovider.InstanceRunning,
},
},
{
Id: "ocid1.instance.oc1.phx.aaa2",
Status: &cloudprovider.InstanceStatus{
State: cloudprovider.InstanceDeleting,
},
},
}
manager := &InstancePoolManagerImpl{instancePoolCache: nodePoolCache, cfg: &CloudConfig{}}
instances, err := manager.GetInstancePoolNodes(InstancePoolNodeGroup{id: "ocid1.instancepool.oc1.phx.aaaaaaaa1"})
if err != nil {
t.Fatalf("received unexpected error; %+v", err)
}
if !reflect.DeepEqual(instances, expected) {
t.Errorf("got %+v\nwanted %+v", instances, expected)
}
err = manager.forceRefresh()
if err != nil {
t.Fatalf("received unexpected error refreshing cache; %+v", err)
}
}
func TestGetInstancePoolAvailabilityDomain(t *testing.T) {
testCases := map[string]struct {
np *core.InstancePool
result string
expectedErr bool
}{
"single ad": {
np: &core.InstancePool{
Id: common.String("id"),
LifecycleState: "",
PlacementConfigurations: []core.InstancePoolPlacementConfiguration{{
AvailabilityDomain: common.String("hash:US-ASHBURN-1"),
PrimarySubnetId: common.String("ocid1.subnet.oc1.phx.aaaaaaaa1"),
}},
Size: common.Int(2),
},
result: "US-ASHBURN-1",
},
"multi-ad": {
np: &core.InstancePool{
Id: common.String("id"),
LifecycleState: "",
PlacementConfigurations: []core.InstancePoolPlacementConfiguration{{
AvailabilityDomain: common.String("hash:US-ASHBURN-1"),
PrimarySubnetId: common.String("ocid1.subnet.oc1.phx.aaaaaaaa1"),
}, {
AvailabilityDomain: common.String("hash:US-ASHBURN-2"),
PrimarySubnetId: common.String("ocid1.subnet.oc1.phx.aaaaaaaa2"),
}},
Size: common.Int(2),
},
result: "US-ASHBURN-1",
},
}
for name, tc := range testCases {
t.Run(name, func(t *testing.T) {
ad, err := getInstancePoolAvailabilityDomain(tc.np)
if tc.expectedErr {
if err == nil {
t.Fatalf("expected err but not nil")
}
return
}
if ad != tc.result {
t.Errorf("got %q ; wanted %q", ad, tc.result)
}
})
}
}
func TestGetInstancePoolsAndInstances(t *testing.T) {
var computeManagementClient = &mockComputeManagementClient{
getInstancePoolResponse: core.GetInstancePoolResponse{
InstancePool: core.InstancePool{
Id: common.String("ocid1.instancepool.oc1.phx.aaaaaaaa1"),
CompartmentId: common.String("ocid1.compartment.oc1..aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
InstanceConfigurationId: common.String("ocid1.instanceconfiguration.oc1.phx.aaaaaaaa1"),
PlacementConfigurations: nil,
Size: common.Int(2),
},
},
listInstancePoolInstancesResponse: core.ListInstancePoolInstancesResponse{
Items: []core.InstanceSummary{{
Id: common.String("ocid1.instance.oc1.phx.aaa1"),
AvailabilityDomain: common.String("Uocm:PHX-AD-2"),
CompartmentId: common.String("ocid1.compartment.oc1..aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
DisplayName: common.String("inst-1ncvn-ociinstancepool"),
Shape: common.String("VM.Standard2.8"),
State: common.String(string(core.InstanceLifecycleStateRunning)),
}, {
Id: common.String("ocid1.instance.oc1.phx.aaaterminal"),
AvailabilityDomain: common.String("Uocm:PHX-AD-2"),
CompartmentId: common.String("ocid1.compartment.oc1..aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
DisplayName: common.String("inst-2ncvn-ociinstancepool"),
Shape: common.String("VM.Standard2.8"),
State: common.String(string(core.InstanceLifecycleStateTerminated)),
}},
},
}
cloudConfig := &CloudConfig{}
cloudConfig.Global.CompartmentID = "ocid1.compartment.oc1..aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
manager := &InstancePoolManagerImpl{
cfg: cloudConfig,
staticInstancePools: map[string]*InstancePoolNodeGroup{
"ocid1.instancepool.oc1.phx.aaaaaaaa1": {id: "ocid1.instancepool.oc1.phx.aaaaaaaa1"},
},
instancePoolCache: newInstancePoolCache(computeManagementClient, computeClient, virtualNetworkClient),
}
// Populate cache(s) (twice to increase code coverage).
_ = manager.Refresh()
err := manager.Refresh()
if err != nil {
t.Fatalf("unexpected error: %+v", err)
}
err = manager.forceRefresh()
if err != nil {
t.Fatalf("unexpected error: %+v", err)
}
instancePoolNodeGroups := manager.GetInstancePools()
if got := len(instancePoolNodeGroups); got != 1 {
t.Fatalf("expected 1 (static) instance pool, got %d", got)
}
instances, err := manager.GetInstancePoolNodes(*instancePoolNodeGroups[0])
if err != nil {
t.Fatalf("unexpected error: %+v", err)
}
// Should not pick up terminated instance.
if got := len(instances); got != 1 {
t.Fatalf("expected 1 instance, got %d", got)
}
instancePoolNodeGroup, err := manager.GetInstancePoolForInstance(OciRef{InstanceID: instances[0].Id})
if err != nil {
t.Fatalf("unexpected error: %+v", err)
}
if !reflect.DeepEqual(instancePoolNodeGroup, instancePoolNodeGroups[0]) {
t.Errorf("got %+v\nwanted %+v", instancePoolNodeGroup, instancePoolNodeGroups[0])
}
}
func TestDeleteInstances(t *testing.T) {
var computeManagementClient = &mockComputeManagementClient{
getInstancePoolResponse: core.GetInstancePoolResponse{
InstancePool: core.InstancePool{
Id: common.String("ocid1.instancepool.oc1.phx.aaaaaaaa1"),
CompartmentId: common.String("ocid1.compartment.oc1..aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
InstanceConfigurationId: common.String("ocid1.instanceconfiguration.oc1.phx.aaaaaaaa1"),
LifecycleState: core.InstancePoolLifecycleStateRunning,
Size: common.Int(2),
},
},
listInstancePoolInstancesResponse: core.ListInstancePoolInstancesResponse{
Items: []core.InstanceSummary{{
Id: common.String("ocid1.instance.oc1.phx.aaa1"),
AvailabilityDomain: common.String("Uocm:PHX-AD-1"),
CompartmentId: common.String("ocid1.compartment.oc1..aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
DisplayName: common.String("inst-1ncvn-ociinstancepool"),
Shape: common.String("VM.Standard2.16"),
State: common.String(string(core.InstanceLifecycleStateRunning)),
}, {
Id: common.String("ocid1.instance.oc1.phx.aaa2"),
AvailabilityDomain: common.String("Uocm:PHX-AD-1"),
CompartmentId: common.String("ocid1.compartment.oc1..aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
DisplayName: common.String("inst-2ncvn-ociinstancepool"),
Shape: common.String("VM.Standard2.16"),
State: common.String(string(core.InstanceLifecycleStateRunning)),
}},
},
}
cloudConfig := &CloudConfig{}
cloudConfig.Global.CompartmentID = "ocid1.compartment.oc1..aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
manager := &InstancePoolManagerImpl{
cfg: cloudConfig,
staticInstancePools: map[string]*InstancePoolNodeGroup{
"ocid1.instancepool.oc1.phx.aaaaaaaa1": {id: "ocid1.instancepool.oc1.phx.aaaaaaaa1"},
},
instancePoolCache: newInstancePoolCache(computeManagementClient, computeClient, virtualNetworkClient),
}
// Populate cache(s).
manager.Refresh()
instances, err := manager.GetInstancePoolNodes(InstancePoolNodeGroup{id: "ocid1.instancepool.oc1.phx.aaaaaaaa1"})
if err != nil {
t.Fatalf("unexpected error: %+v", err)
}
// Both running instances should be returned.
if got := len(instances); got != 2 {
t.Fatalf("expected 2 instances, got %d", got)
}
// Check size before and after delete
size, err := manager.GetInstancePoolSize(InstancePoolNodeGroup{id: "ocid1.instancepool.oc1.phx.aaaaaaaa1"})
if err != nil {
t.Fatalf("unexpected error: %+v", err)
}
if size != 2 {
t.Errorf("got size %d ; wanted size 2 before delete", size)
}
instanceToDelete := OciRef{
AvailabilityDomain: "PHX-AD-1",
Name: "inst-2ncvn-ociinstancepool",
InstanceID: "ocid1.instance.oc1.phx.aaa2",
PoolID: "ocid1.instancepool.oc1.phx.aaaaaaaa1",
}
err = manager.DeleteInstances(InstancePoolNodeGroup{id: "ocid1.instancepool.oc1.phx.aaaaaaaa1"}, []OciRef{instanceToDelete})
if err != nil {
t.Fatalf("unexpected error: %+v", err)
}
size, err = manager.GetInstancePoolSize(InstancePoolNodeGroup{id: "ocid1.instancepool.oc1.phx.aaaaaaaa1"})
if err != nil {
t.Fatalf("unexpected error: %+v", err)
}
if size != 1 {
t.Errorf("got size %d ; wanted size 1 *after* delete", size)
}
}


@ -0,0 +1,66 @@
/*
Copyright 2021 Oracle and/or its affiliates.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package oci
import (
apiv1 "k8s.io/api/core/v1"
)
// OciRef contains a reference to some entity in the OCI world.
type OciRef struct {
AvailabilityDomain string
Name string
CompartmentID string
InstanceID string
PoolID string
PrivateIPAddress string
PublicIPAddress string
Shape string
}
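// nodeToOciRef builds an OciRef from the given node's name, labels, annotations, and addresses.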
func nodeToOciRef(n *apiv1.Node) (OciRef, error) {
return OciRef{
Name: n.ObjectMeta.Name,
AvailabilityDomain: n.Labels[apiv1.LabelZoneFailureDomain],
CompartmentID: n.Annotations[ociAnnotationCompartmentID],
InstanceID: n.Annotations[ociInstanceIDAnnotation],
PoolID: n.Annotations[ociInstancePoolIDAnnotation],
PrivateIPAddress: getNodeInternalAddress(n),
PublicIPAddress: getNodeExternalAddress(n),
Shape: n.Labels[apiv1.LabelInstanceType],
}, nil
}
// getNodeInternalAddress returns the first private address of the node, or an empty string if none is found.
func getNodeInternalAddress(node *apiv1.Node) string {
for _, address := range node.Status.Addresses {
if address.Type == apiv1.NodeInternalIP {
return address.Address
}
}
return ""
}
// getNodeExternalAddress returns the first public address of the node, or an empty string if none is found.
func getNodeExternalAddress(node *apiv1.Node) string {
for _, address := range node.Status.Addresses {
if address.Type == apiv1.NodeExternalIP {
return address.Address
}
}
return ""
}


@ -0,0 +1,136 @@
/*
Copyright 2021 Oracle and/or its affiliates.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package oci
import (
"context"
"fmt"
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider/oci/oci-go-sdk/v43/core"
"k8s.io/klog/v2"
)
// ShapeGetter returns the oci shape attributes for the pool.
type ShapeGetter interface {
GetInstancePoolShape(pool *core.InstancePool) (*Shape, error)
}
// ShapeClient is an interface around the GetInstanceConfiguration and ListShapes calls.
type ShapeClient interface {
GetInstanceConfiguration(context.Context, core.GetInstanceConfigurationRequest) (core.GetInstanceConfigurationResponse, error)
ListShapes(context.Context, core.ListShapesRequest) (core.ListShapesResponse, error)
}
// ShapeClientImpl is the implementation for fetching shape information.
type ShapeClientImpl struct {
// Can fetch instance configs (flexible shapes)
computeMgmtClient core.ComputeManagementClient
// Can fetch shapes directly
computeClient core.ComputeClient
}
// GetInstanceConfiguration gets the instance configuration.
func (cc ShapeClientImpl) GetInstanceConfiguration(ctx context.Context, req core.GetInstanceConfigurationRequest) (core.GetInstanceConfigurationResponse, error) {
return cc.computeMgmtClient.GetInstanceConfiguration(ctx, req)
}
// ListShapes lists the shapes.
func (cc ShapeClientImpl) ListShapes(ctx context.Context, req core.ListShapesRequest) (core.ListShapesResponse, error) {
return cc.computeClient.ListShapes(ctx, req)
}
// Shape includes the resource attributes of a given shape which should be used
// for constructing node templates.
type Shape struct {
Name string
CPU float32
GPU int
MemoryInBytes float32
}
// createShapeGetter creates a new oci shape getter.
func createShapeGetter(shapeClient ShapeClient) ShapeGetter {
return &shapeGetterImpl{
shapeClient: shapeClient,
cache: map[string]*Shape{},
}
}
type shapeGetterImpl struct {
shapeClient ShapeClient
cache map[string]*Shape
}
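// GetInstancePoolShape resolves the shape resources for the pool's instance configuration:
// flexible shapes are read from the launch details' shape config, while fixed shapes are
// looked up via ListShapes.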
func (osf *shapeGetterImpl) GetInstancePoolShape(ip *core.InstancePool) (*Shape, error) {
shape := &Shape{}
klog.V(5).Info("fetching shape configuration details for instance-pool " + *ip.Id)
instanceConfig, err := osf.shapeClient.GetInstanceConfiguration(context.Background(), core.GetInstanceConfigurationRequest{
InstanceConfigurationId: ip.InstanceConfigurationId,
})
if err != nil {
return nil, err
}
if instanceConfig.InstanceDetails == nil {
return nil, fmt.Errorf("instance configuration details for instance %s has not been set", *ip.Id)
}
if instanceDetails, ok := instanceConfig.InstanceDetails.(core.ComputeInstanceDetails); ok {
// For flexible shapes, use the shape config details; otherwise, look up the static shape details below.
if instanceDetails.LaunchDetails != nil && instanceDetails.LaunchDetails.ShapeConfig != nil {
if instanceDetails.LaunchDetails.Shape != nil {
shape.Name = *instanceDetails.LaunchDetails.Shape
}
if instanceDetails.LaunchDetails.ShapeConfig.Ocpus != nil {
shape.CPU = *instanceDetails.LaunchDetails.ShapeConfig.Ocpus
// Default to 1 GB of memory per OCPU unless MemoryInGBs is explicitly set below.
shape.MemoryInBytes = *instanceDetails.LaunchDetails.ShapeConfig.Ocpus * 1024 * 1024 * 1024
}
if instanceDetails.LaunchDetails.ShapeConfig.MemoryInGBs != nil {
shape.MemoryInBytes = *instanceDetails.LaunchDetails.ShapeConfig.MemoryInGBs * 1024 * 1024 * 1024
}
} else {
allShapes, _ := osf.shapeClient.ListShapes(context.Background(), core.ListShapesRequest{
CompartmentId: instanceConfig.CompartmentId,
})
for _, nextShape := range allShapes.Items {
if *nextShape.Shape == *instanceDetails.LaunchDetails.Shape {
shape.Name = *nextShape.Shape
if nextShape.Ocpus != nil {
shape.CPU = *nextShape.Ocpus
}
if nextShape.MemoryInGBs != nil {
shape.MemoryInBytes = *nextShape.MemoryInGBs * 1024 * 1024 * 1024
}
if nextShape.Gpus != nil {
shape.GPU = *nextShape.Gpus
}
}
}
}
} else {
return nil, fmt.Errorf("(compute) instance configuration for instance-pool %s not found", *ip.Id)
}
// Didn't find a match
if shape.Name == "" {
return nil, fmt.Errorf("shape information for instance-pool %s not found", *ip.Id)
}
return shape, nil
}


@ -0,0 +1,237 @@
package oci
import (
"context"
apiv1 "k8s.io/api/core/v1"
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider"
kubeletapis "k8s.io/kubelet/pkg/apis"
"reflect"
"strings"
"testing"
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider/oci/oci-go-sdk/v43/common"
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider/oci/oci-go-sdk/v43/core"
)
type mockShapeClient struct {
err error
listShapeResp core.ListShapesResponse
getInstanceConfigResp core.GetInstanceConfigurationResponse
}
func (m *mockShapeClient) ListShapes(_ context.Context, _ core.ListShapesRequest) (core.ListShapesResponse, error) {
return m.listShapeResp, m.err
}
func (m *mockShapeClient) GetInstanceConfiguration(context.Context, core.GetInstanceConfigurationRequest) (core.GetInstanceConfigurationResponse, error) {
return m.getInstanceConfigResp, m.err
}
var launchDetails = core.InstanceConfigurationLaunchInstanceDetails{
CompartmentId: nil,
DisplayName: nil,
CreateVnicDetails: nil,
Shape: common.String("VM.Standard.E3.Flex"),
ShapeConfig: &core.InstanceConfigurationLaunchInstanceShapeConfigDetails{
Ocpus: common.Float32(8),
MemoryInGBs: common.Float32(128),
},
SourceDetails: nil,
}
var instanceDetails = core.ComputeInstanceDetails{
LaunchDetails: &launchDetails,
}
var shapeClient = &mockShapeClient{
err: nil,
listShapeResp: core.ListShapesResponse{
Items: []core.Shape{
{
Shape: common.String("VM.Standard2.8"),
Ocpus: common.Float32(8),
MemoryInGBs: common.Float32(120),
},
},
},
getInstanceConfigResp: core.GetInstanceConfigurationResponse{
RawResponse: nil,
InstanceConfiguration: core.InstanceConfiguration{
CompartmentId: nil,
Id: common.String("ocid1.instanceconfiguration.oc1.phx.aaaaaaaa1"),
TimeCreated: nil,
DefinedTags: nil,
DisplayName: nil,
FreeformTags: nil,
InstanceDetails: instanceDetails,
DeferredFields: nil,
},
Etag: nil,
OpcRequestId: nil,
},
}
func TestGetShape(t *testing.T) {
testCases := map[string]struct {
shape string
expected *Shape
}{
"flex shape": {
shape: "VM.Standard.E3.Flex",
expected: &Shape{
Name: "VM.Standard.E3.Flex",
CPU: 8,
MemoryInBytes: float32(128) * 1024 * 1024 * 1024,
GPU: 0,
},
},
}
for name, tc := range testCases {
shapeGetter := createShapeGetter(shapeClient)
t.Run(name, func(t *testing.T) {
shape, err := shapeGetter.GetInstancePoolShape(&core.InstancePool{Id: &tc.shape, InstanceConfigurationId: common.String("ocid1.instanceconfiguration.oc1.phx.aaaaaaaa1")})
if err != nil {
t.Fatal(err)
}
if !reflect.DeepEqual(shape, tc.expected) {
t.Errorf("wanted %+v ; got %+v", tc.expected, shape)
}
if !strings.Contains(tc.shape, "Flex") {
// we can't cache flex shapes, so only check the cache for non-flex shapes
cacheShape, ok := shapeGetter.(*shapeGetterImpl).cache[tc.shape]
if !ok {
t.Error("shape not found in cache")
}
if !reflect.DeepEqual(cacheShape, tc.expected) {
t.Errorf("wanted %+v ; got %+v", tc.expected, cacheShape)
}
}
})
}
}
func TestGetInstancePoolTemplateNode(t *testing.T) {
instancePoolCache := newInstancePoolCache(computeManagementClient, computeClient, virtualNetworkClient)
instancePoolCache.poolCache["ocid1.instancepool.oc1.phx.aaaaaaaa1"] = &core.InstancePool{
Id: common.String("ocid1.instancepool.oc1.phx.aaaaaaaa1"),
CompartmentId: common.String("ocid1.compartment.oc1..aaaaaaaa1"),
LifecycleState: core.InstancePoolLifecycleStateRunning,
PlacementConfigurations: []core.InstancePoolPlacementConfiguration{{
AvailabilityDomain: common.String("hash:US-ASHBURN-1"),
PrimarySubnetId: common.String("ocid1.subnet.oc1.phx.aaaaaaaa1"),
}},
}
instancePoolCache.instanceSummaryCache["ocid1.instancepool.oc1.phx.aaaaaaaa1"] = &[]core.InstanceSummary{{
Id: common.String("ocid1.instance.oc1.phx.aaa1"),
AvailabilityDomain: common.String("PHX-AD-2"),
State: common.String(string(core.InstanceLifecycleStateRunning)),
},
}
cloudConfig := &CloudConfig{}
cloudConfig.Global.CompartmentID = "ocid1.compartment.oc1..aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
var manager = &InstancePoolManagerImpl{
cfg: cloudConfig,
shapeGetter: createShapeGetter(shapeClient),
staticInstancePools: map[string]*InstancePoolNodeGroup{
"ocid1.instancepool.oc1.phx.aaaaaaaa1": {id: "ocid1.instancepool.oc1.phx.aaaaaaaa1"},
},
instancePoolCache: instancePoolCache,
}
instancePoolNodeGroups := manager.GetInstancePools()
if got := len(instancePoolNodeGroups); got != 1 {
t.Fatalf("expected 1 (static) instance pool, got %d", got)
}
nodeTemplate, err := manager.GetInstancePoolTemplateNode(*instancePoolNodeGroups[0])
if err != nil {
t.Fatalf("received unexpected error refreshing cache; %+v", err)
}
labels := nodeTemplate.GetLabels()
if labels == nil {
t.Fatalf("expected labels on node object")
}
// Double check the shape label.
if got := labels[apiv1.LabelInstanceTypeStable]; got != "VM.Standard.E3.Flex" {
t.Fatalf("expected shape label %s to be set to VM.Standard.E3.Flex: %v", apiv1.LabelInstanceTypeStable, nodeTemplate.Labels)
}
// Also check the AD label for good measure.
if got := labels[apiv1.LabelTopologyZone]; got != "US-ASHBURN-1" {
t.Fatalf("expected AD zone label %s to be set to US-ASHBURN-1: %v", apiv1.LabelTopologyZone, nodeTemplate.Labels)
}
}
func TestBuildGenericLabels(t *testing.T) {
shapeName := "VM.Standard2.8"
np := &core.InstancePool{
Id: common.String("ocid1.instancepool.oc1.phx.aaaaaaaah"),
Size: common.Int(2),
}
nodeName := "node1"
availabilityDomain := "US-ASHBURN-1"
expected := map[string]string{
kubeletapis.LabelArch: cloudprovider.DefaultArch,
apiv1.LabelArchStable: cloudprovider.DefaultArch,
kubeletapis.LabelOS: cloudprovider.DefaultOS,
apiv1.LabelOSStable: cloudprovider.DefaultOS,
apiv1.LabelZoneRegion: "phx",
apiv1.LabelZoneRegionStable: "phx",
apiv1.LabelInstanceType: shapeName,
apiv1.LabelInstanceTypeStable: shapeName,
apiv1.LabelZoneFailureDomain: availabilityDomain,
apiv1.LabelZoneFailureDomainStable: availabilityDomain,
apiv1.LabelHostname: nodeName,
}
launchDetails := core.InstanceConfigurationLaunchInstanceDetails{
Shape: common.String("VM.Standard2.8"),
}
instanceDetails := core.ComputeInstanceDetails{
LaunchDetails: &launchDetails,
}
// For list shapes
mockShapeClient := &mockShapeClient{
err: nil,
listShapeResp: core.ListShapesResponse{
Items: []core.Shape{
{Shape: common.String("VM.Standard2.4"), Ocpus: common.Float32(4), MemoryInGBs: common.Float32(60)},
{Shape: common.String("VM.Standard2.8"), Ocpus: common.Float32(8), MemoryInGBs: common.Float32(120)}},
},
getInstanceConfigResp: core.GetInstanceConfigurationResponse{
InstanceConfiguration: core.InstanceConfiguration{
Id: common.String("ocid1.instanceconfiguration.oc1.phx.aaaaaaaa1"),
InstanceDetails: instanceDetails,
},
},
}
shapeGetter := createShapeGetter(mockShapeClient)
manager := InstancePoolManagerImpl{
shapeGetter: shapeGetter,
}
shape, err := manager.shapeGetter.GetInstancePoolShape(np)
if err != nil {
t.Fatalf("unexpected error: %+v", err)
}
output := buildGenericLabelsForInstancePool(np, nodeName, shape.Name, availabilityDomain)
if !reflect.DeepEqual(output, expected) {
t.Fatalf("got %+v\nwanted %+v", output, expected)
}
}


@ -0,0 +1,211 @@
/*
Copyright 2021 Oracle and/or its affiliates.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package oci
import (
"context"
"fmt"
"k8s.io/client-go/kubernetes"
"k8s.io/klog/v2"
"math"
"math/rand"
"net"
"net/http"
"time"
"github.com/pkg/errors"
apiv1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider/oci/oci-go-sdk/v43/common"
"k8s.io/kubernetes/pkg/apis/scheduling"
)
// IsRetryable returns true if the given error is retryable.
func IsRetryable(err error) bool {
if err == nil {
return false
}
err = errors.Cause(err)
// Retry on network timeout errors
if err, ok := err.(net.Error); ok && err.Timeout() {
return true
}
// handle oci retryable errors.
serviceErr, ok := common.IsServiceError(err)
if !ok {
return false
}
switch serviceErr.GetHTTPStatusCode() {
case http.StatusTooManyRequests, http.StatusGatewayTimeout,
http.StatusInternalServerError, http.StatusBadGateway:
return true
default:
return false
}
}
func newRetryPolicy() *common.RetryPolicy {
return NewRetryPolicyWithMaxAttempts(uint(8))
}
// NewRetryPolicyWithMaxAttempts returns a RetryPolicy with the specified max retryAttempts
func NewRetryPolicyWithMaxAttempts(retryAttempts uint) *common.RetryPolicy {
isRetryableOperation := func(r common.OCIOperationResponse) bool {
return IsRetryable(r.Error)
}
nextDuration := func(r common.OCIOperationResponse) time.Duration {
// you might want to wait longer for the next retry when the previous one failed
// this function will return the duration as:
// 1s, 2s, 4s, 8s, 16s, 32s, 64s etc...
return time.Duration(math.Pow(float64(2), float64(r.AttemptNumber-1))) * time.Second
}
policy := common.NewRetryPolicy(
retryAttempts, isRetryableOperation, nextDuration,
)
return &policy
}
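// For example (a sketch; "client" stands in for any OCI SDK client that accepts a custom
// configuration, as done in CreateInstancePoolManager):
//
//	policy := NewRetryPolicyWithMaxAttempts(8)
//	client.SetCustomClientConfiguration(common.CustomClientConfiguration{RetryPolicy: policy})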
// Missing resource requests on kube-proxy
// Flannel missing priority
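// buildCSINodePod constructs a synthetic, already-Running csi-oci-node pod in kube-system with
// system-critical priority (presumably a stand-in for the CSI daemonset pod when estimating
// node capacity).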
func buildCSINodePod() *apiv1.Pod {
priority := scheduling.SystemCriticalPriority
return &apiv1.Pod{
ObjectMeta: metav1.ObjectMeta{
Name: fmt.Sprintf("csi-oci-node-%d", rand.Int63()),
Namespace: "kube-system",
Labels: map[string]string{
"app": "csi-oci-node",
},
},
Spec: apiv1.PodSpec{
Containers: []apiv1.Container{
{
Image: "iad.ocir.io/oracle/cloud-provider-oci:latest",
},
},
Priority: &priority,
},
Status: apiv1.PodStatus{
Phase: apiv1.PodRunning,
Conditions: []apiv1.PodCondition{
{
Type: apiv1.PodReady,
Status: apiv1.ConditionTrue,
},
},
},
}
}
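// annotateNode sets the given annotation on the node if it is not already set to that value.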
func annotateNode(kubeClient kubernetes.Interface, nodeName string, key string, value string) error {
if nodeName == "" {
return errors.New("node name is required")
}
if kubeClient == nil {
return errors.New("kubeconfig is required")
}
node, err := kubeClient.CoreV1().Nodes().Get(context.Background(), nodeName, metav1.GetOptions{})
if err != nil {
klog.Errorf("failed to get node %s %+v", nodeName, err)
return err
}
annotations := node.GetAnnotations()
if annotations == nil {
annotations = map[string]string{}
}
if v := annotations[key]; v != value {
node.Annotations[key] = value
_, err := kubeClient.CoreV1().Nodes().Update(context.Background(), node, metav1.UpdateOptions{})
if err != nil {
klog.Errorf("failed to annotate node %s %+v", nodeName, err)
return err
}
klog.V(3).Infof("updated annotation %s=%s on node: %s", key, value, nodeName)
}
return nil
}
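// labelNode sets the given label on the node if it is not already set to that value.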
func labelNode(kubeClient kubernetes.Interface, nodeName string, key string, value string) error {
if nodeName == "" {
return errors.New("node name is required")
}
if kubeClient == nil {
return errors.New("kubeconfig is required")
}
node, err := kubeClient.CoreV1().Nodes().Get(context.Background(), nodeName, metav1.GetOptions{})
if err != nil {
klog.Errorf("failed to get node %s %+v", nodeName, err)
return err
}
labels := node.GetLabels()
if labels == nil {
labels = map[string]string{}
}
if v := labels[key]; v != value {
node.Labels[key] = value
_, err := kubeClient.CoreV1().Nodes().Update(context.Background(), node, metav1.UpdateOptions{})
if err != nil {
klog.Errorf("failed to label node %s %+v", nodeName, err)
return err
}
klog.V(3).Infof("updated label %s=%s on node: %s", key, value, nodeName)
}
return nil
}
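// setNodeProviderID sets the node's spec.providerID if it differs from the given value.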
func setNodeProviderID(kubeClient kubernetes.Interface, nodeName string, value string) error {
if nodeName == "" {
return errors.New("node name is required")
}
if kubeClient == nil {
return errors.New("kubeconfig is required")
}
node, err := kubeClient.CoreV1().Nodes().Get(context.Background(), nodeName, metav1.GetOptions{})
if err != nil {
klog.Errorf("failed to get node %s %+v", nodeName, err)
return err
}
if node.Spec.ProviderID != value {
node.Spec.ProviderID = value
_, err := kubeClient.CoreV1().Nodes().Update(context.Background(), node, metav1.UpdateOptions{})
if err != nil {
klog.Errorf("failed to update node's provider ID %s %+v", nodeName, err)
return err
}
klog.V(3).Infof("updated provider ID on node: %s", nodeName)
}
return nil
}


@ -0,0 +1,12 @@
package oci
import (
"testing"
)
func TestSetProviderID(t *testing.T) {
err := setNodeProviderID(nil, "", "")
if err == nil {
t.Fatal("expected error")
}
}


@ -157,7 +157,8 @@ skipped_dirs = ['Godeps', 'third_party', '_gopath', '_output', '.git', 'cluster/
"cluster-autoscaler/cloudprovider/digitalocean/godo",
"cluster-autoscaler/cloudprovider/magnum/gophercloud",
"cluster-autoscaler/cloudprovider/ionoscloud/ionos-cloud-sdk-go",
"cluster-autoscaler/cloudprovider/hetzner/hcloud-go"]
"cluster-autoscaler/cloudprovider/hetzner/hcloud-go",
"cluster-autoscaler/cloudprovider/oci"]
# list all the files contain 'DO NOT EDIT', but are not generated
skipped_ungenerated_files = ['hack/build-ui.sh', 'hack/lib/swagger.sh',


@ -23,4 +23,4 @@ DIR=$(dirname $0)
go install ${DIR}/../../../github.com/client9/misspell/cmd/misspell
# Spell checking
git ls-files --full-name | grep -v -e vendor | grep -v cluster-autoscaler/cloudprovider/magnum/gophercloud| grep -v cluster-autoscaler/cloudprovider/huaweicloud/huaweicloud-sdk-go-v3 | grep -v cluster-autoscaler/cloudprovider/digitalocean/godo | grep -v cluster-autoscaler/cloudprovider/hetzner/hcloud-go | grep -v cluster-autoscaler/cloudprovider/bizflycloud/gobizfly | grep -E -v 'cluster-autoscaler/cloudprovider/brightbox/(go-cache|gobrightbox|k8ssdk|linkheader)' | xargs misspell -error -o stderr
git ls-files --full-name | grep -v -e vendor | grep -v cluster-autoscaler/cloudprovider/magnum/gophercloud| grep -v cluster-autoscaler/cloudprovider/huaweicloud/huaweicloud-sdk-go-v3 | grep -v cluster-autoscaler/cloudprovider/digitalocean/godo | grep -v cluster-autoscaler/cloudprovider/hetzner/hcloud-go | grep -v cluster-autoscaler/cloudprovider/bizflycloud/gobizfly | grep -v cluster-autoscaler/cloudprovider/oci/oci-go-sdk | grep -E -v 'cluster-autoscaler/cloudprovider/brightbox/(go-cache|gobrightbox|k8ssdk|linkheader)' | xargs misspell -error -o stderr