Merge branch 'master' into oom-params

navinjoy 2023-01-26 13:43:34 -08:00 committed by GitHub
commit c36f6ca3c4
3098 changed files with 329068 additions and 181130 deletions


@ -1,9 +1,3 @@
#### Which component this PR applies to?
<!--
Which autoscaling component hosted in this repository (cluster-autoscaler, vertical-pod-autoscaler, addon-resizer, helm charts) this PR applies to?
-->
#### What type of PR is this?
<!--


@ -5,3 +5,5 @@ reviewers:
emeritus_approvers:
- bskiba # 2022-09-30
- wojtek-t # 2022-09-30
labels:
- addon-resizer


@ -56,11 +56,11 @@ exposes the Scale subresource.
// or other objects that expose the Scale subresource).
type Balancer struct {
metav1.TypeMeta
// Standard object metadata. More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#metadata
// Standard object metadata. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#metadata
// +optional
metav1.ObjectMeta
// Specification of the Balancer behavior.
// More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#spec-and-status.
// More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#spec-and-status.
Spec BalancerSpec
// Current information about the Balancer.
// +optional


@ -2,3 +2,6 @@ approvers:
- gjtempleton
reviewers:
- gjtempleton
labels:
- helm-charts


@ -11,4 +11,4 @@ name: cluster-autoscaler
sources:
- https://github.com/kubernetes/autoscaler/tree/master/cluster-autoscaler
type: application
version: 9.21.0
version: 9.21.1


@ -70,10 +70,13 @@ Return the appropriate apiVersion for podsecuritypolicy.
{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }}
{{- if semverCompare "<1.10-0" $kubeTargetVersion -}}
{{- print "extensions/v1beta1" -}}
{{- if semverCompare ">1.21-0" $kubeTargetVersion -}}
{{- print "policy/v1" -}}
{{- else -}}
{{- print "policy/v1beta1" -}}
{{- end -}}
{{- end -}}
{{- end -}}
{{/*
Return the appropriate apiVersion for podDisruptionBudget.


@ -59,6 +59,11 @@ spec:
- --nodes={{ .minSize }}:{{ .maxSize }}:{{ .name }}
{{- end }}
{{- end }}
{{- if eq .Values.cloudProvider "rancher" }}
{{- if .Values.cloudConfigPath }}
- --cloud-config={{ .Values.cloudConfigPath }}
{{- end }}
{{- end }}
{{- if eq .Values.cloudProvider "aws" }}
{{- if .Values.autoDiscovery.clusterName }}
- --node-group-auto-discovery=asg:tag={{ tpl (join "," .Values.autoDiscovery.tags) . }}


@ -28,6 +28,7 @@ this document:
* [How to?](#how-to)
* [I'm running cluster with nodes in multiple zones for HA purposes. Is that supported by Cluster Autoscaler?](#im-running-cluster-with-nodes-in-multiple-zones-for-ha-purposes-is-that-supported-by-cluster-autoscaler)
* [How can I monitor Cluster Autoscaler?](#how-can-i-monitor-cluster-autoscaler)
* [How can I increase the information that the CA is logging?](#how-can-i-increase-the-information-that-the-ca-is-logging)
* [How can I see all the events from Cluster Autoscaler?](#how-can-i-see-all-events-from-cluster-autoscaler)
* [How can I scale my cluster to just 1 node?](#how-can-i-scale-my-cluster-to-just-1-node)
* [How can I scale a node group to 0?](#how-can-i-scale-a-node-group-to-0)
@ -104,7 +105,7 @@ __Or__ you have overridden this behaviour with one of the relevant flags. [See b
### Which version of Cluster Autoscaler should I use in my cluster?
See [Cluster Autoscaler Releases](https://github.com/kubernetes/autoscaler/tree/master/cluster-autoscaler#releases)
See [Cluster Autoscaler Releases](https://github.com/kubernetes/autoscaler/tree/master/cluster-autoscaler#releases).
### Is Cluster Autoscaler an Alpha, Beta or GA product?
@ -233,7 +234,7 @@ More about Pod Priority and Preemption:
Cluster Autoscaler terminates the underlying instance in a cloud-provider-dependent manner.
It does _not_ delete the [Node object](https://kubernetes.io/docs/concepts/architecture/nodes/#api-object) from Kubernetes. Cleaning up Node objects corresponding to terminated instances is the responsibility of the [cloud node controller](https://kubernetes.io/docs/concepts/architecture/cloud-controller/#node-controller), which can run as part of [kube-controller-manager](https://kubernetes.io/docs/reference/command-line-tools-reference/kube-controller-manager/) or [cloud-controller-manager](https://v1-19.docs.kubernetes.io/docs/reference/command-line-tools-reference/cloud-controller-manager/).
It does _not_ delete the [Node object](https://kubernetes.io/docs/concepts/architecture/nodes/#api-object) from Kubernetes. Cleaning up Node objects corresponding to terminated instances is the responsibility of the [cloud node controller](https://kubernetes.io/docs/concepts/architecture/cloud-controller/#node-controller), which can run as part of [kube-controller-manager](https://kubernetes.io/docs/reference/command-line-tools-reference/kube-controller-manager/) or [cloud-controller-manager](https://kubernetes.io/docs/concepts/architecture/cloud-controller/).
****************
@ -735,6 +736,7 @@ The following startup parameters are supported for cluster autoscaler:
| `kubeconfig` | Path to kubeconfig file with authorization and API Server location information | ""
| `cloud-config` | The path to the cloud provider configuration file. Empty string for no configuration file | ""
| `namespace` | Namespace in which cluster-autoscaler run | "kube-system"
| `scale-up-node-group-to-min-size-enabled` | Should CA scale up the node group to the configured min size if needed | false
| `scale-down-enabled` | Should CA scale down the cluster | true
| `scale-down-delay-after-add` | How long after scale up that scale down evaluation resumes | 10 minutes
| `scale-down-delay-after-delete` | How long after node deletion that scale down evaluation resumes, defaults to scan-interval | scan-interval
@ -867,7 +869,7 @@ This limitation was solved with
introduced as beta in Kubernetes 1.11 and planned for GA in 1.13.
To allow CA to take advantage of topological scheduling, use separate node groups per zone.
This way CA knows exactly which node group will create nodes in the required zone rather than relying on the cloud provider choosing a zone for a new node in a multi-zone node group.
When using separate node groups per zone, the `--balance-similar-node-groups` flag will keep nodes balanced across zones for workloads that dont require topological scheduling.
When using separate node groups per zone, the `--balance-similar-node-groups` flag will keep nodes balanced across zones for workloads that don't require topological scheduling.
### CA doesn't work, but it used to work yesterday. Why?
@ -907,6 +909,23 @@ There are three options:
* on nodes,
* on kube-system/cluster-autoscaler-status config map.
### How can I increase the information that the CA is logging?
By default, the Cluster Autoscaler will be conservative about the log messages that it emits.
This is primarily due to performance degradations in scenarios where clusters have a large
number of nodes (> 100). In these cases, excess log messages will lead to the log storage
filling more quickly, and in some cases (e.g. clusters with >1000 nodes) the processing
performance of the Cluster Autoscaler can be impacted.
The `--v` flag controls how verbose the Cluster Autoscaler will be when running. In most
cases using a value of `--v=0` or `--v=1` will be sufficient to monitor its activity.
If you would like to have more information, especially about the scaling decisions made
by the Cluster Autoscaler, then setting a value of `--v=4` is recommended. If you are
debugging connection issues between the Cluster Autoscaler and the Kubernetes API server,
or infrastructure endpoints, then setting a value of `--v=9` will show all the individual
HTTP calls made. Be aware that using verbosity levels higher than `--v=1` will generate
an increased amount of logs; prepare your deployments and storage accordingly.
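As a rough illustration of how these verbosity levels behave, here is a minimal Go sketch using `k8s.io/klog/v2`, the logging library Cluster Autoscaler uses; the messages and the levels chosen for them are invented for this example rather than taken from CA's code. Running it with `--v=1` prints only the first message, while `--v=4` also prints the second.
```go
package main

import (
	"flag"

	"k8s.io/klog/v2"
)

func main() {
	// klog registers the -v flag; passing --v=4 on the command line raises verbosity.
	klog.InitFlags(nil)
	flag.Parse()
	defer klog.Flush()

	// Emitted at any verbosity level.
	klog.Info("starting main loop")

	// Emitted only when --v is 4 or higher, e.g. details of scaling decisions.
	klog.V(4).Info("considering node group ng-1 for scale-up (hypothetical message)")

	// Emitted only when --v is 9 or higher, e.g. individual HTTP calls.
	klog.V(9).Info("GET /api/v1/nodes (hypothetical message)")
}
```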
### What events are emitted by CA?
Whenever Cluster Autoscaler adds or removes nodes it will create events
@ -948,7 +967,14 @@ Events:
```
### My cluster is below minimum / above maximum number of nodes, but CA did not fix that! Why?
Cluster Autoscaler will not scale the cluster beyond these limits, but does not enforce them. If your cluster is below the minimum number of nodes configured for Cluster Autoscaler, it will be scaled up *only* in presence of unschedulable pods.
Cluster Autoscaler will not scale the cluster beyond these limits, but some other external factors could make this happen. Here are some common scenarios.
* Existing nodes were deleted from K8s and the cloud provider, which could cause the cluster to fall below the minimum number of nodes.
* New nodes were added directly to the cloud provider, which could cause the cluster to exceed the maximum number of nodes.
* Cluster Autoscaler was turned on in the middle of the cluster lifecycle, and the initial number of nodes might already be beyond these limits.
By default, Cluster Autoscaler does not enforce the node group size. If your cluster is below the minimum number of nodes configured for CA, it will be scaled up *only* in the presence of unschedulable pods. On the other hand, if your cluster is above the maximum number of nodes configured for CA, it will be scaled down *only* if it has unneeded nodes.
Starting with CA 1.26.0, a new flag `--enforce-node-group-min-size` was introduced to enforce the node group minimum size. For node groups with fewer nodes than the configured minimum size, CA will scale them up to the minimum number of nodes. To enable this feature, set the flag to `true` in the command.
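As a minimal sketch of the behaviour described above (illustrative only, not the Cluster Autoscaler's actual implementation), the enforcement can be thought of as:
```go
package main

import "fmt"

// enforceMinSizeDelta returns how many nodes --enforce-node-group-min-size would
// add to bring a node group back up to its configured minimum size.
func enforceMinSizeDelta(currentSize, minSize int, enforceMinSize bool) int {
	if !enforceMinSize || currentSize >= minSize {
		return 0 // default behaviour: the minimum size is not actively enforced
	}
	return minSize - currentSize // scale the group up to its minimum
}

func main() {
	// A node group configured with a minimum size of 3 that has drifted down to 1 node.
	fmt.Println(enforceMinSizeDelta(1, 3, true))  // 2: scaled back up to the minimum
	fmt.Println(enforceMinSizeDelta(1, 3, false)) // 0: nothing happens by default
}
```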
### What happens in scale-up when I have no more quota in the cloud provider?


@ -28,6 +28,10 @@ ifdef DOCKER_RM
else
RM_FLAG=
endif
ifndef AWS_REGION
AWS_REGION=$(shell aws configure get region)
endif
IMAGE=$(REGISTRY)/cluster-autoscaler$(PROVIDER)
export DOCKER_CLI_EXPERIMENTAL := enabled
@ -78,7 +82,7 @@ clean-arch-%:
rm -f cluster-autoscaler-$*
generate:
go generate ./cloudprovider/aws
AWS_REGION=$(AWS_REGION) go generate ./cloudprovider/aws
format:
test -z "$$(find . -path ./vendor -prune -type f -o -name '*.go' -exec gofmt -s -d {} + | tee /dev/stderr)" || \


@ -1,8 +1,11 @@
approvers:
- feiskyer
- towca
- x13n
reviewers:
- feiskyer
- x13n
emeritus_approvers:
- aleksandra-malinowska # 2022-09-30
labels:
- cluster-autoscaler


@ -14,27 +14,31 @@ An FAQ is available [HERE](./FAQ.md).
You should also take a look at the notes and "gotchas" for your specific cloud provider:
* [AliCloud](./cloudprovider/alicloud/README.md)
* [Azure](./cloudprovider/azure/README.md)
* [AWS](./cloudprovider/aws/README.md)
* [Azure](./cloudprovider/azure/README.md)
* [BaiduCloud](./cloudprovider/baiducloud/README.md)
* [BizflyCloud](./cloudprovider/bizflycloud/README.md)
* [Brightbox](./cloudprovider/brightbox/README.md)
* [CherryServers](./cloudprovider/cherryservers/README.md)
* [Civo](./cloudprovider/civo/README.md)
* [CloudStack](./cloudprovider/cloudstack/README.md)
* [HuaweiCloud](./cloudprovider/huaweicloud/README.md)
* [ClusterAPI](./cloudprovider/clusterapi/README.md)
* [DigitalOcean](./cloudprovider/digitalocean/README.md)
* [Exoscale](./cloudprovider/exoscale/README.md)
* [Equinix Metal](./cloudprovider/packet/README.md#notes)
* [External gRPC](./cloudprovider/externalgrpc/README.md)
* [Hetzner](./cloudprovider/hetzner/README.md)
* [Equinix Metal](./cloudprovider/packet/README.md#notes)
* [HuaweiCloud](./cloudprovider/huaweicloud/README.md)
* [IonosCloud](./cloudprovider/ionoscloud/README.md)
* [OVHcloud](./cloudprovider/ovhcloud/README.md)
* [Linode](./cloudprovider/linode/README.md)
* [OracleCloud](./cloudprovider/oci/README.md)
* [ClusterAPI](./cloudprovider/clusterapi/README.md)
* [BizflyCloud](./cloudprovider/bizflycloud/README.md)
* [Vultr](./cloudprovider/vultr/README.md)
* [TencentCloud](./cloudprovider/tencentcloud/README.md)
* [Scaleway](./cloudprovider/scaleway/README.md)
* [Rancher](./cloudprovider/rancher/README.md)
* [Kamatera](./cloudprovider/kamatera/README.md)
* [Linode](./cloudprovider/linode/README.md)
* [Magnum](./cloudprovider/magnum/README.md)
* [OracleCloud](./cloudprovider/oci/README.md)
* [OVHcloud](./cloudprovider/ovhcloud/README.md)
* [Rancher](./cloudprovider/rancher/README.md)
* [Scaleway](./cloudprovider/scaleway/README.md)
* [TencentCloud](./cloudprovider/tencentcloud/README.md)
* [Vultr](./cloudprovider/vultr/README.md)
# Releases
@ -164,23 +168,27 @@ Supported cloud providers:
* GKE https://cloud.google.com/container-engine/docs/cluster-autoscaler
* AWS https://github.com/kubernetes/autoscaler/blob/master/cluster-autoscaler/cloudprovider/aws/README.md
* Azure https://github.com/kubernetes/autoscaler/blob/master/cluster-autoscaler/cloudprovider/azure/README.md
* Alibaba Cloud https://github.com/kubernetes/autoscaler/blob/master/cluster-autoscaler/cloudprovider/alicloud/README.md
* AliCloud https://github.com/kubernetes/autoscaler/blob/master/cluster-autoscaler/cloudprovider/alicloud/README.md
* BaiduCloud https://github.com/kubernetes/autoscaler/blob/master/cluster-autoscaler/cloudprovider/baiducloud/README.md
* BizflyCloud https://github.com/kubernetes/autoscaler/blob/master/cluster-autoscaler/cloudprovider/bizflycloud/README.md
* Brightbox https://github.com/kubernetes/autoscaler/blob/master/cluster-autoscaler/cloudprovider/brightbox/README.md
* CherryServers https://github.com/kubernetes/autoscaler/blob/master/cluster-autoscaler/cloudprovider/cherryservers/README.md
* OpenStack Magnum https://github.com/kubernetes/autoscaler/blob/master/cluster-autoscaler/cloudprovider/magnum/README.md
* DigitalOcean https://github.com/kubernetes/autoscaler/blob/master/cluster-autoscaler/cloudprovider/digitalocean/README.md
* Civo https://github.com/kubernetes/autoscaler/blob/master/cluster-autoscaler/cloudprovider/civo/README.md
* CloudStack https://github.com/kubernetes/autoscaler/blob/master/cluster-autoscaler/cloudprovider/cloudstack/README.md
* ClusterAPI https://github.com/kubernetes/autoscaler/blob/master/cluster-autoscaler/cloudprovider/clusterapi/README.md
* DigitalOcean https://github.com/kubernetes/autoscaler/blob/master/cluster-autoscaler/cloudprovider/digitalocean/README.md
* Exoscale https://github.com/kubernetes/autoscaler/blob/master/cluster-autoscaler/cloudprovider/exoscale/README.md
* Equinix Metal https://github.com/kubernetes/autoscaler/blob/master/cluster-autoscaler/cloudprovider/packet/README.md
* External gRPC https://github.com/kubernetes/autoscaler/blob/master/cluster-autoscaler/cloudprovider/externalgrpc/README.md
* OVHcloud https://github.com/kubernetes/autoscaler/blob/master/cluster-autoscaler/cloudprovider/ovhcloud/README.md
* Linode https://github.com/kubernetes/autoscaler/blob/master/cluster-autoscaler/cloudprovider/linode/README.md
* OCI https://github.com/kubernetes/autoscaler/blob/master/cluster-autoscaler/cloudprovider/oci/README.md
* Hetzner https://github.com/kubernetes/autoscaler/blob/master/cluster-autoscaler/cloudprovider/hetzner/README.md
* Cluster API https://github.com/kubernetes/autoscaler/blob/master/cluster-autoscaler/cloudprovider/clusterapi/README.md
* Vultr https://github.com/kubernetes/autoscaler/blob/master/cluster-autoscaler/cloudprovider/vultr/README.md
* TencentCloud https://github.com/kubernetes/autoscaler/blob/master/cluster-autoscaler/cloudprovider/tencentcloud/README.md
* BaiduCloud https://github.com/kubernetes/autoscaler/blob/master/cluster-autoscaler/cloudprovider/baiducloud/README.md
* HuaweiCloud https://github.com/kubernetes/autoscaler/blob/master/cluster-autoscaler/cloudprovider/huaweicloud/README.md
* Rancher https://github.com/kubernetes/autoscaler/blob/master/cluster-autoscaler/cloudprovider/rancher/README.md
* IonosCloud https://github.com/kubernetes/autoscaler/blob/master/cluster-autoscaler/cloudprovider/ionoscloud/README.md
* Kamatera https://github.com/kubernetes/autoscaler/blob/master/cluster-autoscaler/cloudprovider/kamatera/README.md
* Linode https://github.com/kubernetes/autoscaler/blob/master/cluster-autoscaler/cloudprovider/linode/README.md
* Magnum https://github.com/kubernetes/autoscaler/blob/master/cluster-autoscaler/cloudprovider/magnum/README.md
* OracleCloud https://github.com/kubernetes/autoscaler/blob/master/cluster-autoscaler/cloudprovider/oci/README.md
* OVHcloud https://github.com/kubernetes/autoscaler/blob/master/cluster-autoscaler/cloudprovider/ovhcloud/README.md
* Rancher https://github.com/kubernetes/autoscaler/blob/master/cluster-autoscaler/cloudprovider/rancher/README.md
* Scaleway https://github.com/kubernetes/autoscaler/blob/master/cluster-autoscaler/cloudprovider/scaleway/README.md
* TencentCloud https://github.com/kubernetes/autoscaler/blob/master/cluster-autoscaler/cloudprovider/tencentcloud/README.md
* Vultr https://github.com/kubernetes/autoscaler/blob/master/cluster-autoscaler/cloudprovider/vultr/README.md


@ -0,0 +1,144 @@
# Cloudprovider policy
As of the moment this policy is written (September 2022), Cluster Autoscaler has
integrations with almost 30 different cloudproviders. At the same time there
are only a handful of core CA maintainers. The maintainers don't have the
capacity to build new integrations or maintain existing ones. In most cases they
also have no experience with particular clouds and no access to a test
environment.
Due to the above reasons, each integration is required to have a set of OWNERS who
are responsible for development and maintenance of the integration. This
document describes the role and responsibilities of core maintainers and
integration owners. A lot of what is described below has been unofficial
practice for multiple years now, but this policy also introduces some new
requirements for cloudprovider maintenance.
## Responsibilities
Cloudprovider owners are responsible for:
* Maintaining their integrations.
* Testing their integrations. Currently any new CA release is tested e2e on
GCE; testing on other platforms is the responsibility of cloudprovider
maintainers (note: there is an effort to make automated e2e tests possible
to run on other providers, so this may improve in the future).
* Addressing any issues raised in the autoscaler GitHub repository related to a
given provider.
* Reviewing any pull requests to their cloudprovider.
* Pull requests that only change cloudprovider code do not require any
review or approval from core maintainers.
* Pull requests that change cloudprovider and core code require approval
from both the cloudprovider owner and core maintainer.
The core maintainers will generally not interfere with cloudprovider
development, but they may take the following actions without seeking approval
from cloudprovider owners:
* Make trivial changes to cloudproviders when needed to implement changes in
CA core (ex. updating function signatures when a Go interface
changes).
* Revert any pull requests that break tests, prevent CA from compiling, etc.
This includes pull requests adding new providers if they cause the tests to
start failing or break the rules defined below.
## Adding new cloud provider integration
### External provider
One way to integrate CA with a cloudprovider is to use the existing
[External
gRPC](https://github.com/kubernetes/autoscaler/tree/master/cluster-autoscaler/cloudprovider/externalgrpc)
provider. Integrating with the gRPC interface may be easier than implementing an
in-tree cloudprovider, and the gRPC provider comes with some essential caching
built in.
An external cloudprovider implementation doesn't live in this repository and is
not a part of the CA image. As such it is also not subject to this policy.
### In-tree provider
An alternative to the External gRPC provider is an in-tree cloudprovider
integration. An in-tree provider allows more customization (ex. by implementing
[custom processors](https://github.com/kubernetes/autoscaler/tree/master/cluster-autoscaler/processors)
that integrate with a specific provider), but it requires significantly more effort to
implement and maintain.
In order to add a new in-tree integration, you need to open a pull request implementing
the interfaces defined in cloud\_provider.go. This policy requires that any new
in-tree cloudprovider follow these rules:
* Cloudprovider needs to have an OWNERS file that lists its maintainers.
Kubernetes policy requires that code OWNERS are members of the Kubernetes
organization.
* It is required that both reviewers and approvers sections of OWNERS file
are non-empty.
* This can create a chicken-and-egg problem, where adding a cloudprovider
requires being a member of the Kubernetes org and becoming a member of the
organization requires a history of code contributions. For this reason it
is allowed for the OWNERS file to temporarily contain commented-out GitHub
handles. There is an expectation that at least some of the owners will
join the Kubernetes organization (by following the
[process](https://github.com/kubernetes/community/blob/master/community-membership.md))
within one release cycle, so that they can approve PRs to their
cloudprovider.
* Cloudprovider shouldn't introduce new dependencies (such as clients/SDKs)
to top-level go.mod vendor, unless those dependencies are already imported
by the kubernetes/kubernetes repository and the same version of the library is
used by CA and Kubernetes. This requirement is mainly driven by
the problems with version conflicts in transitive dependencies we've
experienced in the past.
* Cloudproviders are welcome to carry their dependencies inside their
directories as needed.
Note: Any functions in cloud\_provider.go marked as 'Implementation optional'
may be left unimplemented. Those functions provide additional functionality, but
are not critical. To leave a function unimplemented just have it return
cloudprovider.ErrNotImplemented.
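For illustration, a hypothetical integration could stub such an optional method as follows; `myCloudProvider` is an invented type used only for this sketch, while the `Pricing` signature and `ErrNotImplemented` value mirror the existing in-tree providers:
```go
package mycloud

import (
	"k8s.io/autoscaler/cluster-autoscaler/cloudprovider"
	"k8s.io/autoscaler/cluster-autoscaler/utils/errors"
)

// myCloudProvider is a hypothetical cloudprovider integration.
type myCloudProvider struct{}

// Pricing is marked 'Implementation optional' in cloud_provider.go, so a new
// integration may simply report that it is not implemented.
func (p *myCloudProvider) Pricing() (cloudprovider.PricingModel, errors.AutoscalerError) {
	return nil, cloudprovider.ErrNotImplemented
}
```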
## Cloudprovider maintenance requirements
In order to allow code changes to Cluster Autoscaler that would require
non-trivial changes in cloudproviders, this policy introduces the _Cloudprovider
maintenance request_ (CMR) mechanism.
* CMR will be issued via a GitHub issue tagging all
cloudprovider owners and describing the problem being solved and the changes
requested.
* CMR will clearly state the minor version in which the changes are expected
(ex. 1.26).
* CMR will need to be discussed at a sig-autoscaling meeting and approved by
sig leads before being issued. It will also be announced on the sig-autoscaling
Slack channel and highlighted in the sig-autoscaling meeting notes.
* A CMR may be issued no later than the [enhancements
freeze](https://github.com/kubernetes/sig-release/blob/master/releases/release_phases.md#enhancements-freeze)
of a given Kubernetes minor version.
* If a given cloud provider was added more than one release cycle ago and there
are no valid OWNERS, CMR should request OWNERS file update.
Cloudprovider owners will be required to address the CMR or request an exception via
the CMR GitHub issue. A failure to take any action will result in the cloudprovider
being considered abandoned and marked as deprecated, as described below.
### Empty maintenance request
If no CMRs are issued in a given minor release, core maintainers will issue an
_empty CMR_. The purpose of an empty CMR is to verify that cloudprovider owners
are still actively maintaining their integration. The only action required for
an empty CMR is replying to the GitHub issue. Only one owner from each
cloudprovider needs to reply to the issue.
An empty CMR follows the same rules as any other CMR. In particular, it needs to be
issued by the enhancements freeze.
### Cloudprovider deprecation and deletion
If cloudprovider owners fail to take the actions described above, the particular
integration will be marked as deprecated in the next CA minor release. A
deprecated cloudprovider will be completely removed after 1 year as per
[Kubernetes deprecation
policy](https://kubernetes.io/docs/reference/using-api/deprecation-policy/#deprecating-a-feature-or-behavior).
A deprecated cloudprovider may become maintained again if the owners become
active again or new owners step up. In order to regain maintained status any
outstanding CMRs will need to be addressed.


@ -127,6 +127,11 @@ func (ali *aliCloudProvider) NodeGroupForNode(node *apiv1.Node) (cloudprovider.N
return ali.manager.GetAsgForInstance(instanceId)
}
// HasInstance returns whether a given node has a corresponding instance in this cloud provider
func (ali *aliCloudProvider) HasInstance(*apiv1.Node) (bool, error) {
return true, cloudprovider.ErrNotImplemented
}
// Pricing returns pricing model for this cloud provider or error if not available.
func (ali *aliCloudProvider) Pricing() (cloudprovider.PricingModel, errors.AutoscalerError) {
return nil, cloudprovider.ErrNotImplemented


@ -45,7 +45,7 @@ rules:
resources: ["statefulsets", "replicasets", "daemonsets"]
verbs: ["watch","list","get"]
- apiGroups: ["storage.k8s.io"]
resources: ["storageclasses"]
resources: ["storageclasses", "csinodes", "csidrivers", "csistoragecapacities"]
verbs: ["watch","list","get"]
---
@ -109,7 +109,7 @@ metadata:
type: Opaque
data:
access-key-id: [YOUR_BASE64_AK_ID]
access-key-id: [YOUR_BASE64_AK_SECRET]
access-key-secret: [YOUR_BASE64_AK_SECRET]
region-id: [YOUR_BASE64_REGION_ID]
---


@ -4,3 +4,7 @@ approvers:
- drmorr0
emeritus_approvers:
- Jeffwan
reviewers:
- jaypipes
- gjtempleton
- drmorr0


@ -47,6 +47,7 @@ should be updated to restrict the resources/add conditionals:
"autoscaling:DescribeAutoScalingGroups",
"autoscaling:DescribeAutoScalingInstances",
"autoscaling:DescribeLaunchConfigurations",
"autoscaling:DescribeScalingActivities",
"autoscaling:DescribeTags",
"ec2:DescribeInstanceTypes",
"ec2:DescribeLaunchTemplateVersions"
@ -164,9 +165,12 @@ Auto-Discovery Setup is the preferred method to configure Cluster Autoscaler.
To enable this, provide the `--node-group-auto-discovery` flag as an argument
whose value is a list of tag keys that should be looked for. For example,
`--node-group-auto-discovery=asg:tag=k8s.io/cluster-autoscaler/enabled,k8s.io/cluster-autoscaler/<cluster-name>,my-custom-tag=custom-value`
will find the ASGs that have the given tags. Optionally, a value can be provided
for each tag as well.
`--node-group-auto-discovery=asg:tag=k8s.io/cluster-autoscaler/enabled,k8s.io/cluster-autoscaler/<cluster-name>`
will find the ASGs that have at least all the given tags. Without the tags, the Cluster Autoscaler will be unable to add new instances
to the ASG, as it will not have been discovered. In this example no values are given for the tags; in that case any tag value is ignored and
only the tag name matters. Optionally, a tag value can be specified, and custom tags can also be added. For example,
`--node-group-auto-discovery=asg:tag=k8s.io/cluster-autoscaler/enabled=foo,k8s.io/cluster-autoscaler/<cluster-name>=bar,my-custom-tag=custom-value`.
Now the ASG tags must have the matching values, as well as the custom tag, for the ASG to be successfully discovered by the Cluster Autoscaler.
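To make the matching rule concrete, here is a minimal Go sketch (illustrative only, not the Cluster Autoscaler's actual implementation; the `my-cluster` tag is a made-up example) of how an ASG's tags could be checked against the configured tag list:
```go
package main

import "fmt"

// asgDiscovered reports whether an ASG would be discovered: it must carry every
// configured tag key and, where a tag value was configured, that value must match.
func asgDiscovered(required, asgTags map[string]string) bool {
	for key, wantValue := range required {
		gotValue, ok := asgTags[key]
		if !ok {
			return false // a required tag key is missing, so the ASG is not discovered
		}
		if wantValue != "" && wantValue != gotValue {
			return false // a value was configured for this tag and it does not match
		}
	}
	return true
}

func main() {
	required := map[string]string{
		"k8s.io/cluster-autoscaler/enabled":    "", // empty value: only the key matters
		"k8s.io/cluster-autoscaler/my-cluster": "",
	}
	asgTags := map[string]string{
		"k8s.io/cluster-autoscaler/enabled":    "true",
		"k8s.io/cluster-autoscaler/my-cluster": "owned",
	}
	fmt.Println(asgDiscovered(required, asgTags)) // true
}
```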
Example deployment:
@ -192,18 +196,24 @@ only the first instance type found will be used. See [Using Mixed Instances
Policies and Spot Instances](#Using-Mixed-Instances-Policies-and-Spot-Instances)
for details.
Cluster Autoscaler supports hints that nodes will be labelled when they join the
cluster via ASG tags. The tag is of the format
`k8s.io/cluster-autoscaler/node-template/label/<label-name>`. `<label-name>` is
When scaling up from 0 nodes, the Cluster Autoscaler reads ASG tags to derive information about the specifications of the nodes,
i.e. the labels and taints in that ASG. Note that it does not actually apply these labels or taints; this is done by an AWS-generated
user data script. The tags give the Cluster Autoscaler information about whether pending pods would be able to be scheduled should a new node
be spun up for a particular ASG, with the assumption that the ASG tags accurately reflect the labels and taints actually applied.
The following is only required if scaling up from 0 nodes. The Cluster Autoscaler will require the label tag
on the ASG should a deployment have a NodeSelector, else no scaling will occur as the Cluster Autoscaler does not realise
the ASG has that particular label. The tag is of the format
`k8s.io/cluster-autoscaler/node-template/label/<label-name>`: `<label-value>`, where `<label-name>` is
the name of the label and the value of each tag specifies the label value.
Example tags:
- `k8s.io/cluster-autoscaler/node-template/label/foo`: `bar`
Cluster Autoscaler supports hints that nodes will be tainted when they join the
cluster via ASG tags. The tag is of the format
`k8s.io/cluster-autoscaler/node-template/taint/<taint-name>`. `<taint-name>` is
The following is only required if scaling up from 0 nodes. The Cluster Autoscaler will require the taint tag
on the ASG, else tainted nodes may get spun up that cannot actually have the pending pods run on them. The tag is of the format
`k8s.io/cluster-autoscaler/node-template/taint/<taint-name>`: `<taint-value>:<taint-effect>`, where `<taint-name>` is
the name of the taint and the value of each tag specifies the taint value and effect with the format `<taint-value>:<taint-effect>`.
Example tags:
@ -243,7 +253,9 @@ Recommendations:
- It is recommended to use a second tag like
`k8s.io/cluster-autoscaler/<cluster-name>` when
`k8s.io/cluster-autoscaler/enabled` is used across many clusters to prevent
ASGs from different clusters recognized as the node groups.
ASGs from different clusters having conflicts.
An ASG must contain at least all the tags specified and, as such, secondary tags can differentiate between different
clusters' ASGs.
- To prevent conflicts, do not provide a `--nodes` argument if
`--node-group-auto-discovery` is specified.
- Be sure to add `autoscaling:DescribeLaunchConfigurations` or
@ -252,7 +264,7 @@ Recommendations:
Configurations or Launch Templates.
- If Cluster Autoscaler adds a node to the cluster, and the node has taints applied
when it joins the cluster that Cluster Autoscaler was unaware of (because the tag
wasn't supplied), this can lead to significant confusion and misbehaviour.
wasn't supplied in the ASG), this can lead to significant confusion and misbehaviour.
### Special note on GPU instances
@ -509,3 +521,4 @@ Please note: it is also possible to mount the cloud config file from host:
EC2 launch configuration has the setting `Metadata response hop limit` set to `2`.
Otherwise, the `/latest/api/token` call will timeout and result in an error. See [AWS docs here](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/configuring-instance-metadata-service.html#configuring-instance-metadata-options) for further information.
- If you don't use EKS managed nodegroups, don't add the `eks:nodegroup-name` tag to the ASG as this will lead to extra EKS API calls that could slow down scaling when there are 0 nodes in the nodegroup.
- Set `AWS_MAX_ATTEMPTS` to configure the maximum number of retries.


@ -8,7 +8,7 @@ import (
)
// ValidateEndpointHostHandler is a request handler that will validate the
// request endpoint's hosts is a valid RFC 3986 host.
// request endpoint's hosts is a valid RFC 3986 (https://www.ietf.org/rfc/rfc3986.txt) host.
var ValidateEndpointHostHandler = request.NamedHandler{
Name: "awssdk.protocol.ValidateEndpointHostHandler",
Fn: func(r *request.Request) {
@ -20,7 +20,7 @@ var ValidateEndpointHostHandler = request.NamedHandler{
}
// ValidateEndpointHost validates that the host string passed in is a valid RFC
// 3986 host. Returns error if the host is not valid.
// 3986 (https://www.ietf.org/rfc/rfc3986.txt) host. Returns error if the host is not valid.
func ValidateEndpointHost(opName, host string) error {
paramErrs := request.ErrInvalidParams{Context: opName}
@ -71,7 +71,7 @@ func ValidateEndpointHost(opName, host string) error {
return nil
}
// ValidHostLabel returns if the label is a valid RFC 3986 host label.
// ValidHostLabel returns if the label is a valid RFC 3986 (https://www.ietf.org/rfc/rfc3986.txt) host label.
func ValidHostLabel(label string) bool {
if l := len(label); l == 0 || l > 63 {
return false
@ -90,7 +90,7 @@ func ValidHostLabel(label string) bool {
return true
}
// ValidPortNumber return if the port is valid RFC 3986 port
// ValidPortNumber return if the port is valid RFC 3986 (https://www.ietf.org/rfc/rfc3986.txt) port
func ValidPortNumber(port string) bool {
i, err := strconv.Atoi(port)
if err != nil {


@ -120,6 +120,11 @@ func (aws *awsCloudProvider) NodeGroupForNode(node *apiv1.Node) (cloudprovider.N
}, nil
}
// HasInstance returns whether a given node has a corresponding instance in this cloud provider
func (aws *awsCloudProvider) HasInstance(*apiv1.Node) (bool, error) {
return true, cloudprovider.ErrNotImplemented
}
// Pricing returns pricing model for this cloud provider or error if not available.
func (aws *awsCloudProvider) Pricing() (cloudprovider.PricingModel, errors.AutoscalerError) {
return nil, cloudprovider.ErrNotImplemented
@ -362,14 +367,19 @@ func (ng *AwsNodeGroup) TemplateNodeInfo() (*schedulerframework.NodeInfo, error)
// BuildAWS builds AWS cloud provider, manager etc.
func BuildAWS(opts config.AutoscalingOptions, do cloudprovider.NodeGroupDiscoveryOptions, rl *cloudprovider.ResourceLimiter) cloudprovider.CloudProvider {
var config io.ReadCloser
var cfg io.ReadCloser
if opts.CloudConfig != "" {
var err error
config, err = os.Open(opts.CloudConfig)
cfg, err = os.Open(opts.CloudConfig)
if err != nil {
klog.Fatalf("Couldn't open cloud provider configuration %s: %#v", opts.CloudConfig, err)
}
defer config.Close()
defer cfg.Close()
}
sdkProvider, err := createAWSSDKProvider(cfg)
if err != nil {
klog.Fatalf("Failed to create AWS SDK Provider: %v", err)
}
// Generate EC2 list
@ -377,12 +387,7 @@ func BuildAWS(opts config.AutoscalingOptions, do cloudprovider.NodeGroupDiscover
if opts.AWSUseStaticInstanceList {
klog.Warningf("Using static EC2 Instance Types, this list could be outdated. Last update time: %s", lastUpdateTime)
} else {
region, err := GetCurrentAwsRegion()
if err != nil {
klog.Fatalf("Failed to get AWS Region: %v", err)
}
generatedInstanceTypes, err := GenerateEC2InstanceTypes(region)
generatedInstanceTypes, err := GenerateEC2InstanceTypes(sdkProvider.session)
if err != nil {
klog.Errorf("Failed to generate AWS EC2 Instance Types: %v, falling back to static list with last update time: %s", err, lastUpdateTime)
}
@ -409,7 +414,7 @@ func BuildAWS(opts config.AutoscalingOptions, do cloudprovider.NodeGroupDiscover
klog.Infof("Successfully load %d EC2 Instance Types %s", len(keys), keys)
}
manager, err := CreateAwsManager(config, do, instanceTypes)
manager, err := CreateAwsManager(sdkProvider, do, instanceTypes)
if err != nil {
klog.Fatalf("Failed to create AWS Manager: %v", err)
}


@ -14,37 +14,31 @@ See the License for the specific language governing permissions and
limitations under the License.
*/
//go:generate go run ec2_instance_types/gen.go
//go:generate go run ec2_instance_types/gen.go -region $AWS_REGION
package aws
import (
"errors"
"fmt"
"io"
"math/rand"
"os"
"regexp"
"strconv"
"strings"
"time"
"gopkg.in/gcfg.v1"
apiv1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/klog/v2"
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider"
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider/aws/aws-sdk-go/aws"
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider/aws/aws-sdk-go/aws/ec2metadata"
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider/aws/aws-sdk-go/aws/endpoints"
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider/aws/aws-sdk-go/aws/session"
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider/aws/aws-sdk-go/service/autoscaling"
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider/aws/aws-sdk-go/service/ec2"
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider/aws/aws-sdk-go/service/eks"
"k8s.io/autoscaler/cluster-autoscaler/config"
"k8s.io/autoscaler/cluster-autoscaler/utils/gpu"
klog "k8s.io/klog/v2"
provider_aws "k8s.io/legacy-cloud-providers/aws"
)
const (
@ -74,131 +68,15 @@ type asgTemplate struct {
Tags []*autoscaling.TagDescription
}
func validateOverrides(cfg *provider_aws.CloudConfig) error {
if len(cfg.ServiceOverride) == 0 {
return nil
}
set := make(map[string]bool)
for onum, ovrd := range cfg.ServiceOverride {
// Note: gcfg does not space trim, so we have to when comparing to empty string ""
name := strings.TrimSpace(ovrd.Service)
if name == "" {
return fmt.Errorf("service name is missing [Service is \"\"] in override %s", onum)
}
// insure the map service name is space trimmed
ovrd.Service = name
region := strings.TrimSpace(ovrd.Region)
if region == "" {
return fmt.Errorf("service region is missing [Region is \"\"] in override %s", onum)
}
// insure the map region is space trimmed
ovrd.Region = region
url := strings.TrimSpace(ovrd.URL)
if url == "" {
return fmt.Errorf("url is missing [URL is \"\"] in override %s", onum)
}
signingRegion := strings.TrimSpace(ovrd.SigningRegion)
if signingRegion == "" {
return fmt.Errorf("signingRegion is missing [SigningRegion is \"\"] in override %s", onum)
}
signature := name + "_" + region
if set[signature] {
return fmt.Errorf("duplicate entry found for service override [%s] (%s in %s)", onum, name, region)
}
set[signature] = true
}
return nil
}
func getResolver(cfg *provider_aws.CloudConfig) endpoints.ResolverFunc {
defaultResolver := endpoints.DefaultResolver()
defaultResolverFn := func(service, region string,
optFns ...func(*endpoints.Options)) (endpoints.ResolvedEndpoint, error) {
return defaultResolver.EndpointFor(service, region, optFns...)
}
if len(cfg.ServiceOverride) == 0 {
return defaultResolverFn
}
return func(service, region string,
optFns ...func(*endpoints.Options)) (endpoints.ResolvedEndpoint, error) {
for _, override := range cfg.ServiceOverride {
if override.Service == service && override.Region == region {
return endpoints.ResolvedEndpoint{
URL: override.URL,
SigningRegion: override.SigningRegion,
SigningMethod: override.SigningMethod,
SigningName: override.SigningName,
}, nil
}
}
return defaultResolver.EndpointFor(service, region, optFns...)
}
}
type awsSDKProvider struct {
cfg *provider_aws.CloudConfig
}
func newAWSSDKProvider(cfg *provider_aws.CloudConfig) *awsSDKProvider {
return &awsSDKProvider{
cfg: cfg,
}
}
// getRegion deduces the current AWS Region.
func getRegion(cfg ...*aws.Config) string {
region, present := os.LookupEnv("AWS_REGION")
if !present {
sess, err := session.NewSession()
if err != nil {
klog.Errorf("Error getting AWS session while retrieving region: %v", err)
} else {
svc := ec2metadata.New(sess, cfg...)
if r, err := svc.Region(); err == nil {
region = r
}
}
}
return region
}
// createAwsManagerInternal allows for custom objects to be passed in by tests
//
// #1449 If running tests outside of AWS without AWS_REGION among environment
// variables, avoid a 5+ second EC2 Metadata lookup timeout in getRegion by
// setting and resetting AWS_REGION before calling createAWSManagerInternal:
//
// defer resetAWSRegion(os.LookupEnv("AWS_REGION"))
// os.Setenv("AWS_REGION", "fanghorn")
func createAWSManagerInternal(
configReader io.Reader,
awsSDKProvider *awsSDKProvider,
discoveryOpts cloudprovider.NodeGroupDiscoveryOptions,
awsService *awsWrapper,
instanceTypes map[string]*InstanceType,
) (*AwsManager, error) {
cfg, err := readAWSCloudConfig(configReader)
if err != nil {
klog.Errorf("Couldn't read config: %v", err)
return nil, err
}
if err = validateOverrides(cfg); err != nil {
klog.Errorf("Unable to validate custom endpoint overrides: %v", err)
return nil, err
}
if awsService == nil {
awsSdkProvider := newAWSSDKProvider(cfg)
sess, err := session.NewSession(aws.NewConfig().WithRegion(getRegion()).
WithEndpointResolver(getResolver(awsSdkProvider.cfg)))
if err != nil {
return nil, err
}
sess := awsSDKProvider.session
awsService = &awsWrapper{autoscaling.New(sess), ec2.New(sess), eks.New(sess)}
}
@ -228,24 +106,9 @@ func createAWSManagerInternal(
return manager, nil
}
// readAWSCloudConfig reads an instance of AWSCloudConfig from config reader.
func readAWSCloudConfig(config io.Reader) (*provider_aws.CloudConfig, error) {
var cfg provider_aws.CloudConfig
var err error
if config != nil {
err = gcfg.ReadInto(&cfg, config)
if err != nil {
return nil, err
}
}
return &cfg, nil
}
// CreateAwsManager constructs awsManager object.
func CreateAwsManager(configReader io.Reader, discoveryOpts cloudprovider.NodeGroupDiscoveryOptions, instanceTypes map[string]*InstanceType) (*AwsManager, error) {
return createAWSManagerInternal(configReader, discoveryOpts, nil, instanceTypes)
func CreateAwsManager(awsSDKProvider *awsSDKProvider, discoveryOpts cloudprovider.NodeGroupDiscoveryOptions, instanceTypes map[string]*InstanceType) (*AwsManager, error) {
return createAWSManagerInternal(awsSDKProvider, discoveryOpts, nil, instanceTypes)
}
// Refresh is called before every main loop and can be used to dynamically update cloud provider state.


@ -17,12 +17,7 @@ limitations under the License.
package aws
import (
"encoding/json"
"fmt"
"io"
"net/http"
"net/http/httptest"
"os"
"reflect"
"sort"
"strconv"
@ -37,33 +32,12 @@ import (
"k8s.io/apimachinery/pkg/api/resource"
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider"
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider/aws/aws-sdk-go/aws"
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider/aws/aws-sdk-go/aws/ec2metadata"
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider/aws/aws-sdk-go/service/autoscaling"
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider/aws/aws-sdk-go/service/ec2"
"k8s.io/autoscaler/cluster-autoscaler/config"
"k8s.io/autoscaler/cluster-autoscaler/utils/gpu"
provider_aws "k8s.io/legacy-cloud-providers/aws"
)
// TestGetRegion ensures correct source supplies AWS Region.
func TestGetRegion(t *testing.T) {
key := "AWS_REGION"
// Ensure environment variable retains precedence.
expected1 := "the-shire-1"
t.Setenv(key, expected1)
assert.Equal(t, expected1, getRegion())
// Ensure without environment variable, EC2 Metadata is used.
expected2 := "mordor-2"
expectedjson := ec2metadata.EC2InstanceIdentityDocument{Region: expected2}
js, _ := json.Marshal(expectedjson)
os.Unsetenv(key)
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
w.Write(js)
}))
cfg := aws.NewConfig().WithEndpoint(server.URL)
assert.Equal(t, expected2, getRegion(cfg))
}
func TestJoinNodeLabelsChoosingUserValuesOverAPIValues(t *testing.T) {
extractedLabels := make(map[string]string)
mngLabels := make(map[string]string)
@ -820,271 +794,6 @@ type ServiceDescriptor struct {
signingName string
}
func TestOverridesActiveConfig(t *testing.T) {
tests := []struct {
name string
reader io.Reader
aws provider_aws.Services
expectError bool
active bool
servicesOverridden []ServiceDescriptor
}{
{
"No overrides",
strings.NewReader(`
[global]
`),
nil,
false, false,
[]ServiceDescriptor{},
},
{
"Missing Service Name",
strings.NewReader(`
[global]
[ServiceOverride "1"]
Region=sregion
URL=https://s3.foo.bar
SigningRegion=sregion
SigningMethod = sign
`),
nil,
true, false,
[]ServiceDescriptor{},
},
{
"Missing Service Region",
strings.NewReader(`
[global]
[ServiceOverride "1"]
Service=s3
URL=https://s3.foo.bar
SigningRegion=sregion
SigningMethod = sign
`),
nil,
true, false,
[]ServiceDescriptor{},
},
{
"Missing URL",
strings.NewReader(`
[global]
[ServiceOverride "1"]
Service="s3"
Region=sregion
SigningRegion=sregion
SigningMethod = sign
`),
nil,
true, false,
[]ServiceDescriptor{},
},
{
"Missing Signing Region",
strings.NewReader(`
[global]
[ServiceOverride "1"]
Service=s3
Region=sregion
URL=https://s3.foo.bar
SigningMethod = sign
`),
nil,
true, false,
[]ServiceDescriptor{},
},
{
"Active Overrides",
strings.NewReader(`
[Global]
[ServiceOverride "1"]
Service = "s3 "
Region = sregion
URL = https://s3.foo.bar
SigningRegion = sregion
SigningMethod = v4
`),
nil,
false, true,
[]ServiceDescriptor{{name: "s3", region: "sregion", signingRegion: "sregion", signingMethod: "v4"}},
},
{
"Multiple Overridden Services",
strings.NewReader(`
[Global]
vpc = vpc-abc1234567
[ServiceOverride "1"]
Service=s3
Region=sregion1
URL=https://s3.foo.bar
SigningRegion=sregion1
SigningMethod = v4
[ServiceOverride "2"]
Service=ec2
Region=sregion2
URL=https://ec2.foo.bar
SigningRegion=sregion2
SigningMethod = v4
`),
nil,
false, true,
[]ServiceDescriptor{{name: "s3", region: "sregion1", signingRegion: "sregion1", signingMethod: "v4"},
{name: "ec2", region: "sregion2", signingRegion: "sregion2", signingMethod: "v4"}},
},
{
"Duplicate Services",
strings.NewReader(`
[Global]
vpc = vpc-abc1234567
[ServiceOverride "1"]
Service=s3
Region=sregion1
URL=https://s3.foo.bar
SigningRegion=sregion
SigningMethod = sign
[ServiceOverride "2"]
Service=s3
Region=sregion1
URL=https://s3.foo.bar
SigningRegion=sregion
SigningMethod = sign
`),
nil,
true, false,
[]ServiceDescriptor{},
},
{
"Multiple Overridden Services in Multiple regions",
strings.NewReader(`
[global]
[ServiceOverride "1"]
Service=s3
Region=region1
URL=https://s3.foo.bar
SigningRegion=sregion1
[ServiceOverride "2"]
Service=ec2
Region=region2
URL=https://ec2.foo.bar
SigningRegion=sregion
SigningMethod = v4
`),
nil,
false, true,
[]ServiceDescriptor{{name: "s3", region: "region1", signingRegion: "sregion1", signingMethod: ""},
{name: "ec2", region: "region2", signingRegion: "sregion", signingMethod: "v4"}},
},
{
"Multiple regions, Same Service",
strings.NewReader(`
[global]
[ServiceOverride "1"]
Service=s3
Region=region1
URL=https://s3.foo.bar
SigningRegion=sregion1
SigningMethod = v3
[ServiceOverride "2"]
Service=s3
Region=region2
URL=https://s3.foo.bar
SigningRegion=sregion1
SigningMethod = v4
SigningName = "name"
`),
nil,
false, true,
[]ServiceDescriptor{{name: "s3", region: "region1", signingRegion: "sregion1", signingMethod: "v3"},
{name: "s3", region: "region2", signingRegion: "sregion1", signingMethod: "v4", signingName: "name"}},
},
}
for _, test := range tests {
t.Logf("Running test case %s", test.name)
cfg, err := readAWSCloudConfig(test.reader)
if err == nil {
err = validateOverrides(cfg)
}
if test.expectError {
if err == nil {
t.Errorf("Should error for case %s (cfg=%v)", test.name, cfg)
}
} else {
if err != nil {
t.Errorf("Should succeed for case: %s, got %v", test.name, err)
}
if len(cfg.ServiceOverride) != len(test.servicesOverridden) {
t.Errorf("Expected %d overridden services, received %d for case %s",
len(test.servicesOverridden), len(cfg.ServiceOverride), test.name)
} else {
for _, sd := range test.servicesOverridden {
var found *struct {
Service string
Region string
URL string
SigningRegion string
SigningMethod string
SigningName string
}
for _, v := range cfg.ServiceOverride {
if v.Service == sd.name && v.Region == sd.region {
found = v
break
}
}
if found == nil {
t.Errorf("Missing override for service %s in case %s",
sd.name, test.name)
} else {
if found.SigningRegion != sd.signingRegion {
t.Errorf("Expected signing region '%s', received '%s' for case %s",
sd.signingRegion, found.SigningRegion, test.name)
}
if found.SigningMethod != sd.signingMethod {
t.Errorf("Expected signing method '%s', received '%s' for case %s",
sd.signingMethod, found.SigningRegion, test.name)
}
targetName := fmt.Sprintf("https://%s.foo.bar", sd.name)
if found.URL != targetName {
t.Errorf("Expected Endpoint '%s', received '%s' for case %s",
targetName, found.URL, test.name)
}
if found.SigningName != sd.signingName {
t.Errorf("Expected signing name '%s', received '%s' for case %s",
sd.signingName, found.SigningName, test.name)
}
fn := getResolver(cfg)
ep1, e := fn(sd.name, sd.region, nil)
if e != nil {
t.Errorf("Expected a valid endpoint for %s in case %s",
sd.name, test.name)
} else {
targetName := fmt.Sprintf("https://%s.foo.bar", sd.name)
if ep1.URL != targetName {
t.Errorf("Expected endpoint url: %s, received %s in case %s",
targetName, ep1.URL, test.name)
}
if ep1.SigningRegion != sd.signingRegion {
t.Errorf("Expected signing region '%s', received '%s' in case %s",
sd.signingRegion, ep1.SigningRegion, test.name)
}
if ep1.SigningMethod != sd.signingMethod {
t.Errorf("Expected signing method '%s', received '%s' in case %s",
sd.signingMethod, ep1.SigningRegion, test.name)
}
}
}
}
}
}
}
}
func tagsMatcher(expected *autoscaling.DescribeAutoScalingGroupsInput) func(*autoscaling.DescribeAutoScalingGroupsInput) bool {
return func(actual *autoscaling.DescribeAutoScalingGroupsInput) bool {
expectedTags := flatTagSlice(expected.Filters)


@ -0,0 +1,188 @@
/*
Copyright 2016 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package aws
import (
"fmt"
"gopkg.in/gcfg.v1"
"io"
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider/aws/aws-sdk-go/aws"
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider/aws/aws-sdk-go/aws/ec2metadata"
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider/aws/aws-sdk-go/aws/endpoints"
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider/aws/aws-sdk-go/aws/session"
"k8s.io/klog/v2"
provider_aws "k8s.io/legacy-cloud-providers/aws"
"os"
"strconv"
"strings"
)
// createAWSSDKProvider
//
// #1449 If running tests outside of AWS without AWS_REGION among environment
// variables, avoid a 5+ second EC2 Metadata lookup timeout in getRegion by
// setting and resetting AWS_REGION before calling createAWSSDKProvider:
//
// t.Setenv("AWS_REGION", "fanghorn")
func createAWSSDKProvider(configReader io.Reader) (*awsSDKProvider, error) {
cfg, err := readAWSCloudConfig(configReader)
if err != nil {
klog.Errorf("Couldn't read config: %v", err)
return nil, err
}
if err = validateOverrides(cfg); err != nil {
klog.Errorf("Unable to validate custom endpoint overrides: %v", err)
return nil, err
}
config := aws.NewConfig().
WithRegion(getRegion()).
WithEndpointResolver(getResolver(cfg))
config, err = setMaxRetriesFromEnv(config)
if err != nil {
return nil, err
}
sess, err := session.NewSession(config)
if err != nil {
return nil, err
}
provider := &awsSDKProvider{
session: sess,
}
return provider, nil
}
// setMaxRetriesFromEnv sets aws config MaxRetries by reading AWS_MAX_ATTEMPTS
// aws sdk does not auto-set these so instead of having more config options we can reuse what the aws cli
// does and read AWS_MAX_ATTEMPTS from the env https://docs.aws.amazon.com/cli/latest/userguide/cli-configure-envvars.html
func setMaxRetriesFromEnv(config *aws.Config) (*aws.Config, error) {
maxRetries := os.Getenv("AWS_MAX_ATTEMPTS")
if maxRetries != "" {
num, err := strconv.Atoi(maxRetries)
if err != nil {
return nil, err
}
config = config.WithMaxRetries(num)
}
return config, nil
}
type awsSDKProvider struct {
session *session.Session
}
// readAWSCloudConfig reads an instance of AWSCloudConfig from config reader.
func readAWSCloudConfig(config io.Reader) (*provider_aws.CloudConfig, error) {
var cfg provider_aws.CloudConfig
var err error
if config != nil {
err = gcfg.ReadInto(&cfg, config)
if err != nil {
return nil, err
}
}
return &cfg, nil
}
func validateOverrides(cfg *provider_aws.CloudConfig) error {
if len(cfg.ServiceOverride) == 0 {
return nil
}
set := make(map[string]bool)
for onum, ovrd := range cfg.ServiceOverride {
// Note: gcfg does not space trim, so we have to when comparing to empty string ""
name := strings.TrimSpace(ovrd.Service)
if name == "" {
return fmt.Errorf("service name is missing [Service is \"\"] in override %s", onum)
}
// insure the map service name is space trimmed
ovrd.Service = name
region := strings.TrimSpace(ovrd.Region)
if region == "" {
return fmt.Errorf("service region is missing [Region is \"\"] in override %s", onum)
}
// insure the map region is space trimmed
ovrd.Region = region
url := strings.TrimSpace(ovrd.URL)
if url == "" {
return fmt.Errorf("url is missing [URL is \"\"] in override %s", onum)
}
signingRegion := strings.TrimSpace(ovrd.SigningRegion)
if signingRegion == "" {
return fmt.Errorf("signingRegion is missing [SigningRegion is \"\"] in override %s", onum)
}
signature := name + "_" + region
if set[signature] {
return fmt.Errorf("duplicate entry found for service override [%s] (%s in %s)", onum, name, region)
}
set[signature] = true
}
return nil
}
func getResolver(cfg *provider_aws.CloudConfig) endpoints.ResolverFunc {
defaultResolver := endpoints.DefaultResolver()
defaultResolverFn := func(service, region string,
optFns ...func(*endpoints.Options)) (endpoints.ResolvedEndpoint, error) {
return defaultResolver.EndpointFor(service, region, optFns...)
}
if len(cfg.ServiceOverride) == 0 {
return defaultResolverFn
}
return func(service, region string,
optFns ...func(*endpoints.Options)) (endpoints.ResolvedEndpoint, error) {
for _, override := range cfg.ServiceOverride {
if override.Service == service && override.Region == region {
return endpoints.ResolvedEndpoint{
URL: override.URL,
SigningRegion: override.SigningRegion,
SigningMethod: override.SigningMethod,
SigningName: override.SigningName,
}, nil
}
}
return defaultResolver.EndpointFor(service, region, optFns...)
}
}
// getRegion deduces the current AWS Region.
func getRegion(cfg ...*aws.Config) string {
region, present := os.LookupEnv("AWS_REGION")
if !present {
sess, err := session.NewSession()
if err != nil {
klog.Errorf("Error getting AWS session while retrieving region: %v", err)
} else {
svc := ec2metadata.New(sess, cfg...)
if r, err := svc.Region(); err == nil {
region = r
}
}
}
return region
}


@ -0,0 +1,316 @@
/*
Copyright 2017 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package aws
import (
"encoding/json"
"fmt"
"github.com/stretchr/testify/assert"
"io"
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider/aws/aws-sdk-go/aws"
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider/aws/aws-sdk-go/aws/ec2metadata"
provider_aws "k8s.io/legacy-cloud-providers/aws"
"net/http"
"net/http/httptest"
"os"
"strings"
"testing"
)
// TestGetRegion ensures correct source supplies AWS Region.
func TestGetRegion(t *testing.T) {
key := "AWS_REGION"
// Ensure environment variable retains precedence.
expected1 := "the-shire-1"
t.Setenv(key, expected1)
assert.Equal(t, expected1, getRegion())
// Ensure without environment variable, EC2 Metadata is used.
expected2 := "mordor-2"
expectedjson := ec2metadata.EC2InstanceIdentityDocument{Region: expected2}
js, _ := json.Marshal(expectedjson)
os.Unsetenv(key)
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
w.Write(js)
}))
cfg := aws.NewConfig().WithEndpoint(server.URL)
assert.Equal(t, expected2, getRegion(cfg))
}
func TestOverridesActiveConfig(t *testing.T) {
tests := []struct {
name string
reader io.Reader
aws provider_aws.Services
expectError bool
active bool
servicesOverridden []ServiceDescriptor
}{
{
"No overrides",
strings.NewReader(`
[global]
`),
nil,
false, false,
[]ServiceDescriptor{},
},
{
"Missing Service Name",
strings.NewReader(`
[global]
[ServiceOverride "1"]
Region=sregion
URL=https://s3.foo.bar
SigningRegion=sregion
SigningMethod = sign
`),
nil,
true, false,
[]ServiceDescriptor{},
},
{
"Missing Service Region",
strings.NewReader(`
[global]
[ServiceOverride "1"]
Service=s3
URL=https://s3.foo.bar
SigningRegion=sregion
SigningMethod = sign
`),
nil,
true, false,
[]ServiceDescriptor{},
},
{
"Missing URL",
strings.NewReader(`
[global]
[ServiceOverride "1"]
Service="s3"
Region=sregion
SigningRegion=sregion
SigningMethod = sign
`),
nil,
true, false,
[]ServiceDescriptor{},
},
{
"Missing Signing Region",
strings.NewReader(`
[global]
[ServiceOverride "1"]
Service=s3
Region=sregion
URL=https://s3.foo.bar
SigningMethod = sign
`),
nil,
true, false,
[]ServiceDescriptor{},
},
{
"Active Overrides",
strings.NewReader(`
[Global]
[ServiceOverride "1"]
Service = "s3 "
Region = sregion
URL = https://s3.foo.bar
SigningRegion = sregion
SigningMethod = v4
`),
nil,
false, true,
[]ServiceDescriptor{{name: "s3", region: "sregion", signingRegion: "sregion", signingMethod: "v4"}},
},
{
"Multiple Overridden Services",
strings.NewReader(`
[Global]
vpc = vpc-abc1234567
[ServiceOverride "1"]
Service=s3
Region=sregion1
URL=https://s3.foo.bar
SigningRegion=sregion1
SigningMethod = v4
[ServiceOverride "2"]
Service=ec2
Region=sregion2
URL=https://ec2.foo.bar
SigningRegion=sregion2
SigningMethod = v4
`),
nil,
false, true,
[]ServiceDescriptor{{name: "s3", region: "sregion1", signingRegion: "sregion1", signingMethod: "v4"},
{name: "ec2", region: "sregion2", signingRegion: "sregion2", signingMethod: "v4"}},
},
{
"Duplicate Services",
strings.NewReader(`
[Global]
vpc = vpc-abc1234567
[ServiceOverride "1"]
Service=s3
Region=sregion1
URL=https://s3.foo.bar
SigningRegion=sregion
SigningMethod = sign
[ServiceOverride "2"]
Service=s3
Region=sregion1
URL=https://s3.foo.bar
SigningRegion=sregion
SigningMethod = sign
`),
nil,
true, false,
[]ServiceDescriptor{},
},
{
"Multiple Overridden Services in Multiple regions",
strings.NewReader(`
[global]
[ServiceOverride "1"]
Service=s3
Region=region1
URL=https://s3.foo.bar
SigningRegion=sregion1
[ServiceOverride "2"]
Service=ec2
Region=region2
URL=https://ec2.foo.bar
SigningRegion=sregion
SigningMethod = v4
`),
nil,
false, true,
[]ServiceDescriptor{{name: "s3", region: "region1", signingRegion: "sregion1", signingMethod: ""},
{name: "ec2", region: "region2", signingRegion: "sregion", signingMethod: "v4"}},
},
{
"Multiple regions, Same Service",
strings.NewReader(`
[global]
[ServiceOverride "1"]
Service=s3
Region=region1
URL=https://s3.foo.bar
SigningRegion=sregion1
SigningMethod = v3
[ServiceOverride "2"]
Service=s3
Region=region2
URL=https://s3.foo.bar
SigningRegion=sregion1
SigningMethod = v4
SigningName = "name"
`),
nil,
false, true,
[]ServiceDescriptor{{name: "s3", region: "region1", signingRegion: "sregion1", signingMethod: "v3"},
{name: "s3", region: "region2", signingRegion: "sregion1", signingMethod: "v4", signingName: "name"}},
},
}
for _, test := range tests {
t.Logf("Running test case %s", test.name)
cfg, err := readAWSCloudConfig(test.reader)
if err == nil {
err = validateOverrides(cfg)
}
if test.expectError {
if err == nil {
t.Errorf("Should error for case %s (cfg=%v)", test.name, cfg)
}
} else {
if err != nil {
t.Errorf("Should succeed for case: %s, got %v", test.name, err)
}
if len(cfg.ServiceOverride) != len(test.servicesOverridden) {
t.Errorf("Expected %d overridden services, received %d for case %s",
len(test.servicesOverridden), len(cfg.ServiceOverride), test.name)
} else {
for _, sd := range test.servicesOverridden {
var found *struct {
Service string
Region string
URL string
SigningRegion string
SigningMethod string
SigningName string
}
for _, v := range cfg.ServiceOverride {
if v.Service == sd.name && v.Region == sd.region {
found = v
break
}
}
if found == nil {
t.Errorf("Missing override for service %s in case %s",
sd.name, test.name)
} else {
if found.SigningRegion != sd.signingRegion {
t.Errorf("Expected signing region '%s', received '%s' for case %s",
sd.signingRegion, found.SigningRegion, test.name)
}
if found.SigningMethod != sd.signingMethod {
t.Errorf("Expected signing method '%s', received '%s' for case %s",
sd.signingMethod, found.SigningMethod, test.name)
}
targetName := fmt.Sprintf("https://%s.foo.bar", sd.name)
if found.URL != targetName {
t.Errorf("Expected Endpoint '%s', received '%s' for case %s",
targetName, found.URL, test.name)
}
if found.SigningName != sd.signingName {
t.Errorf("Expected signing name '%s', received '%s' for case %s",
sd.signingName, found.SigningName, test.name)
}
fn := getResolver(cfg)
ep1, e := fn(sd.name, sd.region, nil)
if e != nil {
t.Errorf("Expected a valid endpoint for %s in case %s",
sd.name, test.name)
} else {
targetName := fmt.Sprintf("https://%s.foo.bar", sd.name)
if ep1.URL != targetName {
t.Errorf("Expected endpoint url: %s, received %s in case %s",
targetName, ep1.URL, test.name)
}
if ep1.SigningRegion != sd.signingRegion {
t.Errorf("Expected signing region '%s', received '%s' in case %s",
sd.signingRegion, ep1.SigningRegion, test.name)
}
if ep1.SigningMethod != sd.signingMethod {
t.Errorf("Expected signing method '%s', received '%s' in case %s",
sd.signingMethod, ep1.SigningMethod, test.name)
}
}
}
}
}
}
}
}

View File

@ -32,19 +32,12 @@ var (
)
// GenerateEC2InstanceTypes returns a map of ec2 resources
func GenerateEC2InstanceTypes(region string) (map[string]*InstanceType, error) {
sess, err := session.NewSession(&aws.Config{
Region: aws.String(region)},
)
if err != nil {
return nil, err
}
func GenerateEC2InstanceTypes(sess *session.Session) (map[string]*InstanceType, error) {
ec2Client := ec2.New(sess)
input := ec2.DescribeInstanceTypesInput{}
instanceTypes := make(map[string]*InstanceType)
if err = ec2Client.DescribeInstanceTypesPages(&input, func(page *ec2.DescribeInstanceTypesOutput, isLastPage bool) bool {
if err := ec2Client.DescribeInstanceTypesPages(&input, func(page *ec2.DescribeInstanceTypesOutput, isLastPage bool) bool {
for _, rawInstanceType := range page.InstanceTypes {
instanceTypes[*rawInstanceType.InstanceType] = transformInstanceType(rawInstanceType)
}

View File

@ -91,6 +91,10 @@ func (m *awsWrapper) getManagedNodegroupInfo(nodegroupName string, clusterName s
labels["k8sVersion"] = *r.Nodegroup.Version
}
if r.Nodegroup.NodegroupName != nil && len(*r.Nodegroup.NodegroupName) > 0 {
labels["eks.amazonaws.com/nodegroup"] = *r.Nodegroup.NodegroupName
}
if r.Nodegroup.Labels != nil && len(r.Nodegroup.Labels) > 0 {
labelsMap := r.Nodegroup.Labels
for k, v := range labelsMap {

View File

@ -163,13 +163,14 @@ func TestGetManagedNodegroup(t *testing.T) {
assert.Equal(t, taintList[1].Effect, apiv1.TaintEffect(taintEffect2))
assert.Equal(t, taintList[1].Key, taintKey2)
assert.Equal(t, taintList[1].Value, taintValue2)
assert.Equal(t, len(labelMap), 6)
assert.Equal(t, len(labelMap), 7)
assert.Equal(t, labelMap[labelKey1], labelValue1)
assert.Equal(t, labelMap[labelKey2], labelValue2)
assert.Equal(t, labelMap["diskSize"], strconv.FormatInt(diskSize, 10))
assert.Equal(t, labelMap["amiType"], amiType)
assert.Equal(t, labelMap["capacityType"], capacityType)
assert.Equal(t, labelMap["k8sVersion"], k8sVersion)
assert.Equal(t, labelMap["eks.amazonaws.com/nodegroup"], nodegroupName)
}
func TestGetManagedNodegroupWithNilValues(t *testing.T) {
@ -207,10 +208,11 @@ func TestGetManagedNodegroupWithNilValues(t *testing.T) {
taintList, labelMap, err := awsWrapper.getManagedNodegroupInfo(nodegroupName, clusterName)
assert.Nil(t, err)
assert.Equal(t, len(taintList), 0)
assert.Equal(t, len(labelMap), 3)
assert.Equal(t, len(labelMap), 4)
assert.Equal(t, labelMap["amiType"], amiType)
assert.Equal(t, labelMap["capacityType"], capacityType)
assert.Equal(t, labelMap["k8sVersion"], k8sVersion)
assert.Equal(t, labelMap["eks.amazonaws.com/nodegroup"], nodegroupName)
}
func TestGetManagedNodegroupWithEmptyValues(t *testing.T) {
@ -248,10 +250,11 @@ func TestGetManagedNodegroupWithEmptyValues(t *testing.T) {
taintList, labelMap, err := awsWrapper.getManagedNodegroupInfo(nodegroupName, clusterName)
assert.Nil(t, err)
assert.Equal(t, len(taintList), 0)
assert.Equal(t, len(labelMap), 3)
assert.Equal(t, len(labelMap), 4)
assert.Equal(t, labelMap["amiType"], amiType)
assert.Equal(t, labelMap["capacityType"], capacityType)
assert.Equal(t, labelMap["k8sVersion"], k8sVersion)
assert.Equal(t, labelMap["eks.amazonaws.com/nodegroup"], nodegroupName)
}
func TestMoreThen100Groups(t *testing.T) {

View File

@ -28,7 +28,7 @@ type InstanceType struct {
}
// StaticListLastUpdateTime is a string declaring the last time the static list was updated.
var StaticListLastUpdateTime = "2022-09-16"
var StaticListLastUpdateTime = "2022-12-11"
// InstanceTypes is a map of ec2 resources
var InstanceTypes = map[string]*InstanceType{
@ -844,6 +844,69 @@ var InstanceTypes = map[string]*InstanceType{
GPU: 0,
Architecture: "amd64",
},
"c6in.12xlarge": {
InstanceType: "c6in.12xlarge",
VCPU: 48,
MemoryMb: 98304,
GPU: 0,
Architecture: "amd64",
},
"c6in.16xlarge": {
InstanceType: "c6in.16xlarge",
VCPU: 64,
MemoryMb: 131072,
GPU: 0,
Architecture: "amd64",
},
"c6in.24xlarge": {
InstanceType: "c6in.24xlarge",
VCPU: 96,
MemoryMb: 196608,
GPU: 0,
Architecture: "amd64",
},
"c6in.2xlarge": {
InstanceType: "c6in.2xlarge",
VCPU: 8,
MemoryMb: 16384,
GPU: 0,
Architecture: "amd64",
},
"c6in.32xlarge": {
InstanceType: "c6in.32xlarge",
VCPU: 128,
MemoryMb: 262144,
GPU: 0,
Architecture: "amd64",
},
"c6in.4xlarge": {
InstanceType: "c6in.4xlarge",
VCPU: 16,
MemoryMb: 32768,
GPU: 0,
Architecture: "amd64",
},
"c6in.8xlarge": {
InstanceType: "c6in.8xlarge",
VCPU: 32,
MemoryMb: 65536,
GPU: 0,
Architecture: "amd64",
},
"c6in.large": {
InstanceType: "c6in.large",
VCPU: 2,
MemoryMb: 4096,
GPU: 0,
Architecture: "amd64",
},
"c6in.xlarge": {
InstanceType: "c6in.xlarge",
VCPU: 4,
MemoryMb: 8192,
GPU: 0,
Architecture: "amd64",
},
"c7g.12xlarge": {
InstanceType: "c7g.12xlarge",
VCPU: 48,
@ -2461,6 +2524,132 @@ var InstanceTypes = map[string]*InstanceType{
GPU: 0,
Architecture: "amd64",
},
"m6idn.12xlarge": {
InstanceType: "m6idn.12xlarge",
VCPU: 48,
MemoryMb: 196608,
GPU: 0,
Architecture: "amd64",
},
"m6idn.16xlarge": {
InstanceType: "m6idn.16xlarge",
VCPU: 64,
MemoryMb: 262144,
GPU: 0,
Architecture: "amd64",
},
"m6idn.24xlarge": {
InstanceType: "m6idn.24xlarge",
VCPU: 96,
MemoryMb: 393216,
GPU: 0,
Architecture: "amd64",
},
"m6idn.2xlarge": {
InstanceType: "m6idn.2xlarge",
VCPU: 8,
MemoryMb: 32768,
GPU: 0,
Architecture: "amd64",
},
"m6idn.32xlarge": {
InstanceType: "m6idn.32xlarge",
VCPU: 128,
MemoryMb: 524288,
GPU: 0,
Architecture: "amd64",
},
"m6idn.4xlarge": {
InstanceType: "m6idn.4xlarge",
VCPU: 16,
MemoryMb: 65536,
GPU: 0,
Architecture: "amd64",
},
"m6idn.8xlarge": {
InstanceType: "m6idn.8xlarge",
VCPU: 32,
MemoryMb: 131072,
GPU: 0,
Architecture: "amd64",
},
"m6idn.large": {
InstanceType: "m6idn.large",
VCPU: 2,
MemoryMb: 8192,
GPU: 0,
Architecture: "amd64",
},
"m6idn.xlarge": {
InstanceType: "m6idn.xlarge",
VCPU: 4,
MemoryMb: 16384,
GPU: 0,
Architecture: "amd64",
},
"m6in.12xlarge": {
InstanceType: "m6in.12xlarge",
VCPU: 48,
MemoryMb: 196608,
GPU: 0,
Architecture: "amd64",
},
"m6in.16xlarge": {
InstanceType: "m6in.16xlarge",
VCPU: 64,
MemoryMb: 262144,
GPU: 0,
Architecture: "amd64",
},
"m6in.24xlarge": {
InstanceType: "m6in.24xlarge",
VCPU: 96,
MemoryMb: 393216,
GPU: 0,
Architecture: "amd64",
},
"m6in.2xlarge": {
InstanceType: "m6in.2xlarge",
VCPU: 8,
MemoryMb: 32768,
GPU: 0,
Architecture: "amd64",
},
"m6in.32xlarge": {
InstanceType: "m6in.32xlarge",
VCPU: 128,
MemoryMb: 524288,
GPU: 0,
Architecture: "amd64",
},
"m6in.4xlarge": {
InstanceType: "m6in.4xlarge",
VCPU: 16,
MemoryMb: 65536,
GPU: 0,
Architecture: "amd64",
},
"m6in.8xlarge": {
InstanceType: "m6in.8xlarge",
VCPU: 32,
MemoryMb: 131072,
GPU: 0,
Architecture: "amd64",
},
"m6in.large": {
InstanceType: "m6in.large",
VCPU: 2,
MemoryMb: 8192,
GPU: 0,
Architecture: "amd64",
},
"m6in.xlarge": {
InstanceType: "m6in.xlarge",
VCPU: 4,
MemoryMb: 16384,
GPU: 0,
Architecture: "amd64",
},
"mac1.metal": {
InstanceType: "mac1.metal",
VCPU: 12,
@ -3378,6 +3567,132 @@ var InstanceTypes = map[string]*InstanceType{
GPU: 0,
Architecture: "amd64",
},
"r6idn.12xlarge": {
InstanceType: "r6idn.12xlarge",
VCPU: 48,
MemoryMb: 393216,
GPU: 0,
Architecture: "amd64",
},
"r6idn.16xlarge": {
InstanceType: "r6idn.16xlarge",
VCPU: 64,
MemoryMb: 524288,
GPU: 0,
Architecture: "amd64",
},
"r6idn.24xlarge": {
InstanceType: "r6idn.24xlarge",
VCPU: 96,
MemoryMb: 786432,
GPU: 0,
Architecture: "amd64",
},
"r6idn.2xlarge": {
InstanceType: "r6idn.2xlarge",
VCPU: 8,
MemoryMb: 65536,
GPU: 0,
Architecture: "amd64",
},
"r6idn.32xlarge": {
InstanceType: "r6idn.32xlarge",
VCPU: 128,
MemoryMb: 1048576,
GPU: 0,
Architecture: "amd64",
},
"r6idn.4xlarge": {
InstanceType: "r6idn.4xlarge",
VCPU: 16,
MemoryMb: 131072,
GPU: 0,
Architecture: "amd64",
},
"r6idn.8xlarge": {
InstanceType: "r6idn.8xlarge",
VCPU: 32,
MemoryMb: 262144,
GPU: 0,
Architecture: "amd64",
},
"r6idn.large": {
InstanceType: "r6idn.large",
VCPU: 2,
MemoryMb: 16384,
GPU: 0,
Architecture: "amd64",
},
"r6idn.xlarge": {
InstanceType: "r6idn.xlarge",
VCPU: 4,
MemoryMb: 32768,
GPU: 0,
Architecture: "amd64",
},
"r6in.12xlarge": {
InstanceType: "r6in.12xlarge",
VCPU: 48,
MemoryMb: 393216,
GPU: 0,
Architecture: "amd64",
},
"r6in.16xlarge": {
InstanceType: "r6in.16xlarge",
VCPU: 64,
MemoryMb: 524288,
GPU: 0,
Architecture: "amd64",
},
"r6in.24xlarge": {
InstanceType: "r6in.24xlarge",
VCPU: 96,
MemoryMb: 786432,
GPU: 0,
Architecture: "amd64",
},
"r6in.2xlarge": {
InstanceType: "r6in.2xlarge",
VCPU: 8,
MemoryMb: 65536,
GPU: 0,
Architecture: "amd64",
},
"r6in.32xlarge": {
InstanceType: "r6in.32xlarge",
VCPU: 128,
MemoryMb: 1048576,
GPU: 0,
Architecture: "amd64",
},
"r6in.4xlarge": {
InstanceType: "r6in.4xlarge",
VCPU: 16,
MemoryMb: 131072,
GPU: 0,
Architecture: "amd64",
},
"r6in.8xlarge": {
InstanceType: "r6in.8xlarge",
VCPU: 32,
MemoryMb: 262144,
GPU: 0,
Architecture: "amd64",
},
"r6in.large": {
InstanceType: "r6in.large",
VCPU: 2,
MemoryMb: 16384,
GPU: 0,
Architecture: "amd64",
},
"r6in.xlarge": {
InstanceType: "r6in.xlarge",
VCPU: 4,
MemoryMb: 32768,
GPU: 0,
Architecture: "amd64",
},
"t1.micro": {
InstanceType: "t1.micro",
VCPU: 1,
@ -3581,6 +3896,20 @@ var InstanceTypes = map[string]*InstanceType{
GPU: 0,
Architecture: "arm64",
},
"trn1.2xlarge": {
InstanceType: "trn1.2xlarge",
VCPU: 8,
MemoryMb: 32768,
GPU: 0,
Architecture: "amd64",
},
"trn1.32xlarge": {
InstanceType: "trn1.32xlarge",
VCPU: 128,
MemoryMb: 524288,
GPU: 0,
Architecture: "amd64",
},
"u-12tb1.112xlarge": {
InstanceType: "u-12tb1.112xlarge",
VCPU: 448,
@ -3588,6 +3917,20 @@ var InstanceTypes = map[string]*InstanceType{
GPU: 0,
Architecture: "amd64",
},
"u-18tb1.112xlarge": {
InstanceType: "u-18tb1.112xlarge",
VCPU: 448,
MemoryMb: 18874368,
GPU: 0,
Architecture: "amd64",
},
"u-24tb1.112xlarge": {
InstanceType: "u-24tb1.112xlarge",
VCPU: 448,
MemoryMb: 25165824,
GPU: 0,
Architecture: "amd64",
},
"u-3tb1.56xlarge": {
InstanceType: "u-3tb1.56xlarge",
VCPU: 224,

View File

@ -25,8 +25,11 @@ import (
"os"
"time"
"k8s.io/klog/v2"
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider/aws"
klog "k8s.io/klog/v2"
awssdk "k8s.io/autoscaler/cluster-autoscaler/cloudprovider/aws/aws-sdk-go/aws"
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider/aws/aws-sdk-go/aws/session"
)
var packageTemplate = template.Must(template.New("").Parse(`/*
@ -78,12 +81,22 @@ var InstanceTypes = map[string]*InstanceType{
// Please note that the IAM user running the static instance types generator must be
// a non-anonymous user with privileges to call the DescribeInstanceTypes EC2 API.
func main() {
var region = flag.String("region", "", "aws region you'd like to generate instances from."+
"It will populate list from all regions if region is not specified.")
var region = flag.String("region", "", "aws region you'd like to generate instances from.")
flag.Parse()
if awssdk.StringValue(region) == "" {
klog.Fatalf("Region is required to generate instance types")
}
defer klog.Flush()
instanceTypes, err := aws.GenerateEC2InstanceTypes(*region)
sess, err := session.NewSession(&awssdk.Config{
Region: region,
})
if err != nil {
klog.Fatal(err)
}
instanceTypes, err := aws.GenerateEC2InstanceTypes(sess)
if err != nil {
klog.Fatal(err)
}

View File

@ -125,10 +125,11 @@ func TestGetManagedNodegroupNoTaintsOrLabels(t *testing.T) {
assert.Equal(t, cacheObj.name, nodegroupName)
assert.Equal(t, cacheObj.clusterName, clusterName)
assert.Equal(t, len(cacheObj.taints), 0)
assert.Equal(t, len(cacheObj.labels), 3)
assert.Equal(t, len(cacheObj.labels), 4)
assert.Equal(t, cacheObj.labels["amiType"], amiType)
assert.Equal(t, cacheObj.labels["capacityType"], capacityType)
assert.Equal(t, cacheObj.labels["k8sVersion"], k8sVersion)
assert.Equal(t, cacheObj.labels["eks.amazonaws.com/nodegroup"], nodegroupName)
}
func TestGetManagedNodegroupWithTaintsAndLabels(t *testing.T) {
@ -194,13 +195,14 @@ func TestGetManagedNodegroupWithTaintsAndLabels(t *testing.T) {
assert.Equal(t, cacheObj.taints[1].Effect, apiv1.TaintEffect(taintEffect2))
assert.Equal(t, cacheObj.taints[1].Key, taintKey2)
assert.Equal(t, cacheObj.taints[1].Value, taintValue2)
assert.Equal(t, len(cacheObj.labels), 6)
assert.Equal(t, len(cacheObj.labels), 7)
assert.Equal(t, cacheObj.labels[labelKey1], labelValue1)
assert.Equal(t, cacheObj.labels[labelKey2], labelValue2)
assert.Equal(t, cacheObj.labels["diskSize"], strconv.FormatInt(diskSize, 10))
assert.Equal(t, cacheObj.labels["amiType"], amiType)
assert.Equal(t, cacheObj.labels["capacityType"], capacityType)
assert.Equal(t, cacheObj.labels["k8sVersion"], k8sVersion)
assert.Equal(t, cacheObj.labels["eks.amazonaws.com/nodegroup"], nodegroupName)
}
func TestGetManagedNodegroupInfoObjectWithError(t *testing.T) {
@ -294,13 +296,14 @@ func TestGetManagedNodegroupInfoObjectNoCachedNodegroup(t *testing.T) {
mngInfoObject, err := c.getManagedNodegroupInfoObject(nodegroupName, clusterName)
require.NoError(t, err)
assert.Equal(t, len(mngInfoObject.labels), 6)
assert.Equal(t, len(mngInfoObject.labels), 7)
assert.Equal(t, mngInfoObject.labels[labelKey1], labelValue1)
assert.Equal(t, mngInfoObject.labels[labelKey2], labelValue2)
assert.Equal(t, mngInfoObject.labels["diskSize"], strconv.FormatInt(diskSize, 10))
assert.Equal(t, mngInfoObject.labels["amiType"], amiType)
assert.Equal(t, mngInfoObject.labels["capacityType"], capacityType)
assert.Equal(t, mngInfoObject.labels["k8sVersion"], k8sVersion)
assert.Equal(t, mngInfoObject.labels["eks.amazonaws.com/nodegroup"], nodegroupName)
k.AssertCalled(t, "DescribeNodegroup", &eks.DescribeNodegroupInput{
ClusterName: &clusterName,
NodegroupName: &nodegroupName,
@ -377,13 +380,14 @@ func TestGetManagedNodegroupLabelsNoCachedNodegroup(t *testing.T) {
labelsMap, err := c.getManagedNodegroupLabels(nodegroupName, clusterName)
require.NoError(t, err)
assert.Equal(t, len(labelsMap), 6)
assert.Equal(t, len(labelsMap), 7)
assert.Equal(t, labelsMap[labelKey1], labelValue1)
assert.Equal(t, labelsMap[labelKey2], labelValue2)
assert.Equal(t, labelsMap["diskSize"], strconv.FormatInt(diskSize, 10))
assert.Equal(t, labelsMap["amiType"], amiType)
assert.Equal(t, labelsMap["capacityType"], capacityType)
assert.Equal(t, labelsMap["k8sVersion"], k8sVersion)
assert.Equal(t, labelsMap["eks.amazonaws.com/nodegroup"], nodegroupName)
k.AssertCalled(t, "DescribeNodegroup", &eks.DescribeNodegroupInput{
ClusterName: &clusterName,
NodegroupName: &nodegroupName,
@ -471,13 +475,14 @@ func TestGetManagedNodegroupLabelsWithCachedNodegroupThatExpires(t *testing.T) {
// Query for nodegroup entry after it expires - should have the new labels added
newLabelsMap, err := c.getManagedNodegroupLabels(nodegroupName, clusterName)
require.NoError(t, err)
assert.Equal(t, len(newLabelsMap), 6)
assert.Equal(t, len(newLabelsMap), 7)
assert.Equal(t, newLabelsMap[labelKey1], labelValue1)
assert.Equal(t, newLabelsMap[labelKey2], labelValue2)
assert.Equal(t, newLabelsMap["diskSize"], strconv.FormatInt(diskSize, 10))
assert.Equal(t, newLabelsMap["amiType"], amiType)
assert.Equal(t, newLabelsMap["capacityType"], capacityType)
assert.Equal(t, newLabelsMap["k8sVersion"], k8sVersion)
assert.Equal(t, newLabelsMap["eks.amazonaws.com/nodegroup"], nodegroupName)
k.AssertCalled(t, "DescribeNodegroup", &eks.DescribeNodegroupInput{
ClusterName: &clusterName,
NodegroupName: &nodegroupName,

View File

@ -106,6 +106,11 @@ func (azure *AzureCloudProvider) NodeGroupForNode(node *apiv1.Node) (cloudprovid
return azure.azureManager.GetNodeGroupForInstance(ref)
}
// HasInstance returns whether a given node has a corresponding instance in this cloud provider
func (azure *AzureCloudProvider) HasInstance(*apiv1.Node) (bool, error) {
return true, cloudprovider.ErrNotImplemented
}
// Pricing returns pricing model for this cloud provider or error if not available.
func (azure *AzureCloudProvider) Pricing() (cloudprovider.PricingModel, errors.AutoscalerError) {
return nil, cloudprovider.ErrNotImplemented

View File

@ -51,7 +51,7 @@ rules:
resources: ["statefulsets", "replicasets", "daemonsets"]
verbs: ["watch", "list", "get"]
- apiGroups: ["storage.k8s.io"]
resources: ["storageclasses", "csinodes"]
resources: ["storageclasses", "csinodes", "csidrivers", "csistoragecapacities"]
verbs: ["get", "list", "watch"]
- apiGroups: ["batch"]
resources: ["jobs", "cronjobs"]

View File

@ -51,7 +51,7 @@ rules:
resources: ["statefulsets", "replicasets", "daemonsets"]
verbs: ["watch", "list", "get"]
- apiGroups: ["storage.k8s.io"]
resources: ["storageclasses", "csinodes"]
resources: ["storageclasses", "csinodes", "csidrivers", "csistoragecapacities"]
verbs: ["get", "list", "watch"]
- apiGroups: ["batch"]
resources: ["jobs", "cronjobs"]

View File

@ -51,7 +51,7 @@ rules:
resources: ["statefulsets", "replicasets", "daemonsets"]
verbs: ["watch", "list", "get"]
- apiGroups: ["storage.k8s.io"]
resources: ["storageclasses", "csinodes"]
resources: ["storageclasses", "csinodes", "csidrivers", "csistoragecapacities"]
verbs: ["get", "list", "watch"]
- apiGroups: ["batch"]
resources: ["jobs", "cronjobs"]

View File

@ -51,7 +51,7 @@ rules:
resources: ["statefulsets", "replicasets", "daemonsets"]
verbs: ["watch", "list", "get"]
- apiGroups: ["storage.k8s.io"]
resources: ["storageclasses", "csinodes"]
resources: ["storageclasses", "csinodes", "csidrivers", "csistoragecapacities"]
verbs: ["get", "list", "watch"]
- apiGroups: ["batch"]
resources: ["jobs", "cronjobs"]

View File

@ -51,7 +51,7 @@ rules:
resources: ["statefulsets", "replicasets", "daemonsets"]
verbs: ["watch", "list", "get"]
- apiGroups: ["storage.k8s.io"]
resources: ["storageclasses", "csinodes"]
resources: ["storageclasses", "csinodes", "csidrivers", "csistoragecapacities"]
verbs: ["get", "list", "watch"]
- apiGroups: ["batch"]
resources: ["jobs", "cronjobs"]

View File

@ -51,7 +51,7 @@ rules:
resources: ["statefulsets", "replicasets", "daemonsets"]
verbs: ["watch", "list", "get"]
- apiGroups: ["storage.k8s.io"]
resources: ["storageclasses", "csinodes"]
resources: ["storageclasses", "csinodes", "csidrivers", "csistoragecapacities"]
verbs: ["get", "list", "watch"]
- apiGroups: ["batch"]
resources: ["jobs", "cronjobs"]

View File

@ -51,7 +51,7 @@ rules:
resources: ["statefulsets", "replicasets", "daemonsets"]
verbs: ["watch", "list", "get"]
- apiGroups: ["storage.k8s.io"]
resources: ["storageclasses", "csinodes"]
resources: ["storageclasses", "csinodes", "csidrivers", "csistoragecapacities"]
verbs: ["get", "list", "watch"]
- apiGroups: ["batch"]
resources: ["jobs", "cronjobs"]

View File

@ -51,7 +51,7 @@ rules:
resources: ["statefulsets", "replicasets", "daemonsets"]
verbs: ["watch", "list", "get"]
- apiGroups: ["storage.k8s.io"]
resources: ["storageclasses", "csinodes"]
resources: ["storageclasses", "csinodes", "csidrivers", "csistoragecapacities"]
verbs: ["get", "list", "watch"]
- apiGroups: ["batch"]
resources: ["jobs", "cronjobs"]

View File

@ -180,6 +180,11 @@ func (baiducloud *baiducloudCloudProvider) NodeGroupForNode(node *apiv1.Node) (c
return asg, err
}
// HasInstance returns whether a given node has a corresponding instance in this cloud provider
func (baiducloud *baiducloudCloudProvider) HasInstance(*apiv1.Node) (bool, error) {
return true, cloudprovider.ErrNotImplemented
}
// Pricing returns pricing model for this cloud provider or error if not available.
// Implementation optional.
func (baiducloud *baiducloudCloudProvider) Pricing() (cloudprovider.PricingModel, errors.AutoscalerError) {

View File

@ -104,6 +104,11 @@ func (d *bizflycloudCloudProvider) NodeGroupForNode(node *apiv1.Node) (cloudprov
return nil, nil
}
// HasInstance returns whether a given node has a corresponding instance in this cloud provider
func (d *bizflycloudCloudProvider) HasInstance(node *apiv1.Node) (bool, error) {
return true, cloudprovider.ErrNotImplemented
}
// Pricing returns pricing model for this cloud provider or error if not
// available. Implementation optional.
func (d *bizflycloudCloudProvider) Pricing() (cloudprovider.PricingModel, errors.AutoscalerError) {

View File

@ -81,6 +81,11 @@ func (b *brightboxCloudProvider) NodeGroupForNode(node *apiv1.Node) (cloudprovid
return nil, nil
}
// HasInstance returns whether a given node has a corresponding instance in this cloud provider
func (b *brightboxCloudProvider) HasInstance(node *apiv1.Node) (bool, error) {
return true, cloudprovider.ErrNotImplemented
}
// Refresh is before every main loop and can be used to dynamically
// update cloud provider state.
// In particular the list of node groups returned by NodeGroups can

View File

@ -122,6 +122,11 @@ func (ccp *cherryCloudProvider) NodeGroupForNode(node *apiv1.Node) (cloudprovide
return nil, nil
}
// HasInstance returns whether a given node has a corresponding instance in this cloud provider
func (ccp *cherryCloudProvider) HasInstance(node *apiv1.Node) (bool, error) {
return true, cloudprovider.ErrNotImplemented
}
// Pricing returns pricing model for this cloud provider or error if not available.
func (ccp *cherryCloudProvider) Pricing() (cloudprovider.PricingModel, errors.AutoscalerError) {
return nil, cloudprovider.ErrNotImplemented

View File

@ -607,8 +607,6 @@ func (mgr *cherryManagerRest) deleteNodes(nodegroup string, nodes []NodeRef, upd
func BuildGenericLabels(nodegroup string, plan *Plan) map[string]string {
result := make(map[string]string)
//result[kubeletapis.LabelArch] = "amd64"
//result[kubeletapis.LabelOS] = "linux"
result[apiv1.LabelInstanceType] = plan.Name
//result[apiv1.LabelZoneRegion] = ""
//result[apiv1.LabelZoneFailureDomain] = "0"

View File

@ -99,6 +99,11 @@ func (d *civoCloudProvider) NodeGroupForNode(node *apiv1.Node) (cloudprovider.No
return nil, nil
}
// HasInstance returns whether a given node has a corresponding instance in this cloud provider
func (d *civoCloudProvider) HasInstance(node *apiv1.Node) (bool, error) {
return true, cloudprovider.ErrNotImplemented
}
// Pricing returns pricing model for this cloud provider or error if not
// available. Implementation optional.
func (d *civoCloudProvider) Pricing() (cloudprovider.PricingModel, errors.AutoscalerError) {

View File

@ -100,6 +100,10 @@ type CloudProvider interface {
// occurred. Must be implemented.
NodeGroupForNode(*apiv1.Node) (NodeGroup, error)
// HasInstance returns whether the node has a corresponding instance in this cloud provider:
// true if the node has an instance, false if it no longer exists.
HasInstance(*apiv1.Node) (bool, error)
// Pricing returns pricing model for this cloud provider or error if not available.
// Implementation optional.
Pricing() (PricingModel, errors.AutoscalerError)

View File

@ -68,6 +68,11 @@ func (provider *cloudStackCloudProvider) NodeGroupForNode(node *v1.Node) (cloudp
return provider.manager.clusterForNode(node)
}
// HasInstance returns whether a given node has a corresponding instance in this cloud provider
func (provider *cloudStackCloudProvider) HasInstance(node *v1.Node) (bool, error) {
return true, cloudprovider.ErrNotImplemented
}
// Cleanup cleans up open resources before the cloud provider is destroyed, i.e. go routines etc.
func (provider *cloudStackCloudProvider) Cleanup() error {
return provider.manager.cleanup()

View File

@ -5,6 +5,30 @@ the [cluster-api project](https://github.com/kubernetes-sigs/cluster-api) to
manage the provisioning and de-provisioning of nodes within a Kubernetes
cluster.
## Table of Contents:
<!-- TOC BEGIN -->
* [Kubernetes Version](#kubernetes-version)
* [Starting the Autoscaler](#starting-the-autoscaler)
* [Configuring node group auto discovery](#configuring-node-group-auto-discovery)
* [Connecting cluster-autoscaler to Cluster API management and workload Clusters](#connecting-cluster-autoscaler-to-cluster-api-management-and-workload-clusters)
* [Autoscaler running in a joined cluster using service account credentials](#autoscaler-running-in-a-joined-cluster-using-service-account-credentials)
* [Autoscaler running in workload cluster using service account credentials, with separate management cluster](#autoscaler-running-in-workload-cluster-using-service-account-credentials-with-separate-management-cluster)
* [Autoscaler running in management cluster using service account credentials, with separate workload cluster](#autoscaler-running-in-management-cluster-using-service-account-credentials-with-separate-workload-cluster)
* [Autoscaler running anywhere, with separate kubeconfigs for management and workload clusters](#autoscaler-running-anywhere-with-separate-kubeconfigs-for-management-and-workload-clusters)
* [Autoscaler running anywhere, with a common kubeconfig for management and workload clusters](#autoscaler-running-anywhere-with-a-common-kubeconfig-for-management-and-workload-clusters)
* [Enabling Autoscaling](#enabling-autoscaling)
* [Scale from zero support](#scale-from-zero-support)
* [RBAC changes for scaling from zero](#rbac-changes-for-scaling-from-zero)
* [Pre-defined labels and taints on nodes scaled from zero](#pre-defined-labels-and-taints-on-nodes-scaled-from-zero)
* [Specifying a Custom Resource Group](#specifying-a-custom-resource-group)
* [Specifying a Custom Resource Version](#specifying-a-custom-resource-version)
* [Sample manifest](#sample-manifest)
* [A note on permissions](#a-note-on-permissions)
* [Autoscaling with ClusterClass and Managed Topologies](#autoscaling-with-clusterclass-and-managed-topologies)
* [Special note on GPU instances](#special-note-on-gpu-instances)
* [Special note on balancing similar node groups](#special-note-on-balancing-similar-node-groups)
<!-- TOC END -->
## Kubernetes Version
The cluster-api provider requires Kubernetes v1.16 or greater to run the
@ -322,3 +346,84 @@ spec:
**Warning**: If the Autoscaler is enabled **and** the replicas field is set for a `MachineDeployment` or `MachineSet`, the Cluster may enter a broken state where replicas become unpredictable.
If the replicas field is unset in the Cluster definition, Autoscaling can be enabled [as described above](#enabling-autoscaling).
## Special note on GPU instances
As with other providers, if the device plugin on nodes that provides GPU
resources takes some time to advertise the GPU resource to the cluster, this
may cause Cluster Autoscaler to unnecessarily scale out multiple times.
To avoid this, you can configure `kubelet` on your GPU nodes to label the node
before it joins the cluster by passing it the `--node-labels` flag. For the
CAPI cloudprovider, the label format is as follows:
`cluster-api/accelerator=<gpu-type>`
`<gpu-type>` is arbitrary.
It is important to note that if you are using the `--gpu-total` flag to limit the number
of GPU resources in your cluster, the `<gpu-type>` value must match
between the command line flag and the node labels. Setting these values incorrectly
can lead to the autoscaler creating too many GPU resources.
For example, if you are using the autoscaler command line flag
`--gpu-total=gfx-hardware:1:2` to limit the number of `gfx-hardware` resources
to a minimum of 1 and maximum of 2, then you should use the kubelet node label flag
`--node-labels=cluster-api/accelerator=gfx-hardware`.
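As a minimal sketch (assuming a kubeadm-based bootstrap; the `KubeadmConfigTemplate` fields and the `gfx-hardware` value are illustrative assumptions, not prescribed here), the kubelet label and the autoscaler limit could be wired together like this:

```yaml
# Hypothetical KubeadmConfigTemplate fragment: label GPU nodes before they join the cluster.
spec:
  template:
    spec:
      joinConfiguration:
        nodeRegistration:
          kubeletExtraArgs:
            node-labels: "cluster-api/accelerator=gfx-hardware"
---
# Matching Cluster Autoscaler container args; the gpu-type must match the label above.
args:
  - --cloud-provider=clusterapi
  - --gpu-total=gfx-hardware:1:2
```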
## Special note on balancing similar node groups
The Cluster Autoscaler feature to enable balancing similar node groups
(activated with the `--balance-similar-node-groups` flag) is a powerful and
popular feature. When enabled, the Cluster Autoscaler will attempt to create
new nodes by adding them in a manner that balances the creation between
similar node groups. With Cluster API, these node groups correspond directly
to the scalable resources (usually MachineDeployments and MachineSets) associated
with the nodes in question. In order for the nodes of these scalable resources
to be considered similar by the Cluster Autoscaler, they must have the same
capacity, labels, and taints for the nodes which will be created from them.
To help assist the Cluster Autoscaler in determining which node groups are
similar, the command line flags `--balancing-ignore-label` and
`--balancing-label` are provided. For an expanded discussion about balancing
similar node groups and the options which are available, please see the
[Cluster Autoscaler FAQ](../../FAQ.md).
Because Cluster API can address many different cloud providers, it is important
to configure the balancing labels to ignore provider-specific labels which
are used for carrying zonal information on Kubernetes nodes. The Cluster
Autoscaler implementation for Cluster API does not assume any labels (aside from
the [well-known Kubernetes labels](https://kubernetes.io/docs/reference/labels-annotations-taints/))
to be ignored when running. Users must configure their Cluster Autoscaler deployment
to ignore labels which might be different between nodes, but which do not
otherwise affect node behavior or size (for example when two MachineDeployments
are the same except for their deployment zones). The Cluster API community has
decided not to carry cloud provider specific labels in the Cluster Autoscaler
to reduce the possibility for labels to clash between providers. Additionally,
the community has agreed to promote documentation and the use of the `--balancing-ignore-label`
flag as the preferred method of deployment to reduce the extended need for
maintenance on the Cluster Autoscaler when new providers are added or updated.
For further context around this decision, please see the
[Cluster API Deep Dive into Cluster Autoscaler Node Group Balancing discussion from 2022-09-12](https://www.youtube.com/watch?v=jbhca_9oPuQ&t=5s).
The following table shows some of the most common labels used by cloud providers
to designate regional or zonal information on Kubernetes nodes. It is shared
here as a reference for users who might be deploying on these infrastructures.
| Cloud Provider | Label to ignore | Notes |
| --- | --- | --- |
| Alibaba Cloud | `topology.diskplugin.csi.alibabacloud.com/zone` | Used by the Alibaba Cloud CSI driver as a target for persistent volume node affinity |
| AWS | `alpha.eksctl.io/instance-id` | Used by `eksctl` to identify instances |
| AWS | `alpha.eksctl.io/nodegroup-name` | Used by `eksctl` to identify node group names |
| AWS | `eks.amazonaws.com/nodegroup` | Used by EKS to identify node groups |
| AWS | `k8s.amazonaws.com/eniConfig` | Used by the AWS CNI for custom networking |
| AWS | `lifecycle` | Used by AWS as a label for spot instances |
| AWS | `topology.ebs.csi.aws.com/zone` | Used by the AWS EBS CSI driver as a target for persistent volume node affinity |
| Azure | `topology.disk.csi.azure.com/zone` | Used as the topology key by the Azure Disk CSI driver |
| Azure | `agentpool` | Legacy label used to specify to which Azure node pool a particular node belongs |
| Azure | `kubernetes.azure.com/agentpool` | Used by AKS to identify to which node pool a particular node belongs |
| GCE | `topology.gke.io/zone` | Used to specify the zone of the node |
| IBM Cloud | `ibm-cloud.kubernetes.io/worker-id` | Used by the IBM Cloud Cloud Controller Manager to identify the node |
| IBM Cloud | `vpc-block-csi-driver-labels` | Used by the IBM Cloud CSI driver as a target for persistent volume node affinity |
| IBM Cloud | `ibm-cloud.kubernetes.io/vpc-instance-id` | Used when a VPC is in use on IBM Cloud |
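As an illustrative example (the specific labels chosen are assumptions drawn from the table above, not a recommendation for every deployment), a Cluster Autoscaler args fragment that enables balancing and ignores two of the AWS labels might look like:

```yaml
# Sketch of Cluster Autoscaler container args; adjust the ignored labels to your infrastructure.
args:
  - --cloud-provider=clusterapi
  - --balance-similar-node-groups=true
  - --balancing-ignore-label=topology.ebs.csi.aws.com/zone
  - --balancing-ignore-label=alpha.eksctl.io/instance-id
```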

View File

@ -25,7 +25,6 @@ import (
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
kubeletapis "k8s.io/kubelet/pkg/apis"
schedulerframework "k8s.io/kubernetes/pkg/scheduler/framework"
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider"
@ -370,10 +369,8 @@ func buildGenericLabels(nodeName string) map[string]string {
// TODO revisit this function and add an explanation about what these
// labels are used for, or remove them if not necessary
m := make(map[string]string)
m[kubeletapis.LabelArch] = cloudprovider.DefaultArch
m[corev1.LabelArchStable] = cloudprovider.DefaultArch
m[kubeletapis.LabelOS] = cloudprovider.DefaultOS
m[corev1.LabelOSStable] = cloudprovider.DefaultOS
m[corev1.LabelHostname] = nodeName
@ -387,10 +384,8 @@ func extractNodeLabels(node *corev1.Node) map[string]string {
return m
}
setLabelIfNotEmpty(m, node.Labels, kubeletapis.LabelArch)
setLabelIfNotEmpty(m, node.Labels, corev1.LabelArchStable)
setLabelIfNotEmpty(m, node.Labels, kubeletapis.LabelOS)
setLabelIfNotEmpty(m, node.Labels, corev1.LabelOSStable)
setLabelIfNotEmpty(m, node.Labels, corev1.LabelInstanceType)

View File

@ -1310,9 +1310,7 @@ func TestNodeGroupTemplateNodeInfo(t *testing.T) {
expectedErr: nil,
nodeLabels: map[string]string{
"kubernetes.io/os": "linux",
"beta.kubernetes.io/os": "linux",
"kubernetes.io/arch": "amd64",
"beta.kubernetes.io/arch": "amd64",
},
expectedCapacity: map[corev1.ResourceName]int64{
corev1.ResourceCPU: 2,
@ -1322,9 +1320,7 @@ func TestNodeGroupTemplateNodeInfo(t *testing.T) {
},
expectedNodeLabels: map[string]string{
"kubernetes.io/os": "linux",
"beta.kubernetes.io/os": "linux",
"kubernetes.io/arch": "amd64",
"beta.kubernetes.io/arch": "amd64",
"kubernetes.io/hostname": "random value",
},
},
@ -1340,9 +1336,7 @@ func TestNodeGroupTemplateNodeInfo(t *testing.T) {
expectedErr: nil,
nodeLabels: map[string]string{
"kubernetes.io/os": "windows",
"beta.kubernetes.io/os": "windows",
"kubernetes.io/arch": "arm64",
"beta.kubernetes.io/arch": "arm64",
"node.kubernetes.io/instance-type": "instance1",
},
expectedCapacity: map[corev1.ResourceName]int64{
@ -1353,9 +1347,7 @@ func TestNodeGroupTemplateNodeInfo(t *testing.T) {
expectedNodeLabels: map[string]string{
"kubernetes.io/hostname": "random value",
"kubernetes.io/os": "windows",
"beta.kubernetes.io/os": "windows",
"kubernetes.io/arch": "arm64",
"beta.kubernetes.io/arch": "arm64",
"node.kubernetes.io/instance-type": "instance1",
},
},

View File

@ -81,6 +81,11 @@ func (p *provider) NodeGroupForNode(node *corev1.Node) (cloudprovider.NodeGroup,
return ng, nil
}
// HasInstance returns whether a given node has a corresponding instance in this cloud provider
func (p *provider) HasInstance(node *corev1.Node) (bool, error) {
return true, cloudprovider.ErrNotImplemented
}
func (*provider) Pricing() (cloudprovider.PricingModel, errors.AutoscalerError) {
return nil, cloudprovider.ErrNotImplemented
}

View File

@ -101,6 +101,11 @@ func (d *digitaloceanCloudProvider) NodeGroupForNode(node *apiv1.Node) (cloudpro
return nil, nil
}
// HasInstance returns whether a given node has a corresponding instance in this cloud provider
func (d *digitaloceanCloudProvider) HasInstance(node *apiv1.Node) (bool, error) {
return true, cloudprovider.ErrNotImplemented
}
// Pricing returns pricing model for this cloud provider or error if not
// available. Implementation optional.
func (d *digitaloceanCloudProvider) Pricing() (cloudprovider.PricingModel, errors.AutoscalerError) {

View File

@ -131,6 +131,11 @@ func (e *exoscaleCloudProvider) NodeGroupForNode(node *apiv1.Node) (cloudprovide
return nodeGroup, nil
}
// HasInstance returns whether a given node has a corresponding instance in this cloud provider
func (e *exoscaleCloudProvider) HasInstance(node *apiv1.Node) (bool, error) {
return true, cloudprovider.ErrNotImplemented
}
// Pricing returns pricing model for this cloud provider or error if not available.
// Implementation optional.
func (e *exoscaleCloudProvider) Pricing() (cloudprovider.PricingModel, errors.AutoscalerError) {

View File

@ -134,6 +134,11 @@ func (e *externalGrpcCloudProvider) NodeGroupForNode(node *apiv1.Node) (cloudpro
return ng, nil
}
// HasInstance returns whether a given node has a corresponding instance in this cloud provider
func (e *externalGrpcCloudProvider) HasInstance(node *apiv1.Node) (bool, error) {
return true, cloudprovider.ErrNotImplemented
}
// pricingModel implements cloudprovider.PricingModel interface.
type pricingModel struct {
client protos.CloudProviderClient

View File

@ -101,6 +101,11 @@ func (gce *GceCloudProvider) NodeGroupForNode(node *apiv1.Node) (cloudprovider.N
return mig, err
}
// HasInstance returns whether a given node has a corresponding instance in this cloud provider
func (gce *GceCloudProvider) HasInstance(node *apiv1.Node) (bool, error) {
return true, cloudprovider.ErrNotImplemented
}
// Pricing returns pricing model for this cloud provider or error if not available.
func (gce *GceCloudProvider) Pricing() (cloudprovider.PricingModel, errors.AutoscalerError) {
return gce.pricingModel, nil
@ -177,7 +182,6 @@ type Mig interface {
cloudprovider.NodeGroup
GceRef() GceRef
Version() string
}
type gceMig struct {
@ -188,11 +192,6 @@ type gceMig struct {
maxSize int
}
// Version return the Mig version.
func (mig *gceMig) Version() string {
return ""
}
// GceRef returns Mig's GceRef
func (mig *gceMig) GceRef() GceRef {
return mig.gceRef

View File

@ -589,7 +589,11 @@ func (m *gceManagerImpl) GetMigTemplateNode(mig Mig) (*apiv1.Node, error) {
if err != nil {
return nil, err
}
return m.templates.BuildNodeFromTemplate(mig, template, machineType.CPU, machineType.Memory, nil, m.reserved)
migOsInfo, err := m.templates.MigOsInfo(mig.Id(), template)
if err != nil {
return nil, err
}
return m.templates.BuildNodeFromTemplate(mig, migOsInfo, template, machineType.CPU, machineType.Memory, nil, m.reserved)
}
// parseMIGAutoDiscoverySpecs returns any provided NodeGroupAutoDiscoverySpecs

View File

@ -87,7 +87,10 @@ type GceReserved struct{}
// CalculateKernelReserved computes how much memory Linux kernel will reserve.
// TODO(jkaniuk): account for crashkernel reservation on RHEL / CentOS
func (r *GceReserved) CalculateKernelReserved(physicalMemory int64, os OperatingSystem, osDistribution OperatingSystemDistribution, arch SystemArchitecture, nodeVersion string) int64 {
func (r *GceReserved) CalculateKernelReserved(m MigOsInfo, physicalMemory int64) int64 {
os := m.Os()
osDistribution := m.OsDistribution()
arch := m.Arch()
switch os {
case OperatingSystemLinux:
// Account for memory reserved by kernel
@ -267,7 +270,9 @@ func EphemeralStorageOnLocalSSDFilesystemOverheadInBytes(diskCount int64, osDist
}
// CalculateOSReservedEphemeralStorage estimates how much ephemeral storage OS will reserve and eviction threshold
func (r *GceReserved) CalculateOSReservedEphemeralStorage(diskSize int64, os OperatingSystem, osDistribution OperatingSystemDistribution, arch SystemArchitecture, nodeVersion string) int64 {
func (r *GceReserved) CalculateOSReservedEphemeralStorage(m MigOsInfo, diskSize int64) int64 {
osDistribution := m.OsDistribution()
arch := m.Arch()
switch osDistribution {
case OperatingSystemDistributionCOS:
storage := int64(math.Ceil(0.015635*float64(diskSize))) + int64(math.Ceil(4.148*GiB)) // os partition estimation
@ -289,3 +294,30 @@ func (r *GceReserved) CalculateOSReservedEphemeralStorage(diskSize int64, os Ope
return 0
}
}
// GceMigOsInfo contains os details of nodes in gce mig.
type GceMigOsInfo struct {
os OperatingSystem
osDistribution OperatingSystemDistribution
arch SystemArchitecture
}
// Os returns the operating system.
func (m *GceMigOsInfo) Os() OperatingSystem {
return m.os
}
// OsDistribution returns the operating system distribution.
func (m *GceMigOsInfo) OsDistribution() OperatingSystemDistribution {
return m.osDistribution
}
// Arch returns the system architecture.
func (m *GceMigOsInfo) Arch() SystemArchitecture {
return m.arch
}
// NewMigOsInfo returns the GCE implementation of the MigOsInfo interface.
func NewMigOsInfo(os OperatingSystem, osDistribution OperatingSystemDistribution, arch SystemArchitecture) MigOsInfo {
return &GceMigOsInfo{os, osDistribution, arch}
}

View File

@ -108,7 +108,8 @@ func TestCalculateKernelReservedLinux(t *testing.T) {
for idx, tc := range testCases {
r := &GceReserved{}
t.Run(fmt.Sprintf("%v", idx), func(t *testing.T) {
reserved := r.CalculateKernelReserved(tc.physicalMemory, OperatingSystemLinux, tc.osDistribution, tc.arch, "")
m := NewMigOsInfo(OperatingSystemLinux, tc.osDistribution, tc.arch)
reserved := r.CalculateKernelReserved(m, tc.physicalMemory)
if tc.osDistribution == OperatingSystemDistributionUbuntu {
assert.Equal(t, tc.reservedMemory+int64(math.Min(correctionConstant*float64(tc.physicalMemory), maximumCorrectionValue)+ubuntuSpecificOffset), reserved)
} else if tc.osDistribution == OperatingSystemDistributionCOS {

View File

@ -16,13 +16,23 @@ limitations under the License.
package gce
// MigOsInfo stores OS parameters.
type MigOsInfo interface {
// Os returns the operating system.
Os() OperatingSystem
// OsDistribution returns the operating system distribution.
OsDistribution() OperatingSystemDistribution
// Arch returns the system architecture.
Arch() SystemArchitecture
}
// OsReservedCalculator calculates the OS reserved values.
type OsReservedCalculator interface {
// CalculateKernelReserved computes how much memory OS kernel will reserve.
// NodeVersion parameter is optional. If empty string is passed a result calculated using default node version will be returned.
CalculateKernelReserved(physicalMemory int64, os OperatingSystem, osDistribution OperatingSystemDistribution, arch SystemArchitecture, nodeVersion string) int64
CalculateKernelReserved(m MigOsInfo, physicalMemory int64) int64
// CalculateOSReservedEphemeralStorage estimates how much ephemeral storage OS will reserve and eviction threshold.
// NodeVersion parameter is optional. If empty string is passed a result calculated using default node version will be returned.
CalculateOSReservedEphemeralStorage(diskSize int64, os OperatingSystem, osDistribution OperatingSystemDistribution, arch SystemArchitecture, nodeVersion string) int64
CalculateOSReservedEphemeralStorage(m MigOsInfo, diskSize int64) int64
}

View File

@ -71,8 +71,8 @@ func (t *GceTemplateBuilder) getAcceleratorCount(accelerators []*gce.Accelerator
}
// BuildCapacity builds a list of resource capacities given list of hardware.
func (t *GceTemplateBuilder) BuildCapacity(cpu int64, mem int64, accelerators []*gce.AcceleratorConfig, os OperatingSystem, osDistribution OperatingSystemDistribution, arch SystemArchitecture,
ephemeralStorage int64, ephemeralStorageLocalSSDCount int64, pods *int64, version string, r OsReservedCalculator, extendedResources apiv1.ResourceList) (apiv1.ResourceList, error) {
func (t *GceTemplateBuilder) BuildCapacity(m MigOsInfo, cpu int64, mem int64, accelerators []*gce.AcceleratorConfig,
ephemeralStorage int64, ephemeralStorageLocalSSDCount int64, pods *int64, r OsReservedCalculator, extendedResources apiv1.ResourceList) (apiv1.ResourceList, error) {
capacity := apiv1.ResourceList{}
if pods == nil {
capacity[apiv1.ResourcePods] = *resource.NewQuantity(110, resource.DecimalSI)
@ -81,7 +81,7 @@ func (t *GceTemplateBuilder) BuildCapacity(cpu int64, mem int64, accelerators []
}
capacity[apiv1.ResourceCPU] = *resource.NewQuantity(cpu, resource.DecimalSI)
memTotal := mem - r.CalculateKernelReserved(mem, os, osDistribution, arch, version)
memTotal := mem - r.CalculateKernelReserved(m, mem)
capacity[apiv1.ResourceMemory] = *resource.NewQuantity(memTotal, resource.DecimalSI)
if accelerators != nil && len(accelerators) > 0 {
@ -91,9 +91,9 @@ func (t *GceTemplateBuilder) BuildCapacity(cpu int64, mem int64, accelerators []
if ephemeralStorage > 0 {
var storageTotal int64
if ephemeralStorageLocalSSDCount > 0 {
storageTotal = ephemeralStorage - EphemeralStorageOnLocalSSDFilesystemOverheadInBytes(ephemeralStorageLocalSSDCount, osDistribution)
storageTotal = ephemeralStorage - EphemeralStorageOnLocalSSDFilesystemOverheadInBytes(ephemeralStorageLocalSSDCount, m.OsDistribution())
} else {
storageTotal = ephemeralStorage - r.CalculateOSReservedEphemeralStorage(ephemeralStorage, os, osDistribution, arch, version)
storageTotal = ephemeralStorage - r.CalculateOSReservedEphemeralStorage(m, ephemeralStorage)
}
capacity[apiv1.ResourceEphemeralStorage] = *resource.NewQuantity(int64(math.Max(float64(storageTotal), 0)), resource.DecimalSI)
}
@ -160,8 +160,31 @@ func getKubeEnvValueFromTemplateMetadata(template *gce.InstanceTemplate) (string
return "", nil
}
// MigOsInfo returns the OS details stored in the instance template.
func (t *GceTemplateBuilder) MigOsInfo(migId string, template *gce.InstanceTemplate) (MigOsInfo, error) {
kubeEnvValue, err := getKubeEnvValueFromTemplateMetadata(template)
if err != nil {
return nil, fmt.Errorf("could not obtain kube-env from template metadata; %v", err)
}
os := extractOperatingSystemFromKubeEnv(kubeEnvValue)
if os == OperatingSystemUnknown {
return nil, fmt.Errorf("could not obtain os from kube-env from template metadata")
}
osDistribution := extractOperatingSystemDistributionFromKubeEnv(kubeEnvValue)
if osDistribution == OperatingSystemDistributionUnknown {
return nil, fmt.Errorf("could not obtain os-distribution from kube-env from template metadata")
}
arch, err := extractSystemArchitectureFromKubeEnv(kubeEnvValue)
if err != nil {
arch = DefaultArch
klog.Errorf("Couldn't extract architecture from kube-env for MIG %q, falling back to %q. Error: %v", migId, arch, err)
}
return NewMigOsInfo(os, osDistribution, arch), nil
}
// BuildNodeFromTemplate builds node from provided GCE template.
func (t *GceTemplateBuilder) BuildNodeFromTemplate(mig Mig, template *gce.InstanceTemplate, cpu int64, mem int64, pods *int64, reserved OsReservedCalculator) (*apiv1.Node, error) {
func (t *GceTemplateBuilder) BuildNodeFromTemplate(mig Mig, migOsInfo MigOsInfo, template *gce.InstanceTemplate, cpu int64, mem int64, pods *int64, reserved OsReservedCalculator) (*apiv1.Node, error) {
if template.Properties == nil {
return nil, fmt.Errorf("instance template %s has no properties", template.Name)
@ -181,22 +204,6 @@ func (t *GceTemplateBuilder) BuildNodeFromTemplate(mig Mig, template *gce.Instan
Labels: map[string]string{},
}
// This call is safe even if kubeEnvValue is empty
os := extractOperatingSystemFromKubeEnv(kubeEnvValue)
if os == OperatingSystemUnknown {
return nil, fmt.Errorf("could not obtain os from kube-env from template metadata")
}
osDistribution := extractOperatingSystemDistributionFromKubeEnv(kubeEnvValue)
if osDistribution == OperatingSystemDistributionUnknown {
return nil, fmt.Errorf("could not obtain os-distribution from kube-env from template metadata")
}
arch, err := extractSystemArchitectureFromKubeEnv(kubeEnvValue)
if err != nil {
arch = DefaultArch
klog.Errorf("Couldn't extract architecture from kube-env for MIG %q, falling back to %q. Error: %v", mig.Id(), arch, err)
}
addBootDiskAnnotations(&node, template.Properties)
var ephemeralStorage int64 = -1
if !isBootDiskEphemeralStorageWithInstanceTemplateDisabled(kubeEnvValue) {
@ -225,7 +232,7 @@ func (t *GceTemplateBuilder) BuildNodeFromTemplate(mig Mig, template *gce.Instan
klog.Errorf("could not fetch extended resources from instance template: %v", err)
}
capacity, err := t.BuildCapacity(cpu, mem, template.Properties.GuestAccelerators, os, osDistribution, arch, ephemeralStorage, ephemeralStorageLocalSsdCount, pods, mig.Version(), reserved, extendedResources)
capacity, err := t.BuildCapacity(migOsInfo, cpu, mem, template.Properties.GuestAccelerators, ephemeralStorage, ephemeralStorageLocalSsdCount, pods, reserved, extendedResources)
if err != nil {
return nil, err
}
@ -269,7 +276,7 @@ func (t *GceTemplateBuilder) BuildNodeFromTemplate(mig Mig, template *gce.Instan
node.Status.Allocatable = nodeAllocatable
}
// GenericLabels
labels, err := BuildGenericLabels(mig.GceRef(), template.Properties.MachineType, nodeName, os, arch)
labels, err := BuildGenericLabels(mig.GceRef(), template.Properties.MachineType, nodeName, migOsInfo.Os(), migOsInfo.Arch())
if err != nil {
return nil, err
}

View File

@ -65,7 +65,8 @@ func TestBuildNodeFromTemplateSetsResources(t *testing.T) {
ephemeralStorageLocalSSDCount int64
extendedResources apiv1.ResourceList
// test outputs
expectedErr bool
expectedMigInfoErr bool
expectedNodeTemplateErr bool
}
testCases := []testCase{
{
@ -86,7 +87,6 @@ func TestBuildNodeFromTemplateSetsResources(t *testing.T) {
reservedCpu: "1000m",
reservedMemory: fmt.Sprintf("%v", 1*units.MiB),
reservedEphemeralStorage: "30Gi",
expectedErr: false,
},
{
scenario: "no kube-reserved in kube-env",
@ -97,18 +97,16 @@ func TestBuildNodeFromTemplateSetsResources(t *testing.T) {
physicalCpu: 8,
physicalMemory: 200 * units.MiB,
kubeReserved: false,
expectedErr: false,
}, {
scenario: "no kube-env at all",
kubeEnv: "",
physicalCpu: 8,
physicalMemory: 200 * units.MiB,
kubeReserved: false,
expectedErr: false,
}, {
scenario: "totally messed up kube-env",
kubeEnv: "This kube-env is totally messed up",
expectedErr: true,
expectedMigInfoErr: true,
}, {
scenario: "max pods per node specified",
kubeEnv: "",
@ -116,7 +114,6 @@ func TestBuildNodeFromTemplateSetsResources(t *testing.T) {
physicalMemory: 200 * units.MiB,
pods: &thirtyPodsPerNode,
kubeReserved: false,
expectedErr: false,
},
{
scenario: "BLOCK_EPH_STORAGE_BOOT_DISK in kube-env",
@ -133,7 +130,6 @@ func TestBuildNodeFromTemplateSetsResources(t *testing.T) {
reservedEphemeralStorage: "0Gi",
kubeReserved: true,
isEphemeralStorageBlocked: true,
expectedErr: false,
},
{
scenario: "BLOCK_EPH_STORAGE_BOOT_DISK is false in kube-env",
@ -146,14 +142,13 @@ func TestBuildNodeFromTemplateSetsResources(t *testing.T) {
reservedMemory: fmt.Sprintf("%v", 0*units.MiB),
reservedEphemeralStorage: "0Gi",
kubeReserved: true,
expectedErr: false,
},
{
scenario: "more local SSDs requested for ephemeral storage than attached",
kubeEnv: "AUTOSCALER_ENV_VARS: os_distribution=cos;os=linux;ephemeral_storage_local_ssd_count=1\n",
ephemeralStorageLocalSSDCount: 1,
attachedLocalSSDCount: 0,
expectedErr: true,
expectedNodeTemplateErr: true,
},
{
scenario: "all attached local SSDs requested for ephemeral storage",
@ -163,7 +158,6 @@ func TestBuildNodeFromTemplateSetsResources(t *testing.T) {
bootDiskSizeGiB: 300,
ephemeralStorageLocalSSDCount: 2,
attachedLocalSSDCount: 2,
expectedErr: false,
},
{
scenario: "more local SSDs attached than requested for ephemeral storage",
@ -172,7 +166,6 @@ func TestBuildNodeFromTemplateSetsResources(t *testing.T) {
physicalMemory: 200 * units.MiB,
ephemeralStorageLocalSSDCount: 2,
attachedLocalSSDCount: 4,
expectedErr: false,
},
{
scenario: "ephemeral storage on local SSDs with kube-reserved",
@ -185,7 +178,6 @@ func TestBuildNodeFromTemplateSetsResources(t *testing.T) {
reservedMemory: fmt.Sprintf("%v", 0*units.MiB),
reservedEphemeralStorage: "10Gi",
attachedLocalSSDCount: 4,
expectedErr: false,
},
{
scenario: "extended_resources present in kube-env",
@ -198,7 +190,6 @@ func TestBuildNodeFromTemplateSetsResources(t *testing.T) {
reservedMemory: fmt.Sprintf("%v", 0*units.MiB),
reservedEphemeralStorage: "10Gi",
attachedLocalSSDCount: 4,
expectedErr: false,
extendedResources: apiv1.ResourceList{
apiv1.ResourceName("someResource"): *resource.NewQuantity(2, resource.DecimalSI),
apiv1.ResourceName("anotherResource"): *resource.NewQuantity(1*units.GB, resource.DecimalSI),
@ -215,7 +206,6 @@ func TestBuildNodeFromTemplateSetsResources(t *testing.T) {
reservedMemory: fmt.Sprintf("%v", 0*units.MiB),
reservedEphemeralStorage: "10Gi",
attachedLocalSSDCount: 4,
expectedErr: false,
extendedResources: apiv1.ResourceList{},
},
}
@ -256,8 +246,13 @@ func TestBuildNodeFromTemplateSetsResources(t *testing.T) {
if tc.kubeEnv != "" {
template.Properties.Metadata.Items = []*gce.MetadataItems{{Key: "kube-env", Value: &tc.kubeEnv}}
}
node, err := tb.BuildNodeFromTemplate(mig, template, tc.physicalCpu, tc.physicalMemory, tc.pods, &GceReserved{})
if tc.expectedErr {
migOsInfo, err := tb.MigOsInfo(mig.Id(), template)
if tc.expectedMigInfoErr {
assert.Error(t, err)
return
}
node, err := tb.BuildNodeFromTemplate(mig, migOsInfo, template, tc.physicalCpu, tc.physicalMemory, tc.pods, &GceReserved{})
if tc.expectedNodeTemplateErr {
assert.Error(t, err)
} else {
assert.NoError(t, err)
@ -286,7 +281,8 @@ func TestBuildNodeFromTemplateSetsResources(t *testing.T) {
} else if tc.isEphemeralStorageBlocked {
physicalEphemeralStorageGiB = 0
}
capacity, err := tb.BuildCapacity(tc.physicalCpu, tc.physicalMemory, tc.accelerators, OperatingSystemLinux, OperatingSystemDistributionCOS, "", physicalEphemeralStorageGiB*units.GiB, tc.ephemeralStorageLocalSSDCount, tc.pods, "", &GceReserved{}, tc.extendedResources)
migOsInfo := NewMigOsInfo(OperatingSystemLinux, OperatingSystemDistributionCOS, "")
capacity, err := tb.BuildCapacity(migOsInfo, tc.physicalCpu, tc.physicalMemory, tc.accelerators, physicalEphemeralStorageGiB*units.GiB, tc.ephemeralStorageLocalSSDCount, tc.pods, &GceReserved{}, tc.extendedResources)
assert.NoError(t, err)
assertEqualResourceLists(t, "Capacity", capacity, node.Status.Capacity)
if !tc.kubeReserved {
@ -593,7 +589,8 @@ func TestBuildCapacityMemory(t *testing.T) {
t.Run(fmt.Sprintf("%v", idx), func(t *testing.T) {
tb := GceTemplateBuilder{}
noAccelerators := make([]*gce.AcceleratorConfig, 0)
buildCapacity, err := tb.BuildCapacity(tc.physicalCpu, tc.physicalMemory, noAccelerators, tc.os, OperatingSystemDistributionCOS, "", -1, 0, nil, "", &GceReserved{}, apiv1.ResourceList{})
migOsInfo := NewMigOsInfo(tc.os, OperatingSystemDistributionCOS, "")
buildCapacity, err := tb.BuildCapacity(migOsInfo, tc.physicalCpu, tc.physicalMemory, noAccelerators, -1, 0, nil, &GceReserved{}, apiv1.ResourceList{})
assert.NoError(t, err)
expectedCapacity, err := makeResourceList2(tc.physicalCpu, tc.expectedCapacityMemory, 0, 110)
assert.NoError(t, err)
@ -1397,7 +1394,11 @@ func TestBuildNodeFromTemplateArch(t *testing.T) {
},
}
tb := &GceTemplateBuilder{}
gotNode, gotErr := tb.BuildNodeFromTemplate(mig, template, 16, 128, nil, &GceReserved{})
migOsInfo, gotErr := tb.MigOsInfo(mig.Id(), template)
if gotErr != nil {
t.Fatalf("MigOsInfo unexpected error: %v", gotErr)
}
gotNode, gotErr := tb.BuildNodeFromTemplate(mig, migOsInfo, template, 16, 128, nil, &GceReserved{})
if gotErr != nil {
t.Fatalf("BuildNodeFromTemplate unexpected error: %v", gotErr)
}

View File

@ -1,6 +1,8 @@
#approvers:
approvers:
- apricote
#- LKaemmerling
#- 4ND3R50N
#reviewers:
reviewers:
- apricote
#- LKaemmerling
#- 4ND3R50N

View File

@ -99,6 +99,11 @@ func (d *HetznerCloudProvider) NodeGroupForNode(node *apiv1.Node) (cloudprovider
return group, nil
}
// HasInstance returns whether a given node has a corresponding instance in this cloud provider
func (d *HetznerCloudProvider) HasInstance(node *apiv1.Node) (bool, error) {
return true, cloudprovider.ErrNotImplemented
}
// Pricing returns pricing model for this cloud provider or error if not
// available. Implementation optional.
func (d *HetznerCloudProvider) Pricing() (cloudprovider.PricingModel, errors.AutoscalerError) {

View File

@ -317,6 +317,7 @@ func buildNodeGroupLabels(n *hetznerNodeGroup) map[string]string {
return map[string]string{
apiv1.LabelInstanceType: n.instanceType,
apiv1.LabelZoneRegionStable: n.region,
"csi.hetzner.cloud/location": n.region,
nodeGroupLabel: n.id,
}
}

View File

@ -31,6 +31,9 @@ spec:
- --scale-down-delay-after-add=1m0s
- --scale-down-unneeded-time=1m0s
- --expander=random
- --max-empty-bulk-delete=100
- --max-scale-down-parallelism=100
- --node-deletion-batcher-interval=10s
volumeMounts:
- name: cloud-config
mountPath: /config

View File

@ -123,6 +123,11 @@ func (hcp *huaweicloudCloudProvider) NodeGroupForNode(node *apiv1.Node) (cloudpr
return hcp.cloudServiceManager.GetAsgForInstance(instanceID)
}
// HasInstance returns whether a given node has a corresponding instance in this cloud provider
func (hcp *huaweicloudCloudProvider) HasInstance(node *apiv1.Node) (bool, error) {
return true, cloudprovider.ErrNotImplemented
}
// Pricing returns pricing model for this cloud provider or error if not available. Not implemented.
func (hcp *huaweicloudCloudProvider) Pricing() (cloudprovider.PricingModel, errors.AutoscalerError) {
return nil, cloudprovider.ErrNotImplemented

View File

@ -626,10 +626,57 @@ func (csm *cloudServiceManager) buildNodeFromTemplate(asgName string, template *
node.Labels = cloudprovider.JoinStringMaps(node.Labels, buildGenericLabels(template, nodeName))
node.Spec.Taints = extractTaintsFromTags(template.tags)
node.Status.Conditions = cloudprovider.BuildReadyConditions()
return &node, nil
}
// extractTaintsFromTags extracts taints from AS (auto scaling) group tags.
// The tag is of the format "k8s.io_cluster-autoscaler_node-template_taint_<taint-key>". "<taint-key>" is
// the name of the taint and the value of each tag specifies the taint value and effect with the
// format "<taint-value>:<taint-effect>".
// Example tags: "k8s.io_cluster-autoscaler_node-template_taint_dedicated": "true:NoSchedule"
func extractTaintsFromTags(tags map[string]string) []apiv1.Taint {
taints := make([]apiv1.Taint, 0)
for tagKey, tagValue := range tags {
if !strings.Contains(tagKey, "k8s.io_cluster-autoscaler_node-template_taint_") {
continue
}
splits := strings.Split(tagKey, "k8s.io_cluster-autoscaler_node-template_taint_")
// If the tagKey is 'k8s.io_cluster-autoscaler_node-template_taint_', the second element is '',
// this should be ruled out.
if len(splits) < 2 || splits[1] == "" {
klog.Warningf("Invalid tag key format:%s", tagKey)
continue
}
values := strings.Split(tagValue, ":")
if len(values) != 2 {
klog.Warningf("Invalid tag value format:%s", tagValue)
continue
}
if values[1] != string(apiv1.TaintEffectNoSchedule) &&
values[1] != string(apiv1.TaintEffectPreferNoSchedule) &&
values[1] != string(apiv1.TaintEffectNoExecute) {
klog.Warningf("Invalid tag value format:%s", tagValue)
continue
}
taints = append(taints, apiv1.Taint{
Key: splits[1],
Value: values[0],
Effect: apiv1.TaintEffect(values[1]),
})
klog.V(6).Infof("Extract taints from tag key/value successfully:%s, %s", tagKey, tagValue)
}
return taints
}
func buildGenericLabels(template *asgTemplate, nodeName string) map[string]string {
result := make(map[string]string)
result[apiv1.LabelArchStable] = cloudprovider.DefaultArch
@ -643,6 +690,10 @@ func buildGenericLabels(template *asgTemplate, nodeName string) map[string]strin
// append custom node labels
for key, value := range template.tags {
// ignore the tag which represents a taint
if strings.Contains(key, "k8s.io_cluster-autoscaler_node-template_taint_") {
continue
}
result[key] = value
}

View File

@ -0,0 +1,164 @@
/*
Copyright 2020 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package huaweicloud
import (
"reflect"
"testing"
apiv1 "k8s.io/api/core/v1"
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider"
)
func Test_extractTaintsFromTags(t *testing.T) {
tests := []struct {
name string
args map[string]string
want []apiv1.Taint
}{
{
name: "tag in right format",
args: map[string]string{
"k8s.io_cluster-autoscaler_node-template_taint_foo": "bar:NoSchedule",
},
want: []apiv1.Taint{
{Key: "foo", Value: "bar", Effect: apiv1.TaintEffectNoSchedule},
},
},
{
name: "empty taint key should be ignored",
args: map[string]string{
"k8s.io_cluster-autoscaler_node-template_taint_": "bar:NoSchedule",
},
want: []apiv1.Taint{},
},
{
name: "invalid tag key should be ignored",
args: map[string]string{
"invalidTagKey": "bar:NoSchedule",
},
want: []apiv1.Taint{},
},
{
name: "invalid taint effect should be ignored",
args: map[string]string{
"k8s.io_cluster-autoscaler_node-template_taint_foo": "bar:InvalidEffect",
},
want: []apiv1.Taint{},
},
{
name: "empty taint value",
args: map[string]string{
"k8s.io_cluster-autoscaler_node-template_taint_foo": ":NoSchedule",
},
want: []apiv1.Taint{
{Key: "foo", Value: "", Effect: apiv1.TaintEffectNoSchedule},
},
},
{
name: "one tag with valid tag, one tag with invalid key, ignore the invalid one",
args: map[string]string{
"k8s.io_cluster-autoscaler_node-template_taint_foo": "bar:NoSchedule",
"invalidTagKey": ":NoSchedule",
},
want: []apiv1.Taint{
{Key: "foo", Value: "bar", Effect: apiv1.TaintEffectNoSchedule},
},
},
{
name: "one tag with valid key/value, one tag with invalid value, ignore the invalid one",
args: map[string]string{
"k8s.io_cluster-autoscaler_node-template_taint_foo": "bar:NoSchedule",
"k8s.io_cluster-autoscaler_node-template_taint_bar": "invalidTagValue",
},
want: []apiv1.Taint{
{Key: "foo", Value: "bar", Effect: apiv1.TaintEffectNoSchedule},
},
},
{
name: "one tag with valid key/value, one tag with invalid value length, ignore the invalid one",
args: map[string]string{
"k8s.io_cluster-autoscaler_node-template_taint_foo": "bar:NoSchedule",
"k8s.io_cluster-autoscaler_node-template_taint_bar": "foo:NoSchedule:more",
},
want: []apiv1.Taint{
{Key: "foo", Value: "bar", Effect: apiv1.TaintEffectNoSchedule},
},
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
if got := extractTaintsFromTags(tt.args); !reflect.DeepEqual(got, tt.want) {
t.Errorf("extractTaintsFromTags() = %v, want %v", got, tt.want)
}
})
}
}
func Test_buildGenericLabels(t *testing.T) {
template := &asgTemplate{
name: "foo",
region: "foo",
zone: "foo",
}
tests := []struct {
name string
tags map[string]string
want map[string]string
}{
{
name: "tags contain taints key, ignore it when extract labels",
tags: map[string]string{
"k8s.io_cluster-autoscaler_node-template_taint_foo": "true:PreferNoSchedule",
"foo": "bar",
},
want: map[string]string{
apiv1.LabelArchStable: cloudprovider.DefaultArch,
apiv1.LabelOSStable: cloudprovider.DefaultOS,
apiv1.LabelInstanceTypeStable: template.name,
apiv1.LabelTopologyRegion: template.region,
apiv1.LabelTopologyZone: template.zone,
apiv1.LabelHostname: "foo",
"foo": "bar",
},
},
{
name: "tags don't contain taints key",
tags: map[string]string{
"foo": "bar",
},
want: map[string]string{
apiv1.LabelArchStable: cloudprovider.DefaultArch,
apiv1.LabelOSStable: cloudprovider.DefaultOS,
apiv1.LabelInstanceTypeStable: template.name,
apiv1.LabelTopologyRegion: template.region,
apiv1.LabelTopologyZone: template.zone,
apiv1.LabelHostname: "foo",
"foo": "bar",
},
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
template.tags = tt.tags
if got := buildGenericLabels(template, "foo"); !reflect.DeepEqual(got, tt.want) {
t.Errorf("buildGenericLabels() = %v, want %v", got, tt.want)
}
})
}
}

View File

@ -232,6 +232,11 @@ func (ic *IonosCloudCloudProvider) NodeGroupForNode(node *apiv1.Node) (cloudprov
return nil, nil
}
// HasInstance returns whether a given node has a corresponding instance in this cloud provider
func (ic *IonosCloudCloudProvider) HasInstance(node *apiv1.Node) (bool, error) {
return true, cloudprovider.ErrNotImplemented
}
// Pricing returns pricing model for this cloud provider or error if not
// available. Implementation optional.
func (ic *IonosCloudCloudProvider) Pricing() (cloudprovider.PricingModel, errors.AutoscalerError) {

View File

@ -18,9 +18,8 @@ package kamatera
import (
"context"
"encoding/hex"
"fmt"
"github.com/satori/go.uuid"
"github.com/google/uuid"
"k8s.io/autoscaler/cluster-autoscaler/version"
"k8s.io/klog/v2"
"strings"
@ -266,5 +265,5 @@ func kamateraServerName(namePrefix string) string {
if len(namePrefix) > 0 {
namePrefix = fmt.Sprintf("%s-", namePrefix)
}
return fmt.Sprintf("%s%s", namePrefix, hex.EncodeToString(uuid.NewV4().Bytes()))
return fmt.Sprintf("%s%s", namePrefix, strings.ReplaceAll(uuid.New().String(), "-", ""))
}
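For reference, a minimal, illustrative sketch (not part of this change) of the name format the google/uuid-based helper now produces. exampleServerName is a hypothetical stand-in for kamateraServerName above; the only assumed dependency is github.com/google/uuid, which this diff already introduces.

package main

import (
	"fmt"
	"strings"

	"github.com/google/uuid"
)

// exampleServerName mirrors the updated kamateraServerName logic, shown only
// to illustrate the generated name format after the dependency swap.
func exampleServerName(namePrefix string) string {
	if len(namePrefix) > 0 {
		namePrefix = fmt.Sprintf("%s-", namePrefix)
	}
	return fmt.Sprintf("%s%s", namePrefix, strings.ReplaceAll(uuid.New().String(), "-", ""))
}

func main() {
	name := exampleServerName("k8s")
	// Prints the prefix followed by 32 hex characters, e.g. "k8s-3f2504e04f8941d39a0c0305e82c3301".
	fmt.Println(name, len(name))
}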

View File

@ -70,6 +70,11 @@ func (k *kamateraCloudProvider) NodeGroupForNode(node *apiv1.Node) (cloudprovide
return nil, nil
}
// HasInstance returns whether a given node has a corresponding instance in this cloud provider
func (k *kamateraCloudProvider) HasInstance(node *apiv1.Node) (bool, error) {
return true, cloudprovider.ErrNotImplemented
}
// Pricing returns pricing model for this cloud provider or error if not available.
// Implementation optional.
func (k *kamateraCloudProvider) Pricing() (cloudprovider.PricingModel, errors.AutoscalerError) {

View File

@ -18,14 +18,13 @@ package kamatera
import (
"context"
"encoding/hex"
"fmt"
uuid "github.com/satori/go.uuid"
"github.com/google/uuid"
"github.com/stretchr/testify/mock"
"strings"
)
func mockKamateraServerName() string {
return fmt.Sprintf("%s", hex.EncodeToString(uuid.NewV4().Bytes()))
return strings.ReplaceAll(uuid.New().String(), "-", "")
}
func mockServerConfig(namePrefix string, tags []string) ServerConfig {

View File

@ -139,6 +139,11 @@ func (kubemark *KubemarkCloudProvider) NodeGroupForNode(node *apiv1.Node) (cloud
return nil, nil
}
// HasInstance returns whether a given node has a corresponding instance in this cloud provider
func (kubemark *KubemarkCloudProvider) HasInstance(node *apiv1.Node) (bool, error) {
return true, cloudprovider.ErrNotImplemented
}
// GetAvailableMachineTypes get all machine types that can be requested from the cloud provider.
// Implementation optional.
func (kubemark *KubemarkCloudProvider) GetAvailableMachineTypes() ([]string, error) {

View File

@ -80,6 +80,11 @@ func (kubemark *KubemarkCloudProvider) NodeGroupForNode(node *apiv1.Node) (cloud
return nil, cloudprovider.ErrNotImplemented
}
// HasInstance returns whether a given node has a corresponding instance in this cloud provider
func (kubemark *KubemarkCloudProvider) HasInstance(node *apiv1.Node) (bool, error) {
return true, cloudprovider.ErrNotImplemented
}
// GetAvailableMachineTypes get all machine types that can be requested from the cloud provider.
// Implementation optional.
func (kubemark *KubemarkCloudProvider) GetAvailableMachineTypes() ([]string, error) {

View File

@ -67,6 +67,11 @@ func (l *linodeCloudProvider) NodeGroupForNode(node *apiv1.Node) (cloudprovider.
return nil, nil
}
// HasInstance returns whether a given node has a corresponding instance in this cloud provider
func (l *linodeCloudProvider) HasInstance(node *apiv1.Node) (bool, error) {
return true, cloudprovider.ErrNotImplemented
}
// Pricing returns pricing model for this cloud provider or error if not available.
// Implementation optional.
func (l *linodeCloudProvider) Pricing() (cloudprovider.PricingModel, errors.AutoscalerError) {

View File

@ -39,7 +39,7 @@ rules:
resources: ["daemonsets", "replicasets", "statefulsets"]
verbs: ["watch", "list", "get"]
- apiGroups: ["storage.k8s.io"]
resources: ["storageclasses"]
resources: ["storageclasses", "csinodes", "csidrivers", "csistoragecapacities"]
verbs: ["watch", "list", "get"]
- apiGroups: [""]
resources: ["configmaps"]

View File

@ -135,6 +135,11 @@ func (mcp *magnumCloudProvider) NodeGroupForNode(node *apiv1.Node) (cloudprovide
return nil, nil
}
// HasInstance returns whether a given node has a corresponding instance in this cloud provider
func (mcp *magnumCloudProvider) HasInstance(node *apiv1.Node) (bool, error) {
return true, cloudprovider.ErrNotImplemented
}
// Pricing is not implemented.
func (mcp *magnumCloudProvider) Pricing() (cloudprovider.PricingModel, errors.AutoscalerError) {
return nil, cloudprovider.ErrNotImplemented

View File

@ -23,7 +23,7 @@ import (
"testing"
"time"
"github.com/satori/go.uuid"
"github.com/gofrs/uuid"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/mock"
"github.com/stretchr/testify/require"
@ -46,7 +46,12 @@ func (m *magnumManagerDiscoveryMock) autoDiscoverNodeGroups(cfgs []magnumAutoDis
ngs := []*nodegroups.NodeGroup{}
two := 2
for i := 0; i < rand.Intn(20); i++ {
ngs = append(ngs, &nodegroups.NodeGroup{Name: uuid.NewV4().String(), NodeCount: 1, MinNodeCount: 1, MaxNodeCount: &two})
newUUID, err := uuid.NewV4()
if err != nil {
return nil, fmt.Errorf("failed to produce a random UUID: %v", err)
}
newUUIDStr := newUUID.String()
ngs = append(ngs, &nodegroups.NodeGroup{Name: newUUIDStr, NodeCount: 1, MinNodeCount: 1, MaxNodeCount: &two})
}
return ngs, nil
}

View File

@ -21,7 +21,7 @@ import (
"sort"
"strings"
"github.com/satori/go.uuid"
"github.com/gofrs/uuid"
apiv1 "k8s.io/api/core/v1"

View File

@ -177,6 +177,29 @@ func (_m *CloudProvider) NodeGroupForNode(_a0 *v1.Node) (cloudprovider.NodeGroup
return r0, r1
}
// HasInstance provides a mock function with given fields:
func (_m *CloudProvider) HasInstance(_a0 *v1.Node) (bool, error) {
ret := _m.Called(_a0)
var r0 bool
if rf, ok := ret.Get(0).(func(*v1.Node) bool); ok {
r0 = rf(_a0)
} else {
if ret.Get(0) != nil {
r0 = ret.Get(0).(bool)
}
}
var r1 error
if rf, ok := ret.Get(1).(func(*v1.Node) error); ok {
r1 = rf(_a0)
} else {
r1 = ret.Error(1)
}
return r0, r1
}
// NodeGroups provides a mock function with given fields:
func (_m *CloudProvider) NodeGroups() []cloudprovider.NodeGroup {
ret := _m.Called()

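A minimal usage sketch for the new HasInstance mock, assuming the generated mock lives in the repository's cloudprovider mocks package and embeds testify's mock.Mock (as the _m.Called calls suggest). The test name, import path, and node value are illustrative only, not part of this change.

package example_test

import (
	"testing"

	apiv1 "k8s.io/api/core/v1"

	// Assumed import path for the generated mock above; adjust to wherever the
	// mocks package actually lives in this repository.
	"k8s.io/autoscaler/cluster-autoscaler/cloudprovider/mocks"
)

func TestHasInstanceStub(t *testing.T) {
	provider := &mocks.CloudProvider{}
	node := &apiv1.Node{}

	// Program the generated mock: report that the node has a backing instance.
	provider.On("HasInstance", node).Return(true, nil)

	has, err := provider.HasInstance(node)
	if err != nil || !has {
		t.Fatalf("expected stubbed HasInstance to return (true, nil), got (%v, %v)", has, err)
	}
	provider.AssertExpectations(t)
}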
View File

@ -1,5 +1,5 @@
approvers:
#- jlamillan
- jlamillan
reviewers:
#- jlamillan
- jlamillan
#- ericrrath

View File

@ -15,6 +15,9 @@ metadata:
k8s-addon: cluster-autoscaler.addons.k8s.io
k8s-app: cluster-autoscaler
rules:
- apiGroups: ["storage.k8s.io"]
resources: ["csidriver", "csistoragecapacities"]
verbs: ["watch", "list"]
- apiGroups: [""]
resources: ["events", "endpoints"]
verbs: ["create", "patch"]
@ -24,16 +27,16 @@ rules:
- apiGroups: [""]
resources: ["pods/status"]
verbs: ["update"]
- apiGroups: [""]
resources: ["namespaces"]
verbs: ["get", "list", "watch"]
- apiGroups: [""]
resources: ["endpoints"]
resourceNames: ["cluster-autoscaler"]
verbs: ["get", "update"]
- apiGroups: [""]
resources: ["nodes"]
verbs: ["watch", "list", "get", "update"]
- apiGroups: [""]
resources: ["namepaces"]
verbs: ["list"]
verbs: ["watch", "list", "get", "patch", "update"]
- apiGroups: [""]
resources:
- "pods"
@ -53,10 +56,10 @@ rules:
verbs: ["watch", "list", "get"]
- apiGroups: ["storage.k8s.io"]
resources: ["storageclasses", "csinodes"]
verbs: ["get", "list", "watch"]
- apiGroups: ["batch"]
resources: ["jobs", "cronjobs"]
verbs: ["watch", "list", "get"]
- apiGroups: ["batch", "extensions"]
resources: ["jobs"]
verbs: ["get", "list", "watch", "patch"]
- apiGroups: ["coordination.k8s.io"]
resources: ["leases"]
verbs: ["create"]
@ -64,10 +67,6 @@ rules:
resourceNames: ["cluster-autoscaler"]
resources: ["leases"]
verbs: ["get", "update"]
- apiGroups: ["storage.k8s.io"]
resources: ["csidrivers", "csistoragecapacities"]
verbs: ["get", "list"]
---
apiVersion: rbac.authorization.k8s.io/v1
kind: Role

View File

@ -15,6 +15,9 @@ metadata:
k8s-addon: cluster-autoscaler.addons.k8s.io
k8s-app: cluster-autoscaler
rules:
- apiGroups: ["storage.k8s.io"]
resources: ["csidriver", "csistoragecapacities"]
verbs: ["watch", "list"]
- apiGroups: [""]
resources: ["events", "endpoints"]
verbs: ["create", "patch"]
@ -24,16 +27,16 @@ rules:
- apiGroups: [""]
resources: ["pods/status"]
verbs: ["update"]
- apiGroups: [""]
resources: ["namespaces"]
verbs: ["get", "list", "watch"]
- apiGroups: [""]
resources: ["endpoints"]
resourceNames: ["cluster-autoscaler"]
verbs: ["get", "update"]
- apiGroups: [""]
resources: ["nodes"]
verbs: ["watch", "list", "get", "update"]
- apiGroups: [""]
resources: ["namespaces"]
verbs: ["list"]
verbs: ["watch", "list", "get", "patch", "update"]
- apiGroups: [""]
resources:
- "pods"
@ -53,10 +56,10 @@ rules:
verbs: ["watch", "list", "get"]
- apiGroups: ["storage.k8s.io"]
resources: ["storageclasses", "csinodes"]
verbs: ["get", "list", "watch"]
- apiGroups: ["batch"]
resources: ["jobs", "cronjobs"]
verbs: ["watch", "list", "get"]
- apiGroups: ["batch", "extensions"]
resources: ["jobs"]
verbs: ["get", "list", "watch", "patch"]
- apiGroups: ["coordination.k8s.io"]
resources: ["leases"]
verbs: ["create"]
@ -64,10 +67,6 @@ rules:
resourceNames: ["cluster-autoscaler"]
resources: ["leases"]
verbs: ["get", "update"]
- apiGroups: ["storage.k8s.io"]
resources: ["csidrivers", "csistoragecapacities"]
verbs: ["get", "list"]
---
apiVersion: rbac.authorization.k8s.io/v1
kind: Role

View File

@ -0,0 +1,74 @@
// Copyright (c) 2016, 2018, 2021, Oracle and/or its affiliates. All rights reserved.
// This software is dual-licensed to you under the Universal Permissive License (UPL) 1.0 as shown at https://oss.oracle.com/licenses/upl or Apache License 2.0 as shown at http://www.apache.org/licenses/LICENSE-2.0. You may choose either license.
// Code generated. DO NOT EDIT.
package workrequests
import (
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider/oci/oci-go-sdk/v43/common"
"net/http"
)
// GetWorkRequestRequest wrapper for the GetWorkRequest operation
//
// # See also
//
// Click https://docs.cloud.oracle.com/en-us/iaas/tools/go-sdk-examples/latest/workrequests/GetWorkRequest.go.html to see an example of how to use GetWorkRequestRequest.
type GetWorkRequestRequest struct {
// The OCID (https://docs.cloud.oracle.com/iaas/Content/General/Concepts/identifiers.htm) of the work request.
WorkRequestId *string `mandatory:"true" contributesTo:"path" name:"workRequestId"`
// Unique Oracle-assigned identifier for the request. If you need to contact Oracle about a
// particular request, please provide the request ID.
OpcRequestId *string `mandatory:"false" contributesTo:"header" name:"opc-request-id"`
// Metadata about the request. This information will not be transmitted to the service, but
// represents information that the SDK will consume to drive retry behavior.
RequestMetadata common.RequestMetadata
}
func (request GetWorkRequestRequest) String() string {
return common.PointerString(request)
}
// HTTPRequest implements the OCIRequest interface
func (request GetWorkRequestRequest) HTTPRequest(method, path string, binaryRequestBody *common.OCIReadSeekCloser) (http.Request, error) {
return common.MakeDefaultHTTPRequestWithTaggedStruct(method, path, request)
}
// BinaryRequestBody implements the OCIRequest interface
func (request GetWorkRequestRequest) BinaryRequestBody() (*common.OCIReadSeekCloser, bool) {
return nil, false
}
// RetryPolicy implements the OCIRetryableRequest interface. This retrieves the specified retry policy.
func (request GetWorkRequestRequest) RetryPolicy() *common.RetryPolicy {
return request.RequestMetadata.RetryPolicy
}
// GetWorkRequestResponse wrapper for the GetWorkRequest operation
type GetWorkRequestResponse struct {
// The underlying http response
RawResponse *http.Response
// The WorkRequest instance
WorkRequest `presentIn:"body"`
// Unique Oracle-assigned identifier for the request. If you need to contact Oracle about a
// particular request, please provide the request ID.
OpcRequestId *string `presentIn:"header" name:"opc-request-id"`
}
func (response GetWorkRequestResponse) String() string {
return common.PointerString(response)
}
// HTTPResponse implements the OCIResponse interface
func (response GetWorkRequestResponse) HTTPResponse() *http.Response {
return response.RawResponse
}

View File

@ -0,0 +1,115 @@
// Copyright (c) 2016, 2018, 2021, Oracle and/or its affiliates. All rights reserved.
// This software is dual-licensed to you under the Universal Permissive License (UPL) 1.0 as shown at https://oss.oracle.com/licenses/upl or Apache License 2.0 as shown at http://www.apache.org/licenses/LICENSE-2.0. You may choose either license.
// Code generated. DO NOT EDIT.
package workrequests
import (
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider/oci/oci-go-sdk/v43/common"
"net/http"
)
// ListWorkRequestErrorsRequest wrapper for the ListWorkRequestErrors operation
//
// # See also
//
// Click https://docs.cloud.oracle.com/en-us/iaas/tools/go-sdk-examples/latest/workrequests/ListWorkRequestErrors.go.html to see an example of how to use ListWorkRequestErrorsRequest.
type ListWorkRequestErrorsRequest struct {
// The OCID (https://docs.cloud.oracle.com/iaas/Content/General/Concepts/identifiers.htm) of the work request.
WorkRequestId *string `mandatory:"true" contributesTo:"path" name:"workRequestId"`
// For list pagination. The maximum number of results per page, or items to return in a
// paginated "List" call. For important details about how pagination works, see
// List Pagination (https://docs.cloud.oracle.com/iaas/Content/API/Concepts/usingapi.htm#nine).
Limit *int `mandatory:"false" contributesTo:"query" name:"limit"`
// For list pagination. The value of the `opc-next-page` response header from the
// previous "List" call. For important details about how pagination works, see
// List Pagination (https://docs.cloud.oracle.com/iaas/Content/API/Concepts/usingapi.htm#nine).
Page *string `mandatory:"false" contributesTo:"query" name:"page"`
// The sort order to use, either ascending (`ASC`) or descending (`DESC`).
SortOrder ListWorkRequestErrorsSortOrderEnum `mandatory:"false" contributesTo:"query" name:"sortOrder" omitEmpty:"true"`
// Unique Oracle-assigned identifier for the request. If you need to contact Oracle about a
// particular request, please provide the request ID.
OpcRequestId *string `mandatory:"false" contributesTo:"header" name:"opc-request-id"`
// Metadata about the request. This information will not be transmitted to the service, but
// represents information that the SDK will consume to drive retry behavior.
RequestMetadata common.RequestMetadata
}
func (request ListWorkRequestErrorsRequest) String() string {
return common.PointerString(request)
}
// HTTPRequest implements the OCIRequest interface
func (request ListWorkRequestErrorsRequest) HTTPRequest(method, path string, binaryRequestBody *common.OCIReadSeekCloser) (http.Request, error) {
return common.MakeDefaultHTTPRequestWithTaggedStruct(method, path, request)
}
// BinaryRequestBody implements the OCIRequest interface
func (request ListWorkRequestErrorsRequest) BinaryRequestBody() (*common.OCIReadSeekCloser, bool) {
return nil, false
}
// RetryPolicy implements the OCIRetryableRequest interface. This retrieves the specified retry policy.
func (request ListWorkRequestErrorsRequest) RetryPolicy() *common.RetryPolicy {
return request.RequestMetadata.RetryPolicy
}
// ListWorkRequestErrorsResponse wrapper for the ListWorkRequestErrors operation
type ListWorkRequestErrorsResponse struct {
// The underlying http response
RawResponse *http.Response
// A list of []WorkRequestError instances
Items []WorkRequestError `presentIn:"body"`
// For list pagination. When this header appears in the response, additional pages of
// results remain. For important details about how pagination works, see
// List Pagination (https://docs.cloud.oracle.com/iaas/Content/API/Concepts/usingapi.htm#nine).
OpcNextPage *string `presentIn:"header" name:"opc-next-page"`
// Unique Oracle-assigned identifier for the request. If you need to contact Oracle about a
// particular request, please provide the request ID.
OpcRequestId *string `presentIn:"header" name:"opc-request-id"`
}
func (response ListWorkRequestErrorsResponse) String() string {
return common.PointerString(response)
}
// HTTPResponse implements the OCIResponse interface
func (response ListWorkRequestErrorsResponse) HTTPResponse() *http.Response {
return response.RawResponse
}
// ListWorkRequestErrorsSortOrderEnum Enum with underlying type: string
type ListWorkRequestErrorsSortOrderEnum string
// Set of constants representing the allowable values for ListWorkRequestErrorsSortOrderEnum
const (
ListWorkRequestErrorsSortOrderAsc ListWorkRequestErrorsSortOrderEnum = "ASC"
ListWorkRequestErrorsSortOrderDesc ListWorkRequestErrorsSortOrderEnum = "DESC"
)
var mappingListWorkRequestErrorsSortOrder = map[string]ListWorkRequestErrorsSortOrderEnum{
"ASC": ListWorkRequestErrorsSortOrderAsc,
"DESC": ListWorkRequestErrorsSortOrderDesc,
}
// GetListWorkRequestErrorsSortOrderEnumValues Enumerates the set of values for ListWorkRequestErrorsSortOrderEnum
func GetListWorkRequestErrorsSortOrderEnumValues() []ListWorkRequestErrorsSortOrderEnum {
values := make([]ListWorkRequestErrorsSortOrderEnum, 0)
for _, v := range mappingListWorkRequestErrorsSortOrder {
values = append(values, v)
}
return values
}

View File

@ -0,0 +1,115 @@
// Copyright (c) 2016, 2018, 2021, Oracle and/or its affiliates. All rights reserved.
// This software is dual-licensed to you under the Universal Permissive License (UPL) 1.0 as shown at https://oss.oracle.com/licenses/upl or Apache License 2.0 as shown at http://www.apache.org/licenses/LICENSE-2.0. You may choose either license.
// Code generated. DO NOT EDIT.
package workrequests
import (
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider/oci/oci-go-sdk/v43/common"
"net/http"
)
// ListWorkRequestLogsRequest wrapper for the ListWorkRequestLogs operation
//
// # See also
//
// Click https://docs.cloud.oracle.com/en-us/iaas/tools/go-sdk-examples/latest/workrequests/ListWorkRequestLogs.go.html to see an example of how to use ListWorkRequestLogsRequest.
type ListWorkRequestLogsRequest struct {
// The OCID (https://docs.cloud.oracle.com/iaas/Content/General/Concepts/identifiers.htm) of the work request.
WorkRequestId *string `mandatory:"true" contributesTo:"path" name:"workRequestId"`
// For list pagination. The maximum number of results per page, or items to return in a
// paginated "List" call. For important details about how pagination works, see
// List Pagination (https://docs.cloud.oracle.com/iaas/Content/API/Concepts/usingapi.htm#nine).
Limit *int `mandatory:"false" contributesTo:"query" name:"limit"`
// For list pagination. The value of the `opc-next-page` response header from the
// previous "List" call. For important details about how pagination works, see
// List Pagination (https://docs.cloud.oracle.com/iaas/Content/API/Concepts/usingapi.htm#nine).
Page *string `mandatory:"false" contributesTo:"query" name:"page"`
// The sort order to use, either ascending (`ASC`) or descending (`DESC`).
SortOrder ListWorkRequestLogsSortOrderEnum `mandatory:"false" contributesTo:"query" name:"sortOrder" omitEmpty:"true"`
// Unique Oracle-assigned identifier for the request. If you need to contact Oracle about a
// particular request, please provide the request ID.
OpcRequestId *string `mandatory:"false" contributesTo:"header" name:"opc-request-id"`
// Metadata about the request. This information will not be transmitted to the service, but
// represents information that the SDK will consume to drive retry behavior.
RequestMetadata common.RequestMetadata
}
func (request ListWorkRequestLogsRequest) String() string {
return common.PointerString(request)
}
// HTTPRequest implements the OCIRequest interface
func (request ListWorkRequestLogsRequest) HTTPRequest(method, path string, binaryRequestBody *common.OCIReadSeekCloser) (http.Request, error) {
return common.MakeDefaultHTTPRequestWithTaggedStruct(method, path, request)
}
// BinaryRequestBody implements the OCIRequest interface
func (request ListWorkRequestLogsRequest) BinaryRequestBody() (*common.OCIReadSeekCloser, bool) {
return nil, false
}
// RetryPolicy implements the OCIRetryableRequest interface. This retrieves the specified retry policy.
func (request ListWorkRequestLogsRequest) RetryPolicy() *common.RetryPolicy {
return request.RequestMetadata.RetryPolicy
}
// ListWorkRequestLogsResponse wrapper for the ListWorkRequestLogs operation
type ListWorkRequestLogsResponse struct {
// The underlying http response
RawResponse *http.Response
// A list of []WorkRequestLogEntry instances
Items []WorkRequestLogEntry `presentIn:"body"`
// For list pagination. When this header appears in the response, additional pages of
// results remain. For important details about how pagination works, see
// List Pagination (https://docs.cloud.oracle.com/iaas/Content/API/Concepts/usingapi.htm#nine).
OpcNextPage *string `presentIn:"header" name:"opc-next-page"`
// Unique Oracle-assigned identifier for the request. If you need to contact Oracle about a
// particular request, please provide the request ID.
OpcRequestId *string `presentIn:"header" name:"opc-request-id"`
}
func (response ListWorkRequestLogsResponse) String() string {
return common.PointerString(response)
}
// HTTPResponse implements the OCIResponse interface
func (response ListWorkRequestLogsResponse) HTTPResponse() *http.Response {
return response.RawResponse
}
// ListWorkRequestLogsSortOrderEnum Enum with underlying type: string
type ListWorkRequestLogsSortOrderEnum string
// Set of constants representing the allowable values for ListWorkRequestLogsSortOrderEnum
const (
ListWorkRequestLogsSortOrderAsc ListWorkRequestLogsSortOrderEnum = "ASC"
ListWorkRequestLogsSortOrderDesc ListWorkRequestLogsSortOrderEnum = "DESC"
)
var mappingListWorkRequestLogsSortOrder = map[string]ListWorkRequestLogsSortOrderEnum{
"ASC": ListWorkRequestLogsSortOrderAsc,
"DESC": ListWorkRequestLogsSortOrderDesc,
}
// GetListWorkRequestLogsSortOrderEnumValues Enumerates the set of values for ListWorkRequestLogsSortOrderEnum
func GetListWorkRequestLogsSortOrderEnumValues() []ListWorkRequestLogsSortOrderEnum {
values := make([]ListWorkRequestLogsSortOrderEnum, 0)
for _, v := range mappingListWorkRequestLogsSortOrder {
values = append(values, v)
}
return values
}

View File

@ -0,0 +1,92 @@
// Copyright (c) 2016, 2018, 2021, Oracle and/or its affiliates. All rights reserved.
// This software is dual-licensed to you under the Universal Permissive License (UPL) 1.0 as shown at https://oss.oracle.com/licenses/upl or Apache License 2.0 as shown at http://www.apache.org/licenses/LICENSE-2.0. You may choose either license.
// Code generated. DO NOT EDIT.
package workrequests
import (
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider/oci/oci-go-sdk/v43/common"
"net/http"
)
// ListWorkRequestsRequest wrapper for the ListWorkRequests operation
//
// # See also
//
// Click https://docs.cloud.oracle.com/en-us/iaas/tools/go-sdk-examples/latest/workrequests/ListWorkRequests.go.html to see an example of how to use ListWorkRequestsRequest.
type ListWorkRequestsRequest struct {
// The OCID (https://docs.cloud.oracle.com/iaas/Content/General/Concepts/identifiers.htm) of the compartment.
CompartmentId *string `mandatory:"true" contributesTo:"query" name:"compartmentId"`
// The OCID (https://docs.cloud.oracle.com/iaas/Content/General/Concepts/identifiers.htm) of the resource.
ResourceId *string `mandatory:"false" contributesTo:"query" name:"resourceId"`
// For list pagination. The maximum number of results per page, or items to return in a
// paginated "List" call. For important details about how pagination works, see
// List Pagination (https://docs.cloud.oracle.com/iaas/Content/API/Concepts/usingapi.htm#nine).
Limit *int `mandatory:"false" contributesTo:"query" name:"limit"`
// For list pagination. The value of the `opc-next-page` response header from the
// previous "List" call. For important details about how pagination works, see
// List Pagination (https://docs.cloud.oracle.com/iaas/Content/API/Concepts/usingapi.htm#nine).
Page *string `mandatory:"false" contributesTo:"query" name:"page"`
// Unique Oracle-assigned identifier for the request. If you need to contact Oracle about a
// particular request, please provide the request ID.
OpcRequestId *string `mandatory:"false" contributesTo:"header" name:"opc-request-id"`
// Metadata about the request. This information will not be transmitted to the service, but
// represents information that the SDK will consume to drive retry behavior.
RequestMetadata common.RequestMetadata
}
func (request ListWorkRequestsRequest) String() string {
return common.PointerString(request)
}
// HTTPRequest implements the OCIRequest interface
func (request ListWorkRequestsRequest) HTTPRequest(method, path string, binaryRequestBody *common.OCIReadSeekCloser) (http.Request, error) {
return common.MakeDefaultHTTPRequestWithTaggedStruct(method, path, request)
}
// BinaryRequestBody implements the OCIRequest interface
func (request ListWorkRequestsRequest) BinaryRequestBody() (*common.OCIReadSeekCloser, bool) {
return nil, false
}
// RetryPolicy implements the OCIRetryableRequest interface. This retrieves the specified retry policy.
func (request ListWorkRequestsRequest) RetryPolicy() *common.RetryPolicy {
return request.RequestMetadata.RetryPolicy
}
// ListWorkRequestsResponse wrapper for the ListWorkRequests operation
type ListWorkRequestsResponse struct {
// The underlying http response
RawResponse *http.Response
// A list of []WorkRequestSummary instances
Items []WorkRequestSummary `presentIn:"body"`
// For list pagination. When this header appears in the response, additional pages of
// results remain. For important details about how pagination works, see
// List Pagination (https://docs.cloud.oracle.com/iaas/Content/API/Concepts/usingapi.htm#nine).
OpcNextPage *string `presentIn:"header" name:"opc-next-page"`
// Unique Oracle-assigned identifier for the request. If you need to contact Oracle about a
// particular request, please provide the request ID.
OpcRequestId *string `presentIn:"header" name:"opc-request-id"`
}
func (response ListWorkRequestsResponse) String() string {
return common.PointerString(response)
}
// HTTPResponse implements the OCIResponse interface
func (response ListWorkRequestsResponse) HTTPResponse() *http.Response {
return response.RawResponse
}
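A minimal pagination sketch for the ListWorkRequests operation added here, assuming a configured WorkRequestClient (introduced later in this change) and the vendored import path used by this package. listAllWorkRequests is a hypothetical helper, not part of the generated SDK, that follows the opc-next-page token until the service stops returning one.

package ociexample

import (
	"context"

	// Assumed import path for the vendored workrequests package in this change.
	"k8s.io/autoscaler/cluster-autoscaler/cloudprovider/oci/oci-go-sdk/v43/workrequests"
)

// listAllWorkRequests pages through ListWorkRequests for one compartment.
func listAllWorkRequests(ctx context.Context, client workrequests.WorkRequestClient, compartmentID string) ([]workrequests.WorkRequestSummary, error) {
	var all []workrequests.WorkRequestSummary
	var page *string
	for {
		resp, err := client.ListWorkRequests(ctx, workrequests.ListWorkRequestsRequest{
			CompartmentId: &compartmentID,
			Page:          page,
		})
		if err != nil {
			return nil, err
		}
		all = append(all, resp.Items...)
		if resp.OpcNextPage == nil {
			// No opc-next-page header means this was the last page.
			return all, nil
		}
		page = resp.OpcNextPage
	}
}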

View File

@ -0,0 +1,87 @@
// Copyright (c) 2016, 2018, 2021, Oracle and/or its affiliates. All rights reserved.
// This software is dual-licensed to you under the Universal Permissive License (UPL) 1.0 as shown at https://oss.oracle.com/licenses/upl or Apache License 2.0 as shown at http://www.apache.org/licenses/LICENSE-2.0. You may choose either license.
// Code generated. DO NOT EDIT.
// Work Requests API
//
// Many of the API operations that you use to create and configure Compute resources do not take effect
// immediately. In these cases, the operation spawns an asynchronous workflow to fulfill the request.
// Work requests provide visibility into the status of these in-progress, long-running workflows.
// For more information about work requests and the operations that spawn work requests, see
// Viewing the State of a Compute Work Request (https://docs.cloud.oracle.com/iaas/Content/Compute/Tasks/viewingworkrequestcompute.htm).
//
package workrequests
import (
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider/oci/oci-go-sdk/v43/common"
)
// WorkRequest An asynchronous work request.
type WorkRequest struct {
// The asynchronous operation tracked by this work request.
OperationType *string `mandatory:"true" json:"operationType"`
// The status of the work request.
Status WorkRequestStatusEnum `mandatory:"true" json:"status"`
// The OCID (https://docs.cloud.oracle.com/iaas/Content/General/Concepts/identifiers.htm) of the work request.
Id *string `mandatory:"true" json:"id"`
// The OCID (https://docs.cloud.oracle.com/iaas/Content/General/Concepts/identifiers.htm) of the compartment
// that contains the work request.
CompartmentId *string `mandatory:"true" json:"compartmentId"`
// The resources that are affected by this work request.
Resources []WorkRequestResource `mandatory:"true" json:"resources"`
// The percentage complete of the operation tracked by this work request.
PercentComplete *float32 `mandatory:"true" json:"percentComplete"`
// The date and time the work request was created, in the format defined by RFC3339.
TimeAccepted *common.SDKTime `mandatory:"true" json:"timeAccepted"`
// The date and time the work request transitioned from `ACCEPTED` to `IN_PROGRESS`,
// in the format defined by RFC3339.
TimeStarted *common.SDKTime `mandatory:"false" json:"timeStarted"`
// The date and time the work request reached a terminal state, either `FAILED` or `SUCCEEDED`.
// Format is defined by RFC3339.
TimeFinished *common.SDKTime `mandatory:"false" json:"timeFinished"`
}
func (m WorkRequest) String() string {
return common.PointerString(m)
}
// WorkRequestStatusEnum Enum with underlying type: string
type WorkRequestStatusEnum string
// Set of constants representing the allowable values for WorkRequestStatusEnum
const (
WorkRequestStatusAccepted WorkRequestStatusEnum = "ACCEPTED"
WorkRequestStatusInProgress WorkRequestStatusEnum = "IN_PROGRESS"
WorkRequestStatusFailed WorkRequestStatusEnum = "FAILED"
WorkRequestStatusSucceeded WorkRequestStatusEnum = "SUCCEEDED"
WorkRequestStatusCanceling WorkRequestStatusEnum = "CANCELING"
WorkRequestStatusCanceled WorkRequestStatusEnum = "CANCELED"
)
var mappingWorkRequestStatus = map[string]WorkRequestStatusEnum{
"ACCEPTED": WorkRequestStatusAccepted,
"IN_PROGRESS": WorkRequestStatusInProgress,
"FAILED": WorkRequestStatusFailed,
"SUCCEEDED": WorkRequestStatusSucceeded,
"CANCELING": WorkRequestStatusCanceling,
"CANCELED": WorkRequestStatusCanceled,
}
// GetWorkRequestStatusEnumValues Enumerates the set of values for WorkRequestStatusEnum
func GetWorkRequestStatusEnumValues() []WorkRequestStatusEnum {
values := make([]WorkRequestStatusEnum, 0)
for _, v := range mappingWorkRequestStatus {
values = append(values, v)
}
return values
}
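A minimal polling sketch built on GetWorkRequest and the status constants above, assuming a configured WorkRequestClient from this package. waitForWorkRequest is a hypothetical helper, not part of the generated SDK; it waits until the work request reaches a terminal status (SUCCEEDED, FAILED, or CANCELED) or the context is cancelled.

package ociexample

import (
	"context"
	"time"

	// Assumed import path for the vendored workrequests package in this change.
	"k8s.io/autoscaler/cluster-autoscaler/cloudprovider/oci/oci-go-sdk/v43/workrequests"
)

// waitForWorkRequest polls GetWorkRequest until a terminal status is reached.
func waitForWorkRequest(ctx context.Context, client workrequests.WorkRequestClient, workRequestID string) (workrequests.WorkRequestStatusEnum, error) {
	for {
		resp, err := client.GetWorkRequest(ctx, workrequests.GetWorkRequestRequest{WorkRequestId: &workRequestID})
		if err != nil {
			return "", err
		}
		// Status is promoted from the WorkRequest embedded in the response.
		switch resp.Status {
		case workrequests.WorkRequestStatusSucceeded,
			workrequests.WorkRequestStatusFailed,
			workrequests.WorkRequestStatusCanceled:
			return resp.Status, nil
		}
		select {
		case <-ctx.Done():
			return resp.Status, ctx.Err()
		case <-time.After(10 * time.Second):
		}
	}
}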

View File

@ -0,0 +1,35 @@
// Copyright (c) 2016, 2018, 2021, Oracle and/or its affiliates. All rights reserved.
// This software is dual-licensed to you under the Universal Permissive License (UPL) 1.0 as shown at https://oss.oracle.com/licenses/upl or Apache License 2.0 as shown at http://www.apache.org/licenses/LICENSE-2.0. You may choose either license.
// Code generated. DO NOT EDIT.
// Work Requests API
//
// Many of the API operations that you use to create and configure Compute resources do not take effect
// immediately. In these cases, the operation spawns an asynchronous workflow to fulfill the request.
// Work requests provide visibility into the status of these in-progress, long-running workflows.
// For more information about work requests and the operations that spawn work requests, see
// Viewing the State of a Compute Work Request (https://docs.cloud.oracle.com/iaas/Content/Compute/Tasks/viewingworkrequestcompute.htm).
//
package workrequests
import (
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider/oci/oci-go-sdk/v43/common"
)
// WorkRequestError An error encountered while executing an operation that is tracked by a work request.
type WorkRequestError struct {
// A machine-usable code for the error that occurred.
Code *string `mandatory:"true" json:"code"`
// A human-readable error string.
Message *string `mandatory:"true" json:"message"`
// The date and time the error occurred.
Timestamp *common.SDKTime `mandatory:"true" json:"timestamp"`
}
func (m WorkRequestError) String() string {
return common.PointerString(m)
}

View File

@ -0,0 +1,32 @@
// Copyright (c) 2016, 2018, 2021, Oracle and/or its affiliates. All rights reserved.
// This software is dual-licensed to you under the Universal Permissive License (UPL) 1.0 as shown at https://oss.oracle.com/licenses/upl or Apache License 2.0 as shown at http://www.apache.org/licenses/LICENSE-2.0. You may choose either license.
// Code generated. DO NOT EDIT.
// Work Requests API
//
// Many of the API operations that you use to create and configure Compute resources do not take effect
// immediately. In these cases, the operation spawns an asynchronous workflow to fulfill the request.
// Work requests provide visibility into the status of these in-progress, long-running workflows.
// For more information about work requests and the operations that spawn work requests, see
// Viewing the State of a Compute Work Request (https://docs.cloud.oracle.com/iaas/Content/Compute/Tasks/viewingworkrequestcompute.htm).
//
package workrequests
import (
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider/oci/oci-go-sdk/v43/common"
)
// WorkRequestLogEntry A log message from executing an operation that is tracked by a work request.
type WorkRequestLogEntry struct {
// A human-readable log message.
Message *string `mandatory:"true" json:"message"`
// The date and time the log message was written.
Timestamp *common.SDKTime `mandatory:"true" json:"timestamp"`
}
func (m WorkRequestLogEntry) String() string {
return common.PointerString(m)
}

View File

@ -0,0 +1,70 @@
// Copyright (c) 2016, 2018, 2021, Oracle and/or its affiliates. All rights reserved.
// This software is dual-licensed to you under the Universal Permissive License (UPL) 1.0 as shown at https://oss.oracle.com/licenses/upl or Apache License 2.0 as shown at http://www.apache.org/licenses/LICENSE-2.0. You may choose either license.
// Code generated. DO NOT EDIT.
// Work Requests API
//
// Many of the API operations that you use to create and configure Compute resources do not take effect
// immediately. In these cases, the operation spawns an asynchronous workflow to fulfill the request.
// Work requests provide visibility into the status of these in-progress, long-running workflows.
// For more information about work requests and the operations that spawn work requests, see
// Viewing the State of a Compute Work Request (https://docs.cloud.oracle.com/iaas/Content/Compute/Tasks/viewingworkrequestcompute.htm).
//
package workrequests
import (
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider/oci/oci-go-sdk/v43/common"
)
// WorkRequestResource A resource that is created or operated on by an asynchronous operation that is tracked by
// a work request.
type WorkRequestResource struct {
// The way in which this resource was affected by the operation that spawned the work
// request.
ActionType WorkRequestResourceActionTypeEnum `mandatory:"true" json:"actionType"`
// The resource type the work request affects.
EntityType *string `mandatory:"true" json:"entityType"`
// An OCID (https://docs.cloud.oracle.com/iaas/Content/General/Concepts/identifiers.htm) or other unique identifier for the
// resource.
Identifier *string `mandatory:"true" json:"identifier"`
// The URI path that you can use for a GET request to access the resource metadata.
EntityUri *string `mandatory:"false" json:"entityUri"`
}
func (m WorkRequestResource) String() string {
return common.PointerString(m)
}
// WorkRequestResourceActionTypeEnum Enum with underlying type: string
type WorkRequestResourceActionTypeEnum string
// Set of constants representing the allowable values for WorkRequestResourceActionTypeEnum
const (
WorkRequestResourceActionTypeCreated WorkRequestResourceActionTypeEnum = "CREATED"
WorkRequestResourceActionTypeUpdated WorkRequestResourceActionTypeEnum = "UPDATED"
WorkRequestResourceActionTypeDeleted WorkRequestResourceActionTypeEnum = "DELETED"
WorkRequestResourceActionTypeRelated WorkRequestResourceActionTypeEnum = "RELATED"
WorkRequestResourceActionTypeInProgress WorkRequestResourceActionTypeEnum = "IN_PROGRESS"
)
var mappingWorkRequestResourceActionType = map[string]WorkRequestResourceActionTypeEnum{
"CREATED": WorkRequestResourceActionTypeCreated,
"UPDATED": WorkRequestResourceActionTypeUpdated,
"DELETED": WorkRequestResourceActionTypeDeleted,
"RELATED": WorkRequestResourceActionTypeRelated,
"IN_PROGRESS": WorkRequestResourceActionTypeInProgress,
}
// GetWorkRequestResourceActionTypeEnumValues Enumerates the set of values for WorkRequestResourceActionTypeEnum
func GetWorkRequestResourceActionTypeEnumValues() []WorkRequestResourceActionTypeEnum {
values := make([]WorkRequestResourceActionTypeEnum, 0)
for _, v := range mappingWorkRequestResourceActionType {
values = append(values, v)
}
return values
}

View File

@ -0,0 +1,84 @@
// Copyright (c) 2016, 2018, 2021, Oracle and/or its affiliates. All rights reserved.
// This software is dual-licensed to you under the Universal Permissive License (UPL) 1.0 as shown at https://oss.oracle.com/licenses/upl or Apache License 2.0 as shown at http://www.apache.org/licenses/LICENSE-2.0. You may choose either license.
// Code generated. DO NOT EDIT.
// Work Requests API
//
// Many of the API operations that you use to create and configure Compute resources do not take effect
// immediately. In these cases, the operation spawns an asynchronous workflow to fulfill the request.
// Work requests provide visibility into the status of these in-progress, long-running workflows.
// For more information about work requests and the operations that spawn work requests, see
// Viewing the State of a Compute Work Request (https://docs.cloud.oracle.com/iaas/Content/Compute/Tasks/viewingworkrequestcompute.htm).
//
package workrequests
import (
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider/oci/oci-go-sdk/v43/common"
)
// WorkRequestSummary A summary of the status of a work request.
type WorkRequestSummary struct {
// The asynchronous operation tracked by this work request.
OperationType *string `mandatory:"true" json:"operationType"`
// The status of the work request.
Status WorkRequestSummaryStatusEnum `mandatory:"true" json:"status"`
// The OCID (https://docs.cloud.oracle.com/iaas/Content/General/Concepts/identifiers.htm) of the work request.
Id *string `mandatory:"true" json:"id"`
// The OCID (https://docs.cloud.oracle.com/iaas/Content/General/Concepts/identifiers.htm) of the compartment containing
// this work request.
CompartmentId *string `mandatory:"true" json:"compartmentId"`
// The percentage complete of the operation tracked by this work request.
PercentComplete *float32 `mandatory:"true" json:"percentComplete"`
// The date and time the work request was created, in the format defined by RFC3339.
TimeAccepted *common.SDKTime `mandatory:"true" json:"timeAccepted"`
// The date and time the work request transitioned from `ACCEPTED` to `IN_PROGRESS`, in
// the format defined by RFC3339.
TimeStarted *common.SDKTime `mandatory:"false" json:"timeStarted"`
// The date and time the work request reached a terminal state, either `FAILED` or `SUCCEEDED`.
// Format is defined by RFC3339.
TimeFinished *common.SDKTime `mandatory:"false" json:"timeFinished"`
}
func (m WorkRequestSummary) String() string {
return common.PointerString(m)
}
// WorkRequestSummaryStatusEnum Enum with underlying type: string
type WorkRequestSummaryStatusEnum string
// Set of constants representing the allowable values for WorkRequestSummaryStatusEnum
const (
WorkRequestSummaryStatusAccepted WorkRequestSummaryStatusEnum = "ACCEPTED"
WorkRequestSummaryStatusInProgress WorkRequestSummaryStatusEnum = "IN_PROGRESS"
WorkRequestSummaryStatusFailed WorkRequestSummaryStatusEnum = "FAILED"
WorkRequestSummaryStatusSucceeded WorkRequestSummaryStatusEnum = "SUCCEEDED"
WorkRequestSummaryStatusCanceling WorkRequestSummaryStatusEnum = "CANCELING"
WorkRequestSummaryStatusCanceled WorkRequestSummaryStatusEnum = "CANCELED"
)
var mappingWorkRequestSummaryStatus = map[string]WorkRequestSummaryStatusEnum{
"ACCEPTED": WorkRequestSummaryStatusAccepted,
"IN_PROGRESS": WorkRequestSummaryStatusInProgress,
"FAILED": WorkRequestSummaryStatusFailed,
"SUCCEEDED": WorkRequestSummaryStatusSucceeded,
"CANCELING": WorkRequestSummaryStatusCanceling,
"CANCELED": WorkRequestSummaryStatusCanceled,
}
// GetWorkRequestSummaryStatusEnumValues Enumerates the set of values for WorkRequestSummaryStatusEnum
func GetWorkRequestSummaryStatusEnumValues() []WorkRequestSummaryStatusEnum {
values := make([]WorkRequestSummaryStatusEnum, 0)
for _, v := range mappingWorkRequestSummaryStatus {
values = append(values, v)
}
return values
}

View File

@ -0,0 +1,301 @@
// Copyright (c) 2016, 2018, 2021, Oracle and/or its affiliates. All rights reserved.
// This software is dual-licensed to you under the Universal Permissive License (UPL) 1.0 as shown at https://oss.oracle.com/licenses/upl or Apache License 2.0 as shown at http://www.apache.org/licenses/LICENSE-2.0. You may choose either license.
// Code generated. DO NOT EDIT.
// Work Requests API
//
// Many of the API operations that you use to create and configure Compute resources do not take effect
// immediately. In these cases, the operation spawns an asynchronous workflow to fulfill the request.
// Work requests provide visibility into the status of these in-progress, long-running workflows.
// For more information about work requests and the operations that spawn work requests, see
// Viewing the State of a Compute Work Request (https://docs.cloud.oracle.com/iaas/Content/Compute/Tasks/viewingworkrequestcompute.htm).
//
package workrequests
import (
"context"
"fmt"
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider/oci/oci-go-sdk/v43/common"
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider/oci/oci-go-sdk/v43/common/auth"
"net/http"
)
// WorkRequestClient a client for WorkRequest
type WorkRequestClient struct {
common.BaseClient
config *common.ConfigurationProvider
}
// NewWorkRequestClientWithConfigurationProvider Creates a new default WorkRequest client with the given configuration provider.
// the configuration provider will be used for the default signer as well as reading the region
func NewWorkRequestClientWithConfigurationProvider(configProvider common.ConfigurationProvider) (client WorkRequestClient, err error) {
provider, err := auth.GetGenericConfigurationProvider(configProvider)
if err != nil {
return client, err
}
baseClient, e := common.NewClientWithConfig(provider)
if e != nil {
return client, e
}
return newWorkRequestClientFromBaseClient(baseClient, provider)
}
// NewWorkRequestClientWithOboToken Creates a new default WorkRequest client with the given configuration provider.
// The obotoken will be added to default headers and signed; the configuration provider will be used for the signer
// as well as reading the region.
func NewWorkRequestClientWithOboToken(configProvider common.ConfigurationProvider, oboToken string) (client WorkRequestClient, err error) {
baseClient, err := common.NewClientWithOboToken(configProvider, oboToken)
if err != nil {
return client, err
}
return newWorkRequestClientFromBaseClient(baseClient, configProvider)
}
func newWorkRequestClientFromBaseClient(baseClient common.BaseClient, configProvider common.ConfigurationProvider) (client WorkRequestClient, err error) {
client = WorkRequestClient{BaseClient: baseClient}
client.BasePath = "20160918"
err = client.setConfigurationProvider(configProvider)
return
}
// SetRegion overrides the region of this client.
func (client *WorkRequestClient) SetRegion(region string) {
client.Host = common.StringToRegion(region).EndpointForTemplate("workrequests", "https://iaas.{region}.{secondLevelDomain}")
}
// SetConfigurationProvider sets the configuration provider including the region, returns an error if is not valid
func (client *WorkRequestClient) setConfigurationProvider(configProvider common.ConfigurationProvider) error {
if ok, err := common.IsConfigurationProviderValid(configProvider); !ok {
return err
}
// Error has been checked already
region, _ := configProvider.Region()
client.SetRegion(region)
client.config = &configProvider
return nil
}
// ConfigurationProvider the ConfigurationProvider used in this client, or null if none set
func (client *WorkRequestClient) ConfigurationProvider() *common.ConfigurationProvider {
return client.config
}
// GetWorkRequest Gets the details of a work request.
//
// # See also
//
// Click https://docs.cloud.oracle.com/en-us/iaas/tools/go-sdk-examples/latest/workrequests/GetWorkRequest.go.html to see an example of how to use GetWorkRequest API.
func (client WorkRequestClient) GetWorkRequest(ctx context.Context, request GetWorkRequestRequest) (response GetWorkRequestResponse, err error) {
var ociResponse common.OCIResponse
policy := common.NoRetryPolicy()
if client.RetryPolicy() != nil {
policy = *client.RetryPolicy()
}
if request.RetryPolicy() != nil {
policy = *request.RetryPolicy()
}
ociResponse, err = common.Retry(ctx, request, client.getWorkRequest, policy)
if err != nil {
if ociResponse != nil {
if httpResponse := ociResponse.HTTPResponse(); httpResponse != nil {
opcRequestId := httpResponse.Header.Get("opc-request-id")
response = GetWorkRequestResponse{RawResponse: httpResponse, OpcRequestId: &opcRequestId}
} else {
response = GetWorkRequestResponse{}
}
}
return
}
if convertedResponse, ok := ociResponse.(GetWorkRequestResponse); ok {
response = convertedResponse
} else {
err = fmt.Errorf("failed to convert OCIResponse into GetWorkRequestResponse")
}
return
}
// getWorkRequest implements the OCIOperation interface (enables retrying operations)
func (client WorkRequestClient) getWorkRequest(ctx context.Context, request common.OCIRequest, binaryReqBody *common.OCIReadSeekCloser) (common.OCIResponse, error) {
httpRequest, err := request.HTTPRequest(http.MethodGet, "/workRequests/{workRequestId}", binaryReqBody)
if err != nil {
return nil, err
}
var response GetWorkRequestResponse
var httpResponse *http.Response
httpResponse, err = client.Call(ctx, &httpRequest)
defer common.CloseBodyIfValid(httpResponse)
response.RawResponse = httpResponse
if err != nil {
return response, err
}
err = common.UnmarshalResponse(httpResponse, &response)
return response, err
}
// ListWorkRequestErrors Gets the errors for a work request.
//
// # See also
//
// Click https://docs.cloud.oracle.com/en-us/iaas/tools/go-sdk-examples/latest/workrequests/ListWorkRequestErrors.go.html to see an example of how to use ListWorkRequestErrors API.
func (client WorkRequestClient) ListWorkRequestErrors(ctx context.Context, request ListWorkRequestErrorsRequest) (response ListWorkRequestErrorsResponse, err error) {
var ociResponse common.OCIResponse
policy := common.NoRetryPolicy()
if client.RetryPolicy() != nil {
policy = *client.RetryPolicy()
}
if request.RetryPolicy() != nil {
policy = *request.RetryPolicy()
}
ociResponse, err = common.Retry(ctx, request, client.listWorkRequestErrors, policy)
if err != nil {
if ociResponse != nil {
if httpResponse := ociResponse.HTTPResponse(); httpResponse != nil {
opcRequestId := httpResponse.Header.Get("opc-request-id")
response = ListWorkRequestErrorsResponse{RawResponse: httpResponse, OpcRequestId: &opcRequestId}
} else {
response = ListWorkRequestErrorsResponse{}
}
}
return
}
if convertedResponse, ok := ociResponse.(ListWorkRequestErrorsResponse); ok {
response = convertedResponse
} else {
err = fmt.Errorf("failed to convert OCIResponse into ListWorkRequestErrorsResponse")
}
return
}
// listWorkRequestErrors implements the OCIOperation interface (enables retrying operations)
func (client WorkRequestClient) listWorkRequestErrors(ctx context.Context, request common.OCIRequest, binaryReqBody *common.OCIReadSeekCloser) (common.OCIResponse, error) {
httpRequest, err := request.HTTPRequest(http.MethodGet, "/workRequests/{workRequestId}/errors", binaryReqBody)
if err != nil {
return nil, err
}
var response ListWorkRequestErrorsResponse
var httpResponse *http.Response
httpResponse, err = client.Call(ctx, &httpRequest)
defer common.CloseBodyIfValid(httpResponse)
response.RawResponse = httpResponse
if err != nil {
return response, err
}
err = common.UnmarshalResponse(httpResponse, &response)
return response, err
}
// ListWorkRequestLogs Gets the logs for a work request.
//
// # See also
//
// Click https://docs.cloud.oracle.com/en-us/iaas/tools/go-sdk-examples/latest/workrequests/ListWorkRequestLogs.go.html to see an example of how to use ListWorkRequestLogs API.
func (client WorkRequestClient) ListWorkRequestLogs(ctx context.Context, request ListWorkRequestLogsRequest) (response ListWorkRequestLogsResponse, err error) {
var ociResponse common.OCIResponse
policy := common.NoRetryPolicy()
if client.RetryPolicy() != nil {
policy = *client.RetryPolicy()
}
if request.RetryPolicy() != nil {
policy = *request.RetryPolicy()
}
ociResponse, err = common.Retry(ctx, request, client.listWorkRequestLogs, policy)
if err != nil {
if ociResponse != nil {
if httpResponse := ociResponse.HTTPResponse(); httpResponse != nil {
opcRequestId := httpResponse.Header.Get("opc-request-id")
response = ListWorkRequestLogsResponse{RawResponse: httpResponse, OpcRequestId: &opcRequestId}
} else {
response = ListWorkRequestLogsResponse{}
}
}
return
}
if convertedResponse, ok := ociResponse.(ListWorkRequestLogsResponse); ok {
response = convertedResponse
} else {
err = fmt.Errorf("failed to convert OCIResponse into ListWorkRequestLogsResponse")
}
return
}
// listWorkRequestLogs implements the OCIOperation interface (enables retrying operations)
func (client WorkRequestClient) listWorkRequestLogs(ctx context.Context, request common.OCIRequest, binaryReqBody *common.OCIReadSeekCloser) (common.OCIResponse, error) {
httpRequest, err := request.HTTPRequest(http.MethodGet, "/workRequests/{workRequestId}/logs", binaryReqBody)
if err != nil {
return nil, err
}
var response ListWorkRequestLogsResponse
var httpResponse *http.Response
httpResponse, err = client.Call(ctx, &httpRequest)
defer common.CloseBodyIfValid(httpResponse)
response.RawResponse = httpResponse
if err != nil {
return response, err
}
err = common.UnmarshalResponse(httpResponse, &response)
return response, err
}
// ListWorkRequests Lists the work requests in a compartment or for a specified resource.
//
// # See also
//
// Click https://docs.cloud.oracle.com/en-us/iaas/tools/go-sdk-examples/latest/workrequests/ListWorkRequests.go.html to see an example of how to use ListWorkRequests API.
func (client WorkRequestClient) ListWorkRequests(ctx context.Context, request ListWorkRequestsRequest) (response ListWorkRequestsResponse, err error) {
var ociResponse common.OCIResponse
policy := common.NoRetryPolicy()
if client.RetryPolicy() != nil {
policy = *client.RetryPolicy()
}
if request.RetryPolicy() != nil {
policy = *request.RetryPolicy()
}
ociResponse, err = common.Retry(ctx, request, client.listWorkRequests, policy)
if err != nil {
if ociResponse != nil {
if httpResponse := ociResponse.HTTPResponse(); httpResponse != nil {
opcRequestId := httpResponse.Header.Get("opc-request-id")
response = ListWorkRequestsResponse{RawResponse: httpResponse, OpcRequestId: &opcRequestId}
} else {
response = ListWorkRequestsResponse{}
}
}
return
}
if convertedResponse, ok := ociResponse.(ListWorkRequestsResponse); ok {
response = convertedResponse
} else {
err = fmt.Errorf("failed to convert OCIResponse into ListWorkRequestsResponse")
}
return
}
// listWorkRequests implements the OCIOperation interface (enables retrying operations)
func (client WorkRequestClient) listWorkRequests(ctx context.Context, request common.OCIRequest, binaryReqBody *common.OCIReadSeekCloser) (common.OCIResponse, error) {
httpRequest, err := request.HTTPRequest(http.MethodGet, "/workRequests", binaryReqBody)
if err != nil {
return nil, err
}
var response ListWorkRequestsResponse
var httpResponse *http.Response
httpResponse, err = client.Call(ctx, &httpRequest)
defer common.CloseBodyIfValid(httpResponse)
response.RawResponse = httpResponse
if err != nil {
return response, err
}
err = common.UnmarshalResponse(httpResponse, &response)
return response, err
}
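
// exampleGetWorkRequest is an illustrative sketch only, not part of this change: it shows how a
// caller might use this client to fetch a single work request. The function name, the caller-supplied
// work-request OCID, and the minimal error handling are assumptions made for the example.
func exampleGetWorkRequest(ctx context.Context, client WorkRequestClient, workRequestID string) (*GetWorkRequestResponse, error) {
	// Build the request; WorkRequestId is the OCID of the work request to look up.
	request := GetWorkRequestRequest{WorkRequestId: common.String(workRequestID)}
	// GetWorkRequest applies the client's (or the request's) retry policy before issuing the call.
	response, err := client.GetWorkRequest(ctx, request)
	if err != nil {
		return nil, err
	}
	return &response, nil
}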

View File

@ -96,6 +96,11 @@ func (ocp *OciCloudProvider) NodeGroupForNode(n *apiv1.Node) (cloudprovider.Node
return ng, err
}
// HasInstance returns whether a given node has a corresponding instance in this cloud provider
func (ocp *OciCloudProvider) HasInstance(n *apiv1.Node) (bool, error) {
return true, cloudprovider.ErrNotImplemented
}
// Pricing returns pricing model for this cloud provider or error if not available.
// Implementation optional.
func (ocp *OciCloudProvider) Pricing() (cloudprovider.PricingModel, caerrors.AutoscalerError) {

View File

@ -35,6 +35,9 @@ const (
instanceIDLabelSuffix = "instance-id_suffix"
ociInstancePoolIDAnnotation = "oci.oraclecloud.com/instancepool-id"
ociInstancePoolResourceIdent = "instancepool"
ociInstancePoolLaunchOp = "LaunchInstancesInPool"
instanceStateUnfulfilled = "Unfulfilled"
instanceIDUnfulfilled = "instance_placeholder"
// Overload ociInstancePoolIDAnnotation to indicate a kubernetes node doesn't belong to any OCI Instance Pool.
ociInstancePoolIDNonPoolMember = "non_pool_member"

View File

@ -18,10 +18,12 @@ package oci
import (
"context"
"fmt"
"github.com/pkg/errors"
"k8s.io/apimachinery/pkg/util/wait"
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider/oci/oci-go-sdk/v43/common"
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider/oci/oci-go-sdk/v43/core"
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider/oci/oci-go-sdk/v43/workrequests"
"k8s.io/klog/v2"
"math"
"strings"
@ -48,6 +50,13 @@ type VirtualNetworkClient interface {
GetVnic(context.Context, core.GetVnicRequest) (core.GetVnicResponse, error)
}
// WorkRequestClient wraps workrequests.WorkRequestClient exposing the functions we actually require.
type WorkRequestClient interface {
GetWorkRequest(context.Context, workrequests.GetWorkRequestRequest) (workrequests.GetWorkRequestResponse, error)
ListWorkRequests(context.Context, workrequests.ListWorkRequestsRequest) (workrequests.ListWorkRequestsResponse, error)
ListWorkRequestErrors(context.Context, workrequests.ListWorkRequestErrorsRequest) (workrequests.ListWorkRequestErrorsResponse, error)
}
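// Note: both the concrete workrequests.WorkRequestClient and the mockWorkRequestClient defined in the
// unit tests satisfy this interface, which lets the instance pool cache be exercised without real OCI calls.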
type instancePoolCache struct {
mu sync.Mutex
poolCache map[string]*core.InstancePool
@ -57,9 +66,10 @@ type instancePoolCache struct {
computeManagementClient ComputeMgmtClient
computeClient ComputeClient
virtualNetworkClient VirtualNetworkClient
workRequestsClient WorkRequestClient
}
func newInstancePoolCache(computeManagementClient ComputeMgmtClient, computeClient ComputeClient, virtualNetworkClient VirtualNetworkClient) *instancePoolCache {
func newInstancePoolCache(computeManagementClient ComputeMgmtClient, computeClient ComputeClient, virtualNetworkClient VirtualNetworkClient, workRequestsClient WorkRequestClient) *instancePoolCache {
return &instancePoolCache{
poolCache: map[string]*core.InstancePool{},
instanceSummaryCache: map[string]*[]core.InstanceSummary{},
@ -67,6 +77,7 @@ func newInstancePoolCache(computeManagementClient ComputeMgmtClient, computeClie
computeManagementClient: computeManagementClient,
computeClient: computeClient,
virtualNetworkClient: virtualNetworkClient,
workRequestsClient: workRequestsClient,
}
}
@ -82,16 +93,16 @@ func (c *instancePoolCache) rebuild(staticInstancePools map[string]*InstancePool
// Since we only support static instance-pools we don't need to worry about pruning.
for id := range staticInstancePools {
resp, err := c.computeManagementClient.GetInstancePool(context.Background(), core.GetInstancePoolRequest{
getInstancePoolResp, err := c.computeManagementClient.GetInstancePool(context.Background(), core.GetInstancePoolRequest{
InstancePoolId: common.String(id),
})
if err != nil {
klog.Errorf("get instance pool %s failed: %v", id, err)
return err
}
klog.V(6).Infof("GetInstancePool() response %v", resp.InstancePool)
klog.V(6).Infof("GetInstancePool() response %v", getInstancePoolResp.InstancePool)
c.setInstancePool(&resp.InstancePool)
c.setInstancePool(&getInstancePoolResp.InstancePool)
var instanceSummaries []core.InstanceSummary
var page *string
@ -112,7 +123,32 @@ func (c *instancePoolCache) rebuild(staticInstancePools map[string]*InstancePool
break
}
}
c.setInstanceSummaries(*resp.InstancePool.Id, &instanceSummaries)
c.setInstanceSummaries(id, &instanceSummaries)
// Compare the instance pool's target size with the latest number of InstanceSummaries. If the pool has fewer
// instances than requested, look for unrecoverable errors such as quota or capacity issues that blocked scaling.
if len(*c.instanceSummaryCache[id]) < *c.poolCache[id].Size {
klog.V(4).Infof("Instance pool %s has only %d instances created while requested count is %d. ",
*getInstancePoolResp.InstancePool.DisplayName, len(*c.instanceSummaryCache[id]), *c.poolCache[id].Size)
if getInstancePoolResp.LifecycleState != core.InstancePoolLifecycleStateRunning {
lastWorkRequest, err := c.lastStartedWorkRequest(*getInstancePoolResp.CompartmentId, id)
// The last started work request may be many minutes old depending on the sync interval
// and the exponential backoff time of retried OCI operations.
if err == nil && *lastWorkRequest.OperationType == ociInstancePoolLaunchOp &&
lastWorkRequest.Status == workrequests.WorkRequestSummaryStatusFailed {
unrecoverableErrorMsg := c.firstUnrecoverableErrorForWorkRequest(*lastWorkRequest.Id)
if unrecoverableErrorMsg != "" {
klog.V(4).Infof("Creating placeholder instances for %s.", *getInstancePoolResp.InstancePool.DisplayName)
for i := len(*c.instanceSummaryCache[id]); i < *c.poolCache[id].Size; i++ {
c.addUnfulfilledInstanceToCache(id, fmt.Sprintf("%s%s-%d", instanceIDUnfulfilled,
*getInstancePoolResp.InstancePool.Id, i), *getInstancePoolResp.InstancePool.CompartmentId,
fmt.Sprintf("%s-%d", *getInstancePoolResp.InstancePool.DisplayName, i))
}
}
}
}
}
}
// Reset unowned instances cache.
@ -121,6 +157,15 @@ func (c *instancePoolCache) rebuild(staticInstancePools map[string]*InstancePool
return nil
}
func (c *instancePoolCache) addUnfulfilledInstanceToCache(instancePoolID, instanceID, compartmentID, name string) {
*c.instanceSummaryCache[instancePoolID] = append(*c.instanceSummaryCache[instancePoolID], core.InstanceSummary{
Id: common.String(instanceID),
CompartmentId: common.String(compartmentID),
State: common.String(instanceStateUnfulfilled),
DisplayName: common.String(name),
})
}
// removeInstance tries to remove the instance from the specified instance pool. If the instance isn't in the array,
// it does nothing. removeInstance returns true if it actually removed the instance and reduced the size of
// the instance pool.
@ -131,7 +176,12 @@ func (c *instancePoolCache) removeInstance(instancePool InstancePoolNodeGroup, i
return false
}
_, err := c.computeManagementClient.DetachInstancePoolInstance(context.Background(), core.DetachInstancePoolInstanceRequest{
var err error
if strings.Contains(instanceID, instanceIDUnfulfilled) {
// For an unfulfilled instance, reduce the target size of the instance pool and remove the placeholder instance from cache.
err = c.setSize(instancePool.Id(), *c.poolCache[instancePool.Id()].Size-1)
} else {
_, err = c.computeManagementClient.DetachInstancePoolInstance(context.Background(), core.DetachInstancePoolInstanceRequest{
InstancePoolId: common.String(instancePool.Id()),
DetachInstancePoolInstanceDetails: core.DetachInstancePoolInstanceDetails{
InstanceId: common.String(instanceID),
@ -139,11 +189,14 @@ func (c *instancePoolCache) removeInstance(instancePool InstancePoolNodeGroup, i
IsAutoTerminate: common.Bool(true),
},
})
}
if err == nil {
c.mu.Lock()
// Decrease pool size in cache since IsDecrementSize was true
// Decrease pool size in cache
c.poolCache[instancePool.Id()].Size = common.Int(*c.poolCache[instancePool.Id()].Size - 1)
// Since we're removing the instance from cache, we don't need to expire the pool cache
c.removeInstanceSummaryFromCache(instancePool.Id(), instanceID)
c.mu.Unlock()
return true
}
@ -156,6 +209,12 @@ func (c *instancePoolCache) removeInstance(instancePool InstancePoolNodeGroup, i
// through the configured instance-pools (ListInstancePoolInstances) for a match.
func (c *instancePoolCache) findInstanceByDetails(ociInstance OciRef) (*OciRef, error) {
// Unfulfilled instance placeholder
if strings.Contains(ociInstance.Name, instanceIDUnfulfilled) {
instIndex := strings.LastIndex(ociInstance.Name, "-")
ociInstance.PoolID = strings.Replace(ociInstance.Name[:instIndex], instanceIDUnfulfilled, "", 1)
return &ociInstance, nil
}
// Minimum amount of information we need to make a positive match
if ociInstance.InstanceID == "" && ociInstance.PrivateIPAddress == "" && ociInstance.PublicIPAddress == "" {
return nil, errors.New("instance id or an IP address is required to resolve details")
@ -321,6 +380,7 @@ func (c *instancePoolCache) setSize(instancePoolID string, size int) error {
return err
}
isScaleUp := size > *getInstancePoolResp.Size
scaleDelta := int(math.Abs(float64(*getInstancePoolResp.Size - size)))
updateDetails := core.UpdateInstancePoolDetails{
@ -336,17 +396,25 @@ func (c *instancePoolCache) setSize(instancePoolID string, size int) error {
return err
}
c.mu.Lock()
c.poolCache[instancePoolID].Size = common.Int(size)
c.mu.Unlock()
// Just return immediately if this was a scale down, to be consistent with DetachInstancePoolInstance
if !isScaleUp {
return nil
}
// Only wait for scale up (not scale down)
ctx := context.Background()
ctx, cancelFunc := context.WithTimeout(ctx, maxScalingWaitTime(scaleDelta, 20, 10*time.Minute))
// Ensure this context is always canceled so channels, go routines, etc. always complete.
defer cancelFunc()
// Wait for the number of Running instances in this pool to reach size
err = c.waitForRunningInstanceCount(ctx, size, instancePoolID, *getInstancePoolResp.CompartmentId)
if err != nil {
return err
}
// Allow an additional time for the pool State to reach Running
ctx, _ = context.WithTimeout(ctx, 10*time.Minute)
err = c.waitForState(ctx, instancePoolID, core.InstancePoolLifecycleStateRunning)
@ -354,10 +422,6 @@ func (c *instancePoolCache) setSize(instancePoolID string, size int) error {
return err
}
c.mu.Lock()
c.poolCache[instancePoolID].Size = common.Int(size)
c.mu.Unlock()
return nil
}
@ -446,10 +510,21 @@ func (c *instancePoolCache) monitorScalingProgress(ctx context.Context, target i
return
}
// Fail scale (up) operation fast by watching for unrecoverable errors such as quota or capacity issues
lastWorkRequest, err := c.lastStartedWorkRequest(compartmentID, instancePoolID)
if err == nil && *lastWorkRequest.OperationType == ociInstancePoolLaunchOp &&
lastWorkRequest.Status == workrequests.WorkRequestSummaryStatusInProgress {
unrecoverableErrorMsg := c.firstUnrecoverableErrorForWorkRequest(*lastWorkRequest.Id)
if unrecoverableErrorMsg != "" {
errCh <- errors.New(unrecoverableErrorMsg)
return
}
}
var page *string
numRunningInstances := 0
for {
// List instances in the pool
// Next, wait until the number of instances in the pool reaches the target
listInstancePoolInstances, err := c.computeManagementClient.ListInstancePoolInstances(context.Background(), core.ListInstancePoolInstancesRequest{
InstancePoolId: common.String(instancePoolID),
CompartmentId: common.String(compartmentID),
@ -509,6 +584,21 @@ func (c *instancePoolCache) getSize(id string) (int, error) {
return *pool.Size, nil
}
// removeInstanceSummaryFromCache looks through the instance summary cache for an InstanceSummary with the specified ID and
// removes it if found.
func (c *instancePoolCache) removeInstanceSummaryFromCache(instancePoolID, instanceID string) {
var instanceSummaries []core.InstanceSummary
if instanceSummaryCache, found := c.instanceSummaryCache[instancePoolID]; found {
for _, instanceSummary := range *instanceSummaryCache {
if instanceSummary.Id != nil && *instanceSummary.Id != instanceID {
instanceSummaries = append(instanceSummaries, instanceSummary)
}
}
c.instanceSummaryCache[instancePoolID] = &instanceSummaries
}
}
// maxScalingWaitTime estimates the maximum amount of time, as a duration, needed to scale size instances.
// Note, larger scale operations are broken up internally into smaller batches. This is an internal detail
// and can be overridden on a tenancy basis. 20 is a good default.
@ -528,3 +618,57 @@ func maxScalingWaitTime(size, batchSize int, timePerBatch time.Duration) time.Du
return maxScalingWaitTime + buffer
}
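
// estimatedMaxScalingWaitTime is an illustrative sketch only, not necessarily the exact calculation
// performed above: one plausible way to derive the wait time is ceil(size/batchSize) batches, each
// allowed timePerBatch, plus a fixed safety buffer. The helper name and the buffer value are assumptions.
func estimatedMaxScalingWaitTime(size, batchSize int, timePerBatch time.Duration) time.Duration {
	batches := int(math.Ceil(float64(size) / float64(batchSize)))
	buffer := 60 * time.Second
	return time.Duration(batches)*timePerBatch + buffer
}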
// lastStartedWorkRequest returns the *last started* work request for the specified resource or an error if none are found
func (c *instancePoolCache) lastStartedWorkRequest(compartmentID, resourceID string) (workrequests.WorkRequestSummary, error) {
klog.V(6).Infof("Looking for the last started work request for resource %s.", resourceID)
listWorkRequests, err := c.workRequestsClient.ListWorkRequests(context.Background(), workrequests.ListWorkRequestsRequest{
CompartmentId: common.String(compartmentID),
Limit: common.Int(100),
ResourceId: common.String(resourceID),
})
if err != nil {
klog.Errorf("list work requests for %s failed: %v", resourceID, err)
return workrequests.WorkRequestSummary{}, err
}
var lastStartedWorkRequest = workrequests.WorkRequestSummary{}
for _, nextWorkRequest := range listWorkRequests.Items {
	// Skip entries that never started; otherwise keep the most recently started one.
	if nextWorkRequest.TimeStarted == nil {
		continue
	}
	if lastStartedWorkRequest.TimeStarted == nil || nextWorkRequest.TimeStarted.After(lastStartedWorkRequest.TimeStarted.Time) {
		lastStartedWorkRequest = nextWorkRequest
	}
}
if lastStartedWorkRequest.TimeStarted != nil {
return lastStartedWorkRequest, nil
}
return workrequests.WorkRequestSummary{}, errors.New("no work requests found")
}
// firstUnrecoverableErrorForWorkRequest returns the first non-recoverable error message associated with the specified
// work-request ID, or the empty string if none are found.
func (c *instancePoolCache) firstUnrecoverableErrorForWorkRequest(workRequestID string) string {
klog.V(6).Infof("Looking for non-recoverable errors for work request %s.", workRequestID)
// Look through the error logs looking for known unrecoverable error message(s)
workRequestErrors, _ := c.workRequestsClient.ListWorkRequestErrors(context.Background(),
workrequests.ListWorkRequestErrorsRequest{WorkRequestId: common.String(workRequestID),
SortOrder: workrequests.ListWorkRequestErrorsSortOrderDesc})
for _, nextErr := range workRequestErrors.Items {
// Abort wait for certain unrecoverable errors such as capacity and quota issues
if strings.Contains(strings.ToLower(*nextErr.Message), strings.ToLower("QuotaExceeded")) ||
strings.Contains(strings.ToLower(*nextErr.Message), strings.ToLower("LimitExceeded")) ||
strings.Contains(strings.ToLower(*nextErr.Message), strings.ToLower("OutOfCapacity")) {
klog.V(4).Infof("Found unrecoverable error(s) in work request %s.", workRequestID)
return *nextErr.Message
}
}
klog.V(6).Infof("No non-recoverable errors for work request %s found.", workRequestID)
return ""
}

View File

@ -18,19 +18,19 @@ package oci
import (
"fmt"
"gopkg.in/gcfg.v1"
"os"
"strconv"
"strings"
"time"
"gopkg.in/gcfg.v1"
apiv1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/util/wait"
"k8s.io/client-go/kubernetes"
"k8s.io/klog/v2"
kubeletapis "k8s.io/kubelet/pkg/apis"
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider"
@ -38,6 +38,7 @@ import (
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider/oci/oci-go-sdk/v43/common"
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider/oci/oci-go-sdk/v43/common/auth"
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider/oci/oci-go-sdk/v43/core"
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider/oci/oci-go-sdk/v43/workrequests"
)
var (
@ -163,6 +164,12 @@ func CreateInstancePoolManager(cloudConfigPath string, discoveryOpts cloudprovid
}
networkClient.SetCustomClientConfiguration(clientConfig)
workRequestClient, err := workrequests.NewWorkRequestClientWithConfigurationProvider(configProvider)
if err != nil {
return nil, errors.Wrap(err, "unable to create work request client")
}
workRequestClient.SetCustomClientConfiguration(clientConfig)
cloudConfig.Global.CompartmentID = os.Getenv(ociCompartmentEnvVar)
// Not passed by --cloud-config or environment variable, attempt to use the tenancy ID as the compartment ID
@ -178,7 +185,7 @@ func CreateInstancePoolManager(cloudConfigPath string, discoveryOpts cloudprovid
cfg: cloudConfig,
staticInstancePools: map[string]*InstancePoolNodeGroup{},
shapeGetter: createShapeGetter(ShapeClientImpl{computeMgmtClient: computeMgmtClient, computeClient: computeClient}),
instancePoolCache: newInstancePoolCache(&computeMgmtClient, &computeClient, &networkClient),
instancePoolCache: newInstancePoolCache(&computeMgmtClient, &computeClient, &networkClient, &workRequestClient),
kubeClient: kubeClient,
}
@ -270,6 +277,18 @@ func (m *InstancePoolManagerImpl) forceRefresh() error {
return nil
}
func (m *InstancePoolManagerImpl) forceRefreshInstancePool(instancePoolID string) error {
if m.cfg == nil {
return errors.New("instance pool manager does have a required config")
}
if instancePoolCache, found := m.staticInstancePools[instancePoolID]; found {
return m.instancePoolCache.rebuild(map[string]*InstancePoolNodeGroup{instancePoolID: instancePoolCache}, *m.cfg)
}
return errors.New("instance pool not found")
}
// Cleanup cleans up open resources before the cloud provider is destroyed, i.e. go routines etc.
func (m *InstancePoolManagerImpl) Cleanup() error {
return nil
@ -287,7 +306,7 @@ func (m *InstancePoolManagerImpl) GetInstancePools() []*InstancePoolNodeGroup {
// GetInstancePoolNodes returns InstancePool nodes that are not in a terminal state.
func (m *InstancePoolManagerImpl) GetInstancePoolNodes(ip InstancePoolNodeGroup) ([]cloudprovider.Instance, error) {
klog.V(4).Infof("getting instances for node pool: %q", ip.Id())
klog.V(4).Infof("getting (cached) instances for node pool: %q", ip.Id())
instanceSummaries, err := m.instancePoolCache.getInstanceSummaries(ip.Id())
if err != nil {
@ -312,6 +331,13 @@ func (m *InstancePoolManagerImpl) GetInstancePoolNodes(ip InstancePoolNodeGroup)
status.State = cloudprovider.InstanceDeleting
case string(core.InstanceLifecycleStateStopping):
status.State = cloudprovider.InstanceDeleting
case instanceStateUnfulfilled:
status.State = cloudprovider.InstanceCreating
status.ErrorInfo = &cloudprovider.InstanceErrorInfo{
ErrorClass: cloudprovider.OutOfResourcesErrorClass,
ErrorCode: instanceStateUnfulfilled,
ErrorMessage: "OCI cannot provision additional instances for this instance pool. Review quota and/or capacity.",
}
}
// Instance not in a terminal or unknown state, ok to add.
@ -390,10 +416,14 @@ func (m *InstancePoolManagerImpl) GetInstancePoolSize(ip InstancePoolNodeGroup)
// SetInstancePoolSize sets instance-pool size.
func (m *InstancePoolManagerImpl) SetInstancePoolSize(np InstancePoolNodeGroup, size int) error {
klog.Infof("SetInstancePoolSize (%d) called on instance pool %s", size, np.Id())
err := m.instancePoolCache.setSize(np.Id(), size)
if err != nil {
return err
setSizeErr := m.instancePoolCache.setSize(np.Id(), size)
klog.V(5).Infof("SetInstancePoolSize was called: refreshing instance pool cache")
// refresh instance pool cache after update (regardless if there was an error or not)
_ = m.forceRefreshInstancePool(np.Id())
if setSizeErr != nil {
return setSizeErr
}
// Interface says this function should wait until node group size is updated.
@ -496,10 +526,8 @@ func getInstancePoolAvailabilityDomain(ip *core.InstancePool) (string, error) {
func buildGenericLabelsForInstancePool(instancePool *core.InstancePool, nodeName, shape, availabilityDomain string) map[string]string {
result := make(map[string]string)
result[kubeletapis.LabelArch] = cloudprovider.DefaultArch
result[apiv1.LabelArchStable] = cloudprovider.DefaultArch
result[kubeletapis.LabelOS] = cloudprovider.DefaultOS
result[apiv1.LabelOSStable] = cloudprovider.DefaultOS
parts := strings.Split(*instancePool.Id, ".")

View File

@ -3,6 +3,7 @@ package oci
import (
"context"
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider/oci/oci-go-sdk/v43/core"
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider/oci/oci-go-sdk/v43/workrequests"
"reflect"
"testing"
@ -29,6 +30,22 @@ type mockComputeClient struct {
listVnicAttachmentsResponse core.ListVnicAttachmentsResponse
}
type mockWorkRequestClient struct {
err error
}
func (m *mockWorkRequestClient) GetWorkRequest(ctx context.Context, request workrequests.GetWorkRequestRequest) (workrequests.GetWorkRequestResponse, error) {
return workrequests.GetWorkRequestResponse{}, m.err
}
func (m *mockWorkRequestClient) ListWorkRequests(ctx context.Context, request workrequests.ListWorkRequestsRequest) (workrequests.ListWorkRequestsResponse, error) {
return workrequests.ListWorkRequestsResponse{}, m.err
}
func (m *mockWorkRequestClient) ListWorkRequestErrors(ctx context.Context, request workrequests.ListWorkRequestErrorsRequest) (workrequests.ListWorkRequestErrorsResponse, error) {
return workrequests.ListWorkRequestErrorsResponse{}, m.err
}
func (m *mockComputeClient) ListVnicAttachments(ctx context.Context, request core.ListVnicAttachmentsRequest) (core.ListVnicAttachmentsResponse, error) {
return m.listVnicAttachmentsResponse, m.err
}
@ -111,6 +128,10 @@ var virtualNetworkClient = &mockVirtualNetworkClient{
},
}
var workRequestsClient = &mockWorkRequestClient{
err: nil,
}
func TestInstancePoolFromArgs(t *testing.T) {
value := `1:5:ocid1.instancepool.oc1.phx.aaaaaaaah`
@ -146,7 +167,7 @@ func TestInstancePoolFromArgs(t *testing.T) {
func TestGetSetInstancePoolSize(t *testing.T) {
nodePoolCache := newInstancePoolCache(computeManagementClient, computeClient, virtualNetworkClient)
nodePoolCache := newInstancePoolCache(computeManagementClient, computeClient, virtualNetworkClient, workRequestsClient)
nodePoolCache.poolCache["ocid1.instancepool.oc1.phx.aaaaaaaai"] = &core.InstancePool{Size: common.Int(2)}
manager := &InstancePoolManagerImpl{instancePoolCache: nodePoolCache}
@ -183,7 +204,7 @@ func TestGetSetInstancePoolSize(t *testing.T) {
func TestGetInstancePoolForInstance(t *testing.T) {
nodePoolCache := newInstancePoolCache(computeManagementClient, computeClient, virtualNetworkClient)
nodePoolCache := newInstancePoolCache(computeManagementClient, computeClient, virtualNetworkClient, workRequestsClient)
nodePoolCache.poolCache["ocid1.instancepool.oc1.phx.aaaaaaaa1"] = &core.InstancePool{
Id: common.String("ocid1.instancepool.oc1.phx.aaaaaaaa1"),
Size: common.Int(1),
@ -267,7 +288,7 @@ func TestGetInstancePoolForInstance(t *testing.T) {
func TestGetInstancePoolNodes(t *testing.T) {
nodePoolCache := newInstancePoolCache(computeManagementClient, computeClient, virtualNetworkClient)
nodePoolCache := newInstancePoolCache(computeManagementClient, computeClient, virtualNetworkClient, workRequestsClient)
nodePoolCache.poolCache["ocid1.instancepool.oc1.phx.aaaaaaaa1"] = &core.InstancePool{
Id: common.String("ocid1.instancepool.oc1.phx.aaaaaaaa1"),
CompartmentId: common.String("ocid1.compartment.oc1..aaaaaaaa1"),
@ -406,7 +427,7 @@ func TestGetInstancePoolsAndInstances(t *testing.T) {
staticInstancePools: map[string]*InstancePoolNodeGroup{
"ocid1.instancepool.oc1.phx.aaaaaaaa1": {id: "ocid1.instancepool.oc1.phx.aaaaaaaa1"},
},
instancePoolCache: newInstancePoolCache(computeManagementClient, computeClient, virtualNetworkClient),
instancePoolCache: newInstancePoolCache(computeManagementClient, computeClient, virtualNetworkClient, workRequestsClient),
}
// Populate cache(s) (twice to increase code coverage).
@ -481,7 +502,7 @@ func TestDeleteInstances(t *testing.T) {
staticInstancePools: map[string]*InstancePoolNodeGroup{
"ocid1.instancepool.oc1.phx.aaaaaaaa1": {id: "ocid1.instancepool.oc1.phx.aaaaaaaa1"},
},
instancePoolCache: newInstancePoolCache(computeManagementClient, computeClient, virtualNetworkClient),
instancePoolCache: newInstancePoolCache(computeManagementClient, computeClient, virtualNetworkClient, workRequestsClient),
}
manager.shapeGetter = createShapeGetter(shapeClient)
// Populate cache(s).

View File

@ -18,6 +18,7 @@ package oci
import (
apiv1 "k8s.io/api/core/v1"
"strings"
)
// OciRef contains a reference to some entity in the OCI world.
@ -33,6 +34,7 @@ type OciRef struct {
}
func nodeToOciRef(n *apiv1.Node) (OciRef, error) {
return OciRef{
Name: n.ObjectMeta.Name,
AvailabilityDomain: getNodeAZ(n),
@ -90,6 +92,12 @@ func getNodeExternalAddress(node *apiv1.Node) string {
// getNodeInstancePoolID returns the instance pool ID if set as a label or annotation, or an empty string if it is not found.
func getNodeInstancePoolID(node *apiv1.Node) string {
// Handle unfulfilled instance placeholders (instances that have yet to be created)
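// A placeholder name is built as fmt.Sprintf("%s%s-%d", instanceIDUnfulfilled, <instance pool OCID>, <index>)
// during the cache rebuild, so stripping the trailing "-<index>" and the prefix yields the pool OCID.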
if strings.Contains(node.Name, instanceIDUnfulfilled) {
instIndex := strings.LastIndex(node.Name, "-")
return strings.Replace(node.Name[:instIndex], instanceIDUnfulfilled, "", 1)
}
poolIDPrefixLabel, _ := node.Labels[instancePoolIDLabelPrefix]
poolIDSuffixLabel, _ := node.Labels[instancePoolIDLabelSuffix]
@ -104,6 +112,11 @@ func getNodeInstancePoolID(node *apiv1.Node) string {
// getNodeInstanceID returns the instance ID if set as a label or annotation, or an empty string if it is not found.
func getNodeInstanceID(node *apiv1.Node) string {
// Handle unfulfilled instance placeholders (instances that have yet to be created)
if strings.Contains(node.Name, instanceIDUnfulfilled) {
return node.Name
}
instancePrefixLabel, _ := node.Labels[instanceIDLabelPrefix]
instanceSuffixLabel, _ := node.Labels[instanceIDLabelSuffix]

View File

@ -19,6 +19,7 @@ package oci
import (
"context"
"fmt"
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider/oci/oci-go-sdk/v43/common"
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider/oci/oci-go-sdk/v43/core"
"k8s.io/klog/v2"
)
@ -117,10 +118,29 @@ func (osf *shapeGetterImpl) GetInstancePoolShape(ip *core.InstancePool) (*Shape,
shape.MemoryInBytes = *instanceDetails.LaunchDetails.ShapeConfig.MemoryInGBs * 1024 * 1024 * 1024
}
} else {
allShapes, _ := osf.shapeClient.ListShapes(context.Background(), core.ListShapesRequest{
CompartmentId: instanceConfig.CompartmentId,
})
for _, nextShape := range allShapes.Items {
// Fetch the shape object by name
var page *string
var everyShape []core.Shape
for {
// List all available shapes
listShapesReq := core.ListShapesRequest{
	CompartmentId: instanceConfig.CompartmentId,
	Page:          page,
	Limit:         common.Int(50),
}
listShapes, err := osf.shapeClient.ListShapes(context.Background(), listShapesReq)
if err != nil {
return nil, err
}
everyShape = append(everyShape, listShapes.Items...)
if page = listShapes.OpcNextPage; listShapes.OpcNextPage == nil {
break
}
}
for _, nextShape := range everyShape {
if *nextShape.Shape == *instanceDetails.LaunchDetails.Shape {
shape.Name = *nextShape.Shape
if nextShape.Ocpus != nil {

View File

@ -4,7 +4,6 @@ import (
"context"
apiv1 "k8s.io/api/core/v1"
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider"
kubeletapis "k8s.io/kubelet/pkg/apis"
"reflect"
"strings"
"testing"
@ -117,7 +116,7 @@ func TestGetShape(t *testing.T) {
}
func TestGetInstancePoolTemplateNode(t *testing.T) {
instancePoolCache := newInstancePoolCache(computeManagementClient, computeClient, virtualNetworkClient)
instancePoolCache := newInstancePoolCache(computeManagementClient, computeClient, virtualNetworkClient, workRequestsClient)
instancePoolCache.poolCache["ocid1.instancepool.oc1.phx.aaaaaaaa1"] = &core.InstancePool{
Id: common.String("ocid1.instancepool.oc1.phx.aaaaaaaa1"),
CompartmentId: common.String("ocid1.compartment.oc1..aaaaaaaa1"),
@ -182,9 +181,7 @@ func TestBuildGenericLabels(t *testing.T) {
availabilityDomain := "US-ASHBURN-1"
expected := map[string]string{
kubeletapis.LabelArch: cloudprovider.DefaultArch,
apiv1.LabelArchStable: cloudprovider.DefaultArch,
kubeletapis.LabelOS: cloudprovider.DefaultOS,
apiv1.LabelOSStable: cloudprovider.DefaultOS,
apiv1.LabelZoneRegion: "phx",
apiv1.LabelZoneRegionStable: "phx",

View File

@ -151,6 +151,11 @@ func (provider *OVHCloudProvider) NodeGroupForNode(node *apiv1.Node) (cloudprovi
return ng, err
}
// HasInstance returns whether a given node has a corresponding instance in this cloud provider
func (provider *OVHCloudProvider) HasInstance(node *apiv1.Node) (bool, error) {
return true, cloudprovider.ErrNotImplemented
}
// findNodeGroupFromCache tries to retrieve the associated node group from an already built mapping in cache
func (provider *OVHCloudProvider) findNodeGroupFromCache(providerID string) cloudprovider.NodeGroup {
if ng, ok := provider.manager.NodeGroupPerProviderID[providerID]; ok {

Some files were not shown because too many files have changed in this diff.