Compare commits

28 Commits: cluster-au...master

Commits in this comparison (SHA1):

c7a51426e2
c1352dad7c
36804f199c
172a22c195
4bd2e67a1d
72bf359268
26d6b38699
ff6e93bfc3
1938d3971a
3d748040d9
2eb5adda2c
0d36de3aa2
1d5f0471bc
1fbc7a9d48
e8941e59a0
9a256e5c83
f9b93ec395
0d14eca879
aae2a010f1
c187e7f147
9bc422016f
008a3b916e
77cb4c8bf8
96b13193e3
e51dcfb60b
6eebb82f0d
9cc45e2a24
8a954bc021
@@ -23,7 +23,7 @@ We'd love to accept your patches! Before we can take them, we have to jump a cou

 All changes must be code reviewed. Coding conventions and standards are explained in the official
 [developer docs](https://github.com/kubernetes/community/tree/master/contributors/devel). Expect
-reviewers to request that you avoid common [go style mistakes](https://github.com/golang/go/wiki/CodeReviewComments)
+reviewers to request that you avoid common [go style mistakes](https://go.dev/wiki/CodeReviewComments)
 in your PRs.

 ### Merge Approval
@@ -107,7 +107,20 @@ build-in-docker-arch-%: clean-arch-% docker-builder
 	docker run ${RM_FLAG} -v `pwd`:/gopath/src/k8s.io/autoscaler/cluster-autoscaler/:Z autoscaling-builder:latest \
 		bash -c 'cd /gopath/src/k8s.io/autoscaler/cluster-autoscaler && BUILD_TAGS=${BUILD_TAGS} LDFLAGS="${LDFLAGS}" make build-arch-$*'

-release: $(addprefix build-in-docker-arch-,$(ALL_ARCH)) execute-release
+release-extract-version = $(shell cat version/version.go | grep "Version =" | cut -d '"' -f 2)
+
+release-validate:
+	@if [ -z $(shell git tag --points-at HEAD | grep -e ^cluster-autoscaler-1.[1-9][0-9]*.[0-9][0-9]*$) ]; then \
+		echo "Can't release from this commit, there is no compatible git tag"; \
+		exit 1; \
+	fi
+	@if [ -z $(shell git tag --points-at HEAD | grep -e $(call release-extract-version)) ]; then \
+		echo "Can't release from this commit, git tag does not match version/version.go"; \
+		exit 1; \
+	fi
+
+release: TAG=v$(call release-extract-version)
+release: release-validate $(addprefix build-in-docker-arch-,$(ALL_ARCH)) execute-release
 	@echo "Full in-docker release ${TAG}${FOR_PROVIDER} completed"

 container: container-arch-$(GOARCH)
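For orientation, the two `release-validate` guards behave like the following standalone shell sketch; the tag and version values are illustrative, not taken from this diff:

```bash
# Sketch of the release-validate guards, run from the cluster-autoscaler directory.
# A hypothetical release would be tagged e.g. "cluster-autoscaler-1.30.0".

# Guard 1: HEAD must carry a compatible cluster-autoscaler-1.X.Y release tag.
git tag --points-at HEAD | grep -e '^cluster-autoscaler-1.[1-9][0-9]*.[0-9][0-9]*$' \
  || { echo "Can't release from this commit, there is no compatible git tag"; exit 1; }

# Guard 2: the tag must contain the version string from version/version.go,
# extracted the same way release-extract-version does.
VERSION=$(grep "Version =" version/version.go | cut -d '"' -f 2)
git tag --points-at HEAD | grep -e "$VERSION" \
  || { echo "Can't release from this commit, git tag does not match version/version.go"; exit 1; }
```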
@@ -10,5 +10,6 @@ reviewers:
 - feiskyer
 - vadasambar
 - x13n
+- elmiko
 labels:
 - area/cluster-autoscaler
@@ -103,6 +103,9 @@ type Config struct {

 	// EnableFastDeleteOnFailedProvisioning defines whether to delete the experimental faster VMSS instance deletion on failed provisioning
 	EnableFastDeleteOnFailedProvisioning bool `json:"enableFastDeleteOnFailedProvisioning,omitempty" yaml:"enableFastDeleteOnFailedProvisioning,omitempty"`
+
+	// EnableLabelPredictionsOnTemplate defines whether to enable label predictions on the template when scaling from zero
+	EnableLabelPredictionsOnTemplate bool `json:"enableLabelPredictionsOnTemplate,omitempty" yaml:"enableLabelPredictionsOnTemplate,omitempty"`
 }

 // These are only here for backward compabitility. Their equivalent exists in providerazure.Config with a different name.
@@ -133,6 +136,7 @@ func BuildAzureConfig(configReader io.Reader) (*Config, error) {
 	cfg.VMType = providerazureconsts.VMTypeVMSS
 	cfg.MaxDeploymentsCount = int64(defaultMaxDeploymentsCount)
 	cfg.StrictCacheUpdates = false
+	cfg.EnableLabelPredictionsOnTemplate = true

 	// Config file overrides defaults
 	if configReader != nil {
@@ -308,6 +312,9 @@ func BuildAzureConfig(configReader io.Reader) (*Config, error) {
 	if _, err = assignBoolFromEnvIfExists(&cfg.EnableFastDeleteOnFailedProvisioning, "AZURE_ENABLE_FAST_DELETE_ON_FAILED_PROVISIONING"); err != nil {
 		return nil, err
 	}
+	if _, err = assignBoolFromEnvIfExists(&cfg.EnableLabelPredictionsOnTemplate, "AZURE_ENABLE_LABEL_PREDICTIONS_ON_TEMPLATE"); err != nil {
+		return nil, err
+	}

 	// Nonstatic defaults
 	cfg.VMType = strings.ToLower(cfg.VMType)
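A sketch of how the new toggle can be set, based on the environment variable name and JSON tag visible in the hunks above; the file name and values are hypothetical:

```bash
# Option 1: environment variable, read by assignBoolFromEnvIfExists.
export AZURE_ENABLE_LABEL_PREDICTIONS_ON_TEMPLATE=false

# Option 2: cloud-config JSON, using the json tag declared on
# Config.EnableLabelPredictionsOnTemplate (azure-config.json is a made-up name).
cat <<'EOF' > azure-config.json
{
  "enableLabelPredictionsOnTemplate": false
}
EOF
```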
@@ -89,6 +89,8 @@ type ScaleSet struct {
 	dedicatedHost bool

 	enableFastDeleteOnFailedProvisioning bool
+
+	enableLabelPredictionsOnTemplate bool
 }

 // NewScaleSet creates a new NewScaleSet.
@@ -108,10 +110,11 @@ func NewScaleSet(spec *dynamic.NodeGroupSpec, az *AzureManager, curSize int64, d
 			instancesRefreshJitter: az.config.VmssVmsCacheJitter,
 		},

 		enableForceDelete:         az.config.EnableForceDelete,
 		enableDynamicInstanceList: az.config.EnableDynamicInstanceList,
 		enableDetailedCSEMessage:  az.config.EnableDetailedCSEMessage,
-		dedicatedHost:             dedicatedHost,
+		enableLabelPredictionsOnTemplate: az.config.EnableLabelPredictionsOnTemplate,
+		dedicatedHost:                    dedicatedHost,
 	}

 	if az.config.VmssVirtualMachinesCacheTTLInSeconds != 0 {
@@ -662,7 +665,7 @@ func (scaleSet *ScaleSet) TemplateNodeInfo() (*framework.NodeInfo, error) {
 	if err != nil {
 		return nil, err
 	}
-	node, err := buildNodeFromTemplate(scaleSet.Name, template, scaleSet.manager, scaleSet.enableDynamicInstanceList)
+	node, err := buildNodeFromTemplate(scaleSet.Name, template, scaleSet.manager, scaleSet.enableDynamicInstanceList, scaleSet.enableLabelPredictionsOnTemplate)
 	if err != nil {
 		return nil, err
 	}
@@ -211,7 +211,7 @@ func buildNodeTemplateFromVMPool(vmsPool armcontainerservice.AgentPool, location
 	}, nil
 }

-func buildNodeFromTemplate(nodeGroupName string, template NodeTemplate, manager *AzureManager, enableDynamicInstanceList bool) (*apiv1.Node, error) {
+func buildNodeFromTemplate(nodeGroupName string, template NodeTemplate, manager *AzureManager, enableDynamicInstanceList bool, enableLabelPrediction bool) (*apiv1.Node, error) {
 	node := apiv1.Node{}
 	nodeName := fmt.Sprintf("%s-asg-%d", nodeGroupName, rand.Int63())
@@ -272,7 +272,7 @@ func buildNodeFromTemplate(nodeGroupName string, template NodeTemplate, manager
 	node.Status.Allocatable = node.Status.Capacity

 	if template.VMSSNodeTemplate != nil {
-		node = processVMSSTemplate(template, nodeName, node)
+		node = processVMSSTemplate(template, nodeName, node, enableLabelPrediction)
 	} else if template.VMPoolNodeTemplate != nil {
 		node = processVMPoolTemplate(template, nodeName, node)
 	} else {
@@ -298,7 +298,7 @@ func processVMPoolTemplate(template NodeTemplate, nodeName string, node apiv1.No
 	return node
 }

-func processVMSSTemplate(template NodeTemplate, nodeName string, node apiv1.Node) apiv1.Node {
+func processVMSSTemplate(template NodeTemplate, nodeName string, node apiv1.Node, enableLabelPrediction bool) apiv1.Node {
 	// NodeLabels
 	if template.VMSSNodeTemplate.Tags != nil {
 		for k, v := range template.VMSSNodeTemplate.Tags {
@@ -324,45 +324,50 @@ func processVMSSTemplate(template NodeTemplate, nodeName string, node apiv1.Node
 		labels = extractLabelsFromTags(template.VMSSNodeTemplate.Tags)
 	}

-	// Add the agentpool label, its value should come from the VMSS poolName tag
-	// NOTE: The plan is for agentpool label to be deprecated in favor of the aks-prefixed one
-	// We will have to live with both labels for a while
-	if node.Labels[legacyPoolNameTag] != "" {
-		labels[legacyAgentPoolNodeLabelKey] = node.Labels[legacyPoolNameTag]
-		labels[agentPoolNodeLabelKey] = node.Labels[legacyPoolNameTag]
-	}
-	if node.Labels[poolNameTag] != "" {
-		labels[legacyAgentPoolNodeLabelKey] = node.Labels[poolNameTag]
-		labels[agentPoolNodeLabelKey] = node.Labels[poolNameTag]
-	}
+	// This is the best-effort to match AKS system labels,
+	// this prediction needs to be constantly worked on and maintained to keep up with the changes in AKS
+	if enableLabelPrediction {
+		// Add the agentpool label, its value should come from the VMSS poolName tag
+		// NOTE: The plan is for agentpool label to be deprecated in favor of the aks-prefixed one
+		// We will have to live with both labels for a while
+		if node.Labels[legacyPoolNameTag] != "" {
+			labels[legacyAgentPoolNodeLabelKey] = node.Labels[legacyPoolNameTag]
+			labels[agentPoolNodeLabelKey] = node.Labels[legacyPoolNameTag]
+		}
+		if node.Labels[poolNameTag] != "" {
+			labels[legacyAgentPoolNodeLabelKey] = node.Labels[poolNameTag]
+			labels[agentPoolNodeLabelKey] = node.Labels[poolNameTag]
+		}
+	}

 	// Add the storage profile and storage tier labels for vmss node
 	if template.VMSSNodeTemplate.OSDisk != nil {
 		// ephemeral
 		if template.VMSSNodeTemplate.OSDisk.DiffDiskSettings != nil && template.VMSSNodeTemplate.OSDisk.DiffDiskSettings.Option == compute.Local {
 			labels[legacyStorageProfileNodeLabelKey] = "ephemeral"
 			labels[storageProfileNodeLabelKey] = "ephemeral"
 		} else {
 			labels[legacyStorageProfileNodeLabelKey] = "managed"
 			labels[storageProfileNodeLabelKey] = "managed"
 		}
 		if template.VMSSNodeTemplate.OSDisk.ManagedDisk != nil {
 			labels[legacyStorageTierNodeLabelKey] = string(template.VMSSNodeTemplate.OSDisk.ManagedDisk.StorageAccountType)
 			labels[storageTierNodeLabelKey] = string(template.VMSSNodeTemplate.OSDisk.ManagedDisk.StorageAccountType)
 		}
-		// Add ephemeral-storage value
-		if template.VMSSNodeTemplate.OSDisk.DiskSizeGB != nil {
-			node.Status.Capacity[apiv1.ResourceEphemeralStorage] = *resource.NewQuantity(int64(int(*template.VMSSNodeTemplate.OSDisk.DiskSizeGB)*1024*1024*1024), resource.DecimalSI)
-			klog.V(4).Infof("OS Disk Size from template is: %d", *template.VMSSNodeTemplate.OSDisk.DiskSizeGB)
-			klog.V(4).Infof("Setting ephemeral storage to: %v", node.Status.Capacity[apiv1.ResourceEphemeralStorage])
-		}
 	}

 	// If we are on GPU-enabled SKUs, append the accelerator
 	// label so that CA makes better decision when scaling from zero for GPU pools
 	if isNvidiaEnabledSKU(template.SkuName) {
 		labels[GPULabel] = "nvidia"
 		labels[legacyGPULabel] = "nvidia"
 	}
+
+	// Add ephemeral-storage value
+	if template.VMSSNodeTemplate.OSDisk != nil && template.VMSSNodeTemplate.OSDisk.DiskSizeGB != nil {
+		node.Status.Capacity[apiv1.ResourceEphemeralStorage] = *resource.NewQuantity(int64(int(*template.VMSSNodeTemplate.OSDisk.DiskSizeGB)*1024*1024*1024), resource.DecimalSI)
+		klog.V(4).Infof("OS Disk Size from template is: %d", *template.VMSSNodeTemplate.OSDisk.DiskSizeGB)
+		klog.V(4).Infof("Setting ephemeral storage to: %v", node.Status.Capacity[apiv1.ResourceEphemeralStorage])
+	}

 	// Extract allocatables from tags
@@ -291,3 +291,91 @@ func makeTaintSet(taints []apiv1.Taint) map[apiv1.Taint]bool {
 	}
 	return set
 }
+
+func TestBuildNodeFromTemplateWithLabelPrediction(t *testing.T) {
+	poolName := "testpool"
+	testSkuName := "Standard_DS2_v2"
+	testNodeName := "test-node"
+
+	vmss := compute.VirtualMachineScaleSet{
+		Response: autorest.Response{},
+		Sku:      &compute.Sku{Name: &testSkuName},
+		Plan:     nil,
+		VirtualMachineScaleSetProperties: &compute.VirtualMachineScaleSetProperties{
+			VirtualMachineProfile: &compute.VirtualMachineScaleSetVMProfile{
+				StorageProfile: &compute.VirtualMachineScaleSetStorageProfile{
+					OsDisk: &compute.VirtualMachineScaleSetOSDisk{
+						DiffDiskSettings: nil, // This makes it managed
+						ManagedDisk: &compute.VirtualMachineScaleSetManagedDiskParameters{
+							StorageAccountType: compute.StorageAccountTypesPremiumLRS,
+						},
+					},
+				},
+			},
+		},
+		Tags: map[string]*string{
+			"poolName": &poolName,
+		},
+		Zones:    &[]string{"1", "2"},
+		Location: to.StringPtr("westus"),
+	}
+
+	template, err := buildNodeTemplateFromVMSS(vmss, map[string]string{}, "")
+	assert.NoError(t, err)
+
+	manager := &AzureManager{}
+	node, err := buildNodeFromTemplate(testNodeName, template, manager, false, true)
+	assert.NoError(t, err)
+	assert.NotNil(t, node)
+
+	// Verify label prediction labels are added
+	assert.Equal(t, poolName, node.Labels["agentpool"])
+	assert.Equal(t, poolName, node.Labels["kubernetes.azure.com/agentpool"])
+	assert.Equal(t, "managed", node.Labels["storageprofile"])
+	assert.Equal(t, "managed", node.Labels["kubernetes.azure.com/storageprofile"])
+}
+
+func TestBuildNodeFromTemplateWithEphemeralStorage(t *testing.T) {
+	poolName := "testpool"
+	testSkuName := "Standard_DS2_v2"
+	testNodeName := "test-node"
+	diskSizeGB := int32(128)
+
+	vmss := compute.VirtualMachineScaleSet{
+		Response: autorest.Response{},
+		Sku:      &compute.Sku{Name: &testSkuName},
+		Plan:     nil,
+		VirtualMachineScaleSetProperties: &compute.VirtualMachineScaleSetProperties{
+			VirtualMachineProfile: &compute.VirtualMachineScaleSetVMProfile{
+				StorageProfile: &compute.VirtualMachineScaleSetStorageProfile{
+					OsDisk: &compute.VirtualMachineScaleSetOSDisk{
+						DiskSizeGB:       &diskSizeGB,
+						DiffDiskSettings: nil, // This makes it managed
+						ManagedDisk: &compute.VirtualMachineScaleSetManagedDiskParameters{
+							StorageAccountType: compute.StorageAccountTypesPremiumLRS,
+						},
+					},
+				},
+			},
+		},
+		Tags: map[string]*string{
+			"poolName": &poolName,
+		},
+		Zones:    &[]string{"1", "2"},
+		Location: to.StringPtr("westus"),
+	}
+
+	template, err := buildNodeTemplateFromVMSS(vmss, map[string]string{}, "")
+	assert.NoError(t, err)
+
+	manager := &AzureManager{}
+	node, err := buildNodeFromTemplate(testNodeName, template, manager, false, false)
+	assert.NoError(t, err)
+	assert.NotNil(t, node)
+
+	// Verify ephemeral storage is set correctly
+	expectedEphemeralStorage := resource.NewQuantity(int64(diskSizeGB)*1024*1024*1024, resource.DecimalSI)
+	ephemeralStorage, exists := node.Status.Capacity[apiv1.ResourceEphemeralStorage]
+	assert.True(t, exists)
+	assert.Equal(t, expectedEphemeralStorage.String(), ephemeralStorage.String())
+}
@@ -469,7 +469,7 @@ func (vmPool *VMPool) TemplateNodeInfo() (*framework.NodeInfo, error) {
 	if err != nil {
 		return nil, err
 	}
-	node, err := buildNodeFromTemplate(vmPool.agentPoolName, template, vmPool.manager, vmPool.manager.config.EnableDynamicInstanceList)
+	node, err := buildNodeFromTemplate(vmPool.agentPoolName, template, vmPool.manager, vmPool.manager.config.EnableDynamicInstanceList, false)
 	if err != nil {
 		return nil, err
 	}
@@ -79,6 +79,12 @@ in the staging namespace, belonging to the purple cluster, with the label owner=

 ## Connecting cluster-autoscaler to Cluster API management and workload Clusters

+> [!IMPORTANT]
+> `--cloud-config` is the flag for specifying a mount volume path to the kubernetes configuration (ie KUBECONFIG) to the cluster-autoscaler for communicating with the cluster-api management cluster for the purpose of scaling machines.
+
+> [!IMPORTANT]
+> `--kubeconfig` is the flag for specifying a mount volume path to the kubernetes configuration (ie KUBECONFIG) to the cluster-autoscaler for communicating with the cluster-api workload cluster for the purpose of watching Nodes and Pods. This flag can be affected by the desired topology for deploying the cluster-autoscaler, please see the diagrams below for more information.
+
 You will also need to provide the path to the kubeconfig(s) for the management
 and workload cluster you wish cluster-autoscaler to run against. To specify the
 kubeconfig path for the workload cluster to monitor, use the `--kubeconfig`
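Read together, the two notes imply an invocation shaped roughly like the following sketch; only the flag names come from the text above, and the mount paths are placeholders:

```bash
# Sketch: one autoscaler process talking to both clusters.
# --cloud-config -> management cluster kubeconfig (scaling machines)
# --kubeconfig   -> workload cluster kubeconfig (watching Nodes and Pods)
cluster-autoscaler \
  --cloud-provider=clusterapi \
  --cloud-config=/mnt/kubeconfig/management-cluster.kubeconfig \
  --kubeconfig=/mnt/kubeconfig/workload-cluster.kubeconfig
```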
@@ -10,16 +10,16 @@ The cluster autoscaler for Hetzner Cloud scales worker nodes.

 `HCLOUD_IMAGE` Defaults to `ubuntu-20.04`, @see https://docs.hetzner.cloud/#images. You can also use an image ID here (e.g. `15512617`), or a label selector associated with a custom snapshot (e.g. `customized_ubuntu=true`). The most recent snapshot will be used in the latter case.

 `HCLOUD_CLUSTER_CONFIG` This is the new format replacing
 * `HCLOUD_CLOUD_INIT`
 * `HCLOUD_IMAGE`

 Base64 encoded JSON according to the following structure

 ```json
 {
   "imagesForArch": { // These should be the same format as HCLOUD_IMAGE
     "arm64": "",
     "amd64": ""
   },
   "nodeConfigs": {
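Since `HCLOUD_CLUSTER_CONFIG` must hold the JSON above in base64 form, it is typically produced along these lines; the file name is hypothetical, and `-w 0` is GNU coreutils (omit it on macOS):

```bash
# Encode cluster-config.json (containing the JSON structure above) into the
# HCLOUD_CLUSTER_CONFIG environment variable.
export HCLOUD_CLUSTER_CONFIG=$(base64 -w 0 < cluster-config.json)
```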
@@ -28,7 +28,7 @@ The cluster autoscaler for Hetzner Cloud scales worker nodes.
       "labels": {
         "node.kubernetes.io/role": "autoscaler-node"
       },
       "taints":
       [
         {
           "key": "node.kubernetes.io/role",
@@ -47,6 +47,13 @@ Can be useful when you have many different node pools and run into issues of the

 **NOTE**: In contrast to `HCLOUD_CLUSTER_CONFIG`, this file is not base64 encoded.

+The global `imagesForArch` configuration can be overridden on a per-nodepool basis by adding an `imagesForArch` field to individual nodepool configurations.
+
+The image selection logic works as follows:
+1. If a nodepool has its own `imagesForArch` configuration, it will be used for that specific nodepool
+1. If a nodepool doesn't have `imagesForArch` configured, the global `imagesForArch` configuration will be used as a fallback
+1. If neither is configured, the legacy `HCLOUD_IMAGE` environment variable will be used
+
 `HCLOUD_NETWORK` Default empty , The id or name of the network that is used in the cluster , @see https://docs.hetzner.cloud/#networks
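To illustrate the fallback order above, a config where one pool overrides the global images might look like this sketch; pool names and image labels are invented, mirroring the test fixtures added later in this comparison:

```bash
# Hypothetical HCLOUD_CLUSTER_CONFIG payload, shown before base64 encoding.
# pool1 uses its own imagesForArch; pool2 falls back to the global block;
# if the global block were also absent, the legacy HCLOUD_IMAGE would be used.
cat <<'EOF'
{
  "imagesForArch": { "arm64": "global-arm64-image", "amd64": "global-amd64-image" },
  "nodeConfigs": {
    "pool1": {
      "imagesForArch": { "arm64": "pool1-arm64-image", "amd64": "pool1-amd64-image" }
    },
    "pool2": {}
  }
}
EOF
```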
@@ -105,5 +112,5 @@ git add hcloud-go/

 ## Debugging

 To enable debug logging, set the log level of the autoscaler to at least level 5 via cli flag: `--v=5`
 The logs will include all requests and responses made towards the Hetzner API including headers and body.
@@ -77,6 +77,7 @@ type NodeConfig struct {
 	PlacementGroup string
 	Taints         []apiv1.Taint
 	Labels         map[string]string
+	ImagesForArch  *ImageList
 }

 // LegacyConfig holds the configuration in the legacy format
@@ -528,12 +528,20 @@ func findImage(n *hetznerNodeGroup, serverType *hcloud.ServerType) (*hcloud.Imag
 	// Select correct image based on server type architecture
 	imageName := n.manager.clusterConfig.LegacyConfig.ImageName
 	if n.manager.clusterConfig.IsUsingNewFormat {
+		// Check for nodepool-specific images first, then fall back to global images
+		var imagesForArch *ImageList
+		if nodeConfig, exists := n.manager.clusterConfig.NodeConfigs[n.id]; exists && nodeConfig.ImagesForArch != nil {
+			imagesForArch = nodeConfig.ImagesForArch
+		} else {
+			imagesForArch = &n.manager.clusterConfig.ImagesForArch
+		}
+
 		if serverType.Architecture == hcloud.ArchitectureARM {
-			imageName = n.manager.clusterConfig.ImagesForArch.Arm64
+			imageName = imagesForArch.Arm64
 		}

 		if serverType.Architecture == hcloud.ArchitectureX86 {
-			imageName = n.manager.clusterConfig.ImagesForArch.Amd64
+			imageName = imagesForArch.Amd64
 		}
 	}
@@ -0,0 +1,174 @@
+/*
+Copyright 2019 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package hetzner
+
+import (
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+)
+
+func TestFindImageWithPerNodepoolConfig(t *testing.T) {
+	// Test case 1: Nodepool with specific imagesForArch should use those images
+	t.Run("nodepool with specific imagesForArch", func(t *testing.T) {
+		manager := &hetznerManager{
+			clusterConfig: &ClusterConfig{
+				IsUsingNewFormat: true,
+				ImagesForArch: ImageList{
+					Arm64: "global-arm64-image",
+					Amd64: "global-amd64-image",
+				},
+				NodeConfigs: map[string]*NodeConfig{
+					"pool1": {
+						ImagesForArch: &ImageList{
+							Arm64: "pool1-arm64-image",
+							Amd64: "pool1-amd64-image",
+						},
+					},
+				},
+			},
+		}
+
+		nodeGroup := &hetznerNodeGroup{
+			id:      "pool1",
+			manager: manager,
+		}
+
+		// This would normally call the actual API, but we're just testing the logic
+		// The actual image selection logic is in findImage function
+		// For this test, we'll verify the configuration is set up correctly
+		nodeConfig, exists := manager.clusterConfig.NodeConfigs[nodeGroup.id]
+		require.True(t, exists)
+		require.NotNil(t, nodeConfig.ImagesForArch)
+		assert.Equal(t, "pool1-arm64-image", nodeConfig.ImagesForArch.Arm64)
+		assert.Equal(t, "pool1-amd64-image", nodeConfig.ImagesForArch.Amd64)
+	})
+
+	// Test case 2: Nodepool without specific imagesForArch should fall back to global
+	t.Run("nodepool without specific imagesForArch", func(t *testing.T) {
+		manager := &hetznerManager{
+			clusterConfig: &ClusterConfig{
+				IsUsingNewFormat: true,
+				ImagesForArch: ImageList{
+					Arm64: "global-arm64-image",
+					Amd64: "global-amd64-image",
+				},
+				NodeConfigs: map[string]*NodeConfig{
+					"pool2": {
+						// No ImagesForArch specified
+					},
+				},
+			},
+		}
+
+		nodeGroup := &hetznerNodeGroup{
+			id:      "pool2",
+			manager: manager,
+		}
+
+		nodeConfig, exists := manager.clusterConfig.NodeConfigs[nodeGroup.id]
+		require.True(t, exists)
+		assert.Nil(t, nodeConfig.ImagesForArch)
+		assert.Equal(t, "global-arm64-image", manager.clusterConfig.ImagesForArch.Arm64)
+		assert.Equal(t, "global-amd64-image", manager.clusterConfig.ImagesForArch.Amd64)
+	})
+
+	// Test case 3: Nodepool with nil ImagesForArch should fall back to global
+	t.Run("nodepool with nil imagesForArch", func(t *testing.T) {
+		manager := &hetznerManager{
+			clusterConfig: &ClusterConfig{
+				IsUsingNewFormat: true,
+				ImagesForArch: ImageList{
+					Arm64: "global-arm64-image",
+					Amd64: "global-amd64-image",
+				},
+				NodeConfigs: map[string]*NodeConfig{
+					"pool3": {
+						ImagesForArch: nil, // Explicitly nil
+					},
+				},
+			},
+		}
+
+		nodeGroup := &hetznerNodeGroup{
+			id:      "pool3",
+			manager: manager,
+		}
+
+		nodeConfig, exists := manager.clusterConfig.NodeConfigs[nodeGroup.id]
+		require.True(t, exists)
+		assert.Nil(t, nodeConfig.ImagesForArch)
+		assert.Equal(t, "global-arm64-image", manager.clusterConfig.ImagesForArch.Arm64)
+		assert.Equal(t, "global-amd64-image", manager.clusterConfig.ImagesForArch.Amd64)
+	})
+}
+
+func TestImageSelectionLogic(t *testing.T) {
+	// Test the image selection logic that would be used in findImage function
+	t.Run("image selection logic", func(t *testing.T) {
+		manager := &hetznerManager{
+			clusterConfig: &ClusterConfig{
+				IsUsingNewFormat: true,
+				ImagesForArch: ImageList{
+					Arm64: "global-arm64-image",
+					Amd64: "global-amd64-image",
+				},
+				NodeConfigs: map[string]*NodeConfig{
+					"pool1": {
+						ImagesForArch: &ImageList{
+							Arm64: "pool1-arm64-image",
+							Amd64: "pool1-amd64-image",
+						},
+					},
+					"pool2": {
+						// No ImagesForArch specified
+					},
+				},
+			},
+		}
+
+		// Test pool1 (has specific imagesForArch)
+		nodeConfig, exists := manager.clusterConfig.NodeConfigs["pool1"]
+		require.True(t, exists)
+		require.NotNil(t, nodeConfig.ImagesForArch)
+
+		var imagesForArch *ImageList
+		if nodeConfig.ImagesForArch != nil {
+			imagesForArch = nodeConfig.ImagesForArch
+		} else {
+			imagesForArch = &manager.clusterConfig.ImagesForArch
+		}
+
+		assert.Equal(t, "pool1-arm64-image", imagesForArch.Arm64)
+		assert.Equal(t, "pool1-amd64-image", imagesForArch.Amd64)
+
+		// Test pool2 (no specific imagesForArch, should use global)
+		nodeConfig, exists = manager.clusterConfig.NodeConfigs["pool2"]
+		require.True(t, exists)
+		assert.Nil(t, nodeConfig.ImagesForArch)
+
+		if nodeConfig.ImagesForArch != nil {
+			imagesForArch = nodeConfig.ImagesForArch
+		} else {
+			imagesForArch = &manager.clusterConfig.ImagesForArch
+		}
+
+		assert.Equal(t, "global-arm64-image", imagesForArch.Arm64)
+		assert.Equal(t, "global-amd64-image", imagesForArch.Amd64)
+	})
+}
@@ -461,12 +461,6 @@ func (m *ociManagerImpl) GetExistingNodePoolSizeViaCompute(np NodePool) (int, er
 		if !strings.HasPrefix(*item.DisplayName, displayNamePrefix) {
 			continue
 		}
-		// A node pool can fail to scale up if there's no capacity in the region. In that case, the node pool will be
-		// returned by the API, but it will not actually exist or have an ID, so we don't want to tell the autoscaler about it.
-		if *item.Id == "" {
-			klog.V(4).Infof("skipping node as it doesn't have a scaled-up instance")
-			continue
-		}
 		switch item.LifecycleState {
 		case core.InstanceLifecycleStateStopped, core.InstanceLifecycleStateTerminated:
 			klog.V(4).Infof("skipping instance is in stopped/terminated state: %q", *item.Id)
@@ -525,25 +519,23 @@ func (m *ociManagerImpl) GetNodePoolNodes(np NodePool) ([]cloudprovider.Instance

 	nodePool, err := m.nodePoolCache.get(np.Id())
 	if err != nil {
+		klog.Error(err, "error while performing GetNodePoolNodes call")
 		return nil, err
 	}

 	var instances []cloudprovider.Instance
 	for _, node := range nodePool.Nodes {
-		// A node pool can fail to scale up if there's no capacity in the region. In that case, the node pool will be
-		// returned by the API, but it will not actually exist or have an ID, so we don't want to tell the autoscaler about it.
-		if *node.Id == "" {
-			klog.V(4).Infof("skipping node as it doesn't have a scaled-up instance")
-			continue
-		}
-
 		if node.NodeError != nil {
+			// We should move away from the approach of determining a node error as a Out of host capacity
+			// through string comparison. An error code specifically for Out of host capacity must be set
+			// and returned in the API response.
 			errorClass := cloudprovider.OtherErrorClass
 			if *node.NodeError.Code == "LimitExceeded" ||
-				(*node.NodeError.Code == "InternalServerError" &&
-					strings.Contains(*node.NodeError.Message, "quota")) {
+				*node.NodeError.Code == "QuotaExceeded" ||
+				(*node.NodeError.Code == "InternalError" &&
+					strings.Contains(*node.NodeError.Message, "Out of host capacity")) {
 				errorClass = cloudprovider.OutOfResourcesErrorClass
 			}
@@ -6,12 +6,11 @@ package nodepools

 import (
 	"context"
+	"k8s.io/autoscaler/cluster-autoscaler/cloudprovider/oci/nodepools/consts"
 	"net/http"
 	"reflect"
 	"testing"

-	"k8s.io/autoscaler/cluster-autoscaler/cloudprovider/oci/nodepools/consts"
-
 	apiv1 "k8s.io/api/core/v1"
 	"k8s.io/autoscaler/cluster-autoscaler/cloudprovider"
 	"k8s.io/autoscaler/cluster-autoscaler/cloudprovider/oci/vendor-internal/github.com/oracle/oci-go-sdk/v65/common"
@@ -120,16 +119,10 @@ func TestGetNodePoolNodes(t *testing.T) {
 			{
 				Id: common.String("node8"),
 				NodeError: &oke.NodeError{
-					Code:    common.String("InternalServerError"),
-					Message: common.String("blah blah quota exceeded blah blah"),
+					Code:    common.String("InternalError"),
+					Message: common.String("blah blah Out of host capacity blah blah"),
 				},
 			},
-			{
-				// This case happens if a node fails to scale up due to lack of capacity in the region.
-				// It's not a real node, so we shouldn't return it in the list of nodes.
-				Id:             common.String(""),
-				LifecycleState: oke.NodeLifecycleStateCreating,
-			},
 		},
 	}
@@ -186,8 +179,8 @@ func TestGetNodePoolNodes(t *testing.T) {
 			State: cloudprovider.InstanceCreating,
 			ErrorInfo: &cloudprovider.InstanceErrorInfo{
 				ErrorClass:   cloudprovider.OutOfResourcesErrorClass,
-				ErrorCode:    "InternalServerError",
-				ErrorMessage: "blah blah quota exceeded blah blah",
+				ErrorCode:    "InternalError",
+				ErrorMessage: "blah blah Out of host capacity blah blah",
 			},
 		},
 	},
@@ -214,6 +214,27 @@ func (np *nodePool) DecreaseTargetSize(delta int) error {
 		}
 	}
 	klog.V(4).Infof("DECREASE_TARGET_CHECK_VIA_COMPUTE: %v", decreaseTargetCheckViaComputeBool)
+	np.manager.InvalidateAndRefreshCache()
+	nodes, err := np.manager.GetNodePoolNodes(np)
+	if err != nil {
+		klog.V(4).Error(err, "error while performing GetNodePoolNodes call")
+		return err
+	}
+	// We do not have an OCI API that allows us to delete a node with a compute instance. So we rely on
+	// the below approach to determine the number running instance in a nodepool from the compute API and
+	// update the size of the nodepool accordingly. We should move away from this approach once we have an API
+	// to delete a specific node without a compute instance.
+	if !decreaseTargetCheckViaComputeBool {
+		for _, node := range nodes {
+			if node.Status != nil && node.Status.ErrorInfo != nil {
+				if node.Status.ErrorInfo.ErrorClass == cloudprovider.OutOfResourcesErrorClass {
+					klog.Infof("Using Compute to calculate nodepool size as nodepool may contain nodes without a compute instance.")
+					decreaseTargetCheckViaComputeBool = true
+					break
+				}
+			}
+		}
+	}
 	var nodesLen int
 	if decreaseTargetCheckViaComputeBool {
 		nodesLen, err = np.manager.GetExistingNodePoolSizeViaCompute(np)
@@ -222,12 +243,6 @@ func (np *nodePool) DecreaseTargetSize(delta int) error {
 			return err
 		}
 	} else {
-		np.manager.InvalidateAndRefreshCache()
-		nodes, err := np.manager.GetNodePoolNodes(np)
-		if err != nil {
-			klog.V(4).Error(err, "error while performing GetNodePoolNodes call")
-			return err
-		}
 		nodesLen = len(nodes)
 	}
@@ -16,12 +16,12 @@ There are a wide variety of use cases here. Some examples are as follows:
 ## Configuration options
 As using this expander requires communication with another service, users must specify a few options as CLI arguments.

-```yaml
---grpcExpanderUrl
-```
+```bash
+--grpc-expander-url
+```
 URL of the gRPC Expander server, for CA to communicate with.
-```yaml
---grpcExpanderCert
-```
+```bash
+--grpc-expander-cert
+```
 Location of the volume mounted certificate of the gRPC server if it is configured to communicate over TLS
@@ -32,7 +32,7 @@ service. Note that the `protos/expander.pb.go` generated protobuf code will also
 Communication between Cluster Autoscaler and the gRPC Server will occur over native kube-proxy. To use this, note the Service and Namespace the gRPC server is deployed in.

 Deploy the gRPC Expander Server as a separate app, listening on a specific port number.
-Start Cluster Autoscaler with the `--grpcExapnderURl=SERVICE_NAME.NAMESPACE_NAME.svc.cluster.local:PORT_NUMBER` flag, as well as `--grpcExpanderCert` pointed at the location of the volume mounted certificate of the gRPC server.
+Start Cluster Autoscaler with the `--grpc-expander-url=SERVICE_NAME.NAMESPACE_NAME.svc.cluster.local:PORT_NUMBER` flag, as well as `--grpc-expander-cert` pointed at the location of the volume mounted certificate of the gRPC server.

 ## Details
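With the corrected flag names, the autoscaler side of the setup reduces to something like the following sketch; the service name, namespace, port, and certificate path are placeholders:

```bash
# Sketch: Cluster Autoscaler configured to use the gRPC expander.
cluster-autoscaler \
  --expander=grpc \
  --grpc-expander-url=SERVICE_NAME.NAMESPACE_NAME.svc.cluster.local:PORT_NUMBER \
  --grpc-expander-cert=/etc/tls-certs/server.crt
```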
@@ -43,6 +43,21 @@ spec:
         - containerPort: 8000
         - name: prometheus
           containerPort: 8944
+        livenessProbe:
+          httpGet:
+            path: /health-check
+            port: prometheus
+            scheme: HTTP
+          initialDelaySeconds: 5
+          periodSeconds: 10
+          failureThreshold: 3
+        readinessProbe:
+          httpGet:
+            path: /health-check
+            port: prometheus
+            scheme: HTTP
+          periodSeconds: 10
+          failureThreshold: 3
       volumes:
       - name: tls-certs
         secret:
@@ -41,3 +41,18 @@ spec:
         ports:
         - name: prometheus
          containerPort: 8942
+        livenessProbe:
+          httpGet:
+            path: /health-check
+            port: prometheus
+            scheme: HTTP
+          initialDelaySeconds: 5
+          periodSeconds: 10
+          failureThreshold: 3
+        readinessProbe:
+          httpGet:
+            path: /health-check
+            port: prometheus
+            scheme: HTTP
+          periodSeconds: 10
+          failureThreshold: 3
@@ -41,3 +41,18 @@ spec:
         ports:
         - name: prometheus
          containerPort: 8942
+        livenessProbe:
+          httpGet:
+            path: /health-check
+            port: prometheus
+            scheme: HTTP
+          initialDelaySeconds: 5
+          periodSeconds: 10
+          failureThreshold: 3
+        readinessProbe:
+          httpGet:
+            path: /health-check
+            port: prometheus
+            scheme: HTTP
+          periodSeconds: 10
+          failureThreshold: 3
@@ -32,3 +32,18 @@ spec:
         ports:
         - name: prometheus
          containerPort: 8942
+        livenessProbe:
+          httpGet:
+            path: /health-check
+            port: prometheus
+            scheme: HTTP
+          initialDelaySeconds: 5
+          periodSeconds: 10
+          failureThreshold: 3
+        readinessProbe:
+          httpGet:
+            path: /health-check
+            port: prometheus
+            scheme: HTTP
+          periodSeconds: 10
+          failureThreshold: 3
@@ -37,3 +37,18 @@ spec:
         ports:
         - name: prometheus
          containerPort: 8943
+        livenessProbe:
+          httpGet:
+            path: /health-check
+            port: prometheus
+            scheme: HTTP
+          initialDelaySeconds: 5
+          periodSeconds: 10
+          failureThreshold: 3
+        readinessProbe:
+          httpGet:
+            path: /health-check
+            port: prometheus
+            scheme: HTTP
+          periodSeconds: 10
+          failureThreshold: 3
@@ -0,0 +1,229 @@
+# AEP-8026: Allow per-VPA component configuration parameters
+
+<!-- toc -->
+- [Summary](#summary)
+- [Motivation](#motivation)
+  - [Goals](#goals)
+  - [Non-Goals](#non-goals)
+- [Proposal](#proposal)
+  - [Parameter Descriptions](#parameter-descriptions)
+    - [Container Policy Parameters](#container-policy-parameters)
+    - [Update Policy Parameters](#update-policy-parameters)
+- [Design Details](#design-details)
+  - [API Changes](#api-changes)
+    - [Phase 1 (Current Proposal)](#phase-1-current-proposal)
+    - [Future Extensions](#future-extensions)
+  - [Feature Enablement and Rollback](#feature-enablement-and-rollback)
+    - [How can this feature be enabled / disabled in a live cluster?](#how-can-this-feature-be-enabled--disabled-in-a-live-cluster)
+  - [Kubernetes version compatibility](#kubernetes-version-compatibility)
+  - [Validation via CEL and Testing](#validation-via-cel-and-testing)
+  - [Test Plan](#test-plan)
+- [Implementation History](#implementation-history)
+- [Future Work](#future-work)
+- [Alternatives](#alternatives)
+  - [Multiple VPA Deployments](#multiple-vpa-deployments)
+  - [Environment-Specific Configuration](#environment-specific-configuration)
+<!-- /toc -->
+
+## Summary
+
+Currently, VPA components (recommender, updater, admission controller) are configured through global flags. This makes it challenging to support different workloads with varying resource optimization needs within the same cluster. This proposal introduces the ability to specify configuration parameters at the individual VPA object level, allowing for workload-specific optimization strategies.
+
+## Motivation
+
+Different types of workloads in a Kubernetes cluster often have different resource optimization requirements. For example:
+- Batch processing jobs might benefit from aggressive OOM handling and frequent adjustments
+- User-facing services might need more conservative growth patterns for stability
+- Development environments might need different settings than production
+
+Currently, supporting these different needs requires running multiple VPA component instances with different configurations, which increases operational complexity and resource usage.
+
+### Goals
+
+- Allow specification of component-specific parameters in individual VPA objects
+- Support different optimization strategies for different workloads in the same cluster
+- Maintain backward compatibility with existing global configuration
+- Initially support the following parameters:
+  - oomBumpUpRatio
+  - oomMinBumpUp
+  - memoryAggregationInterval
+  - memoryAggregationIntervalCount
+  - evictAfterOOMThreshold
+
+### Non-Goals
+
+- Converting all existing VPA flags to per-object configuration
+- Changing the core VPA algorithm or its decision-making process
+- Adding new optimization strategies
+
+## Proposal
+
+The configuration will be split into two sections: container-specific recommendations under `containerPolicies` and updater configuration under `updatePolicy`. This structure is designed to be extensible, allowing for additional parameters to be added in future iterations of this enhancement.
+
+```yaml
+apiVersion: autoscaling.k8s.io/v1
+kind: VerticalPodAutoscaler
+metadata:
+  name: oom-test-vpa
+spec:
+  targetRef:
+    apiVersion: apps/v1
+    kind: Deployment
+    name: oom-test
+  updatePolicy:
+    updateMode: Auto
+    evictAfterOOMThreshold: "5m"
+  resourcePolicy:
+    containerPolicies:
+      - containerName: "*"
+        oomBumpUpRatio: "1.5"
+        oomMinBumpUp: 104857600
+        memoryAggregationInterval: "12h"
+        memoryAggregationIntervalCount: 5
+```
+
+### Parameter Descriptions
+
+#### Container Policy Parameters
+
+* `oomBumpUpRatio` (Quantity):
+  - Multiplier applied to memory recommendations after OOM events
+  - Represented as a Quantity (e.g., "1.5")
+  - Must be greater than 1
+  - Controls how aggressively memory is increased after container crashes
+
+* `oomMinBumpUp` (bytes):
+  - Minimum absolute memory increase after OOM events
+  - Ensures meaningful increases even for small containers
+
+* `memoryAggregationInterval` (duration):
+  - Time window for aggregating memory usage data
+  - Affects how quickly VPA responds to memory usage changes
+
+* `memoryAggregationIntervalCount` (integer):
+  - Number of consecutive memory aggregation intervals
+  - Used to calculate the total memory aggregation window length
+  - Total window length = memoryAggregationInterval * memoryAggregationIntervalCount
+
+#### Update Policy Parameters
+
+* `evictAfterOOMThreshold` (duration):
+  - Time to wait after OOM before considering pod eviction
+  - Helps prevent rapid eviction cycles while maintaining stability
+
+Each parameter can be configured independently, falling back to global defaults if not specified. Values should be chosen based on workload characteristics and stability requirements.
+
+## Design Details
+
+### API Changes
+
+#### Phase 1 (Current Proposal)
+
+Extend `ContainerResourcePolicy` with:
+* `oomBumpUpRatio`
+* `oomMinBumpUp`
+* `memoryAggregationInterval`
+* `memoryAggregationIntervalCount`
+
+Extend `PodUpdatePolicy` with:
+* `evictAfterOOMThreshold`
+
+#### Future Extensions
+
+This AEP will be updated as additional parameters are identified for per-object configuration. Potential candidates include:
+* `confidenceIntervalCPU`
+* `confidenceIntervalMemory`
+* `recommendationMarginFraction`
+* Other parameters that benefit from workload-specific tuning
+
+### Feature Enablement and Rollback
+
+#### How can this feature be enabled / disabled in a live cluster?
+
+- Feature gate name: `PerVPAConfig`
+- Default: Off (Alpha)
+- Components depending on the feature gate:
+  - admission-controller
+  - recommender
+  - updater
+
+The feature gate will remain in alpha (default off) until:
+- All planned configuration parameters have been implemented and tested
+- Performance impact has been thoroughly evaluated
+- Documentation is complete for all parameters
+
+Disabling the `PerVPAConfig` feature gate will cause the following to happen:
+
+- Any per-VPA configuration parameters specified in VPA objects will be ignored
+- Components will fall back to using their global configuration values
+
+Enabling the `PerVPAConfig` feature gate will cause the following to happen:
+
+- VPA components will honor the per-VPA configuration parameters specified in VPA objects
+- Validation will be performed on the configuration parameters
+- Configuration parameters will override global defaults for the specific VPA object
+
+### Kubernetes version compatibility
+
+The `PerVPAConfig` feature requires VPA version 1.5.0 or higher. The feature is being introduced as alpha and will follow the standard Kubernetes feature gate graduation process:
+- Alpha: v1.5.0 (default off)
+- Beta: TBD (default on)
+- GA: TBD (default on)
+
+### Validation via CEL and Testing
+
+Initial validation rules (CEL):
+* `oomMinBumpUp` > 0
+* `memoryAggregationInterval` > 0
+* `evictAfterOOMThreshold` > 0
+* `memoryAggregationIntervalCount` > 0
+
+Validation via Admission Controller:
+Some parameters can't be validated using Common Expression Language (CEL). This validation is performed within the admission controller.
+
+* `oomBumpUpRatio` – Using Kubernetes Quantity type for validation. The value must be greater than 1.
+
+Additional validation rules will be added as new parameters are introduced.
+E2E tests will be included to verify:
+* Different configurations are properly applied and respected by VPA components
+* VPA behavior matches expected outcomes for different parameter combinations
+* Proper fallback to global configuration when parameters are not specified
+
+### Test Plan
+
+- Unit tests for new API fields and validation
+- Integration tests verifying different configurations are properly applied
+- E2E tests comparing behavior with different configurations
+- Upgrade tests ensuring backward compatibility
+
+## Implementation History
+
+- 2025-04-12: Initial proposal
+- Future: Additional parameters will be added based on user feedback and requirements
+
+## Future Work
+
+This enhancement is designed to be extensible. As the VPA evolves and users provide feedback, additional parameters may be added to the per-object configuration. Each new parameter will:
+1. Be documented in this AEP
+2. Include appropriate validation rules
+3. Maintain backward compatibility
+4. Follow the same pattern of falling back to global configuration when not specified
+
+The decision to add new parameters will be based on:
+- User feedback and requirements
+- Performance impact analysis
+- Implementation complexity
+- Maintenance considerations
+
+## Alternatives
+
+### Multiple VPA Deployments
+
+Continue with current approach of running multiple VPA deployments with different configurations:
+- Pros: No API changes needed
+- Cons: Higher resource usage, operational complexity
+
+### Environment-Specific Configuration
+
+Use different VPA deployments per environment (dev/staging/prod):
+- Pros: Simpler than per-workload configuration
+- Cons: Less flexible, doesn't address varying needs within same environment
@@ -41,6 +41,7 @@ require (
 	github.com/inconshreveable/mousetrap v1.1.0 // indirect
 	github.com/josharian/intern v1.0.0 // indirect
 	github.com/json-iterator/go v1.1.12 // indirect
+	github.com/kylelemons/godebug v1.1.0 // indirect
 	github.com/mailru/easyjson v0.9.0 // indirect
 	github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
 	github.com/modern-go/reflect2 v1.0.2 // indirect
@@ -81,6 +81,7 @@ type ClusterStateFeederFactory struct {
 	KubeClient          kube_client.Interface
 	MetricsClient       metrics.MetricsClient
 	VpaCheckpointClient vpa_api.VerticalPodAutoscalerCheckpointsGetter
+	VpaCheckpointLister vpa_lister.VerticalPodAutoscalerCheckpointLister
 	VpaLister           vpa_lister.VerticalPodAutoscalerLister
 	PodLister           v1lister.PodLister
 	OOMObserver         oom.Observer

@@ -99,6 +100,7 @@ func (m ClusterStateFeederFactory) Make() *clusterStateFeeder {
 		metricsClient:       m.MetricsClient,
 		oomChan:             m.OOMObserver.GetObservedOomsChannel(),
 		vpaCheckpointClient: m.VpaCheckpointClient,
+		vpaCheckpointLister: m.VpaCheckpointLister,
 		vpaLister:           m.VpaLister,
 		clusterState:        m.ClusterState,
 		specClient:          spec.NewSpecClient(m.PodLister),

@@ -206,6 +208,7 @@ type clusterStateFeeder struct {
 	metricsClient       metrics.MetricsClient
 	oomChan             <-chan oom.OomInfo
 	vpaCheckpointClient vpa_api.VerticalPodAutoscalerCheckpointsGetter
+	vpaCheckpointLister vpa_lister.VerticalPodAutoscalerCheckpointLister
 	vpaLister           vpa_lister.VerticalPodAutoscalerLister
 	clusterState        model.ClusterState
 	selectorFetcher     target.VpaTargetSelectorFetcher
@@ -267,25 +270,29 @@ func (feeder *clusterStateFeeder) InitFromCheckpoints(ctx context.Context) {
 	klog.V(3).InfoS("Initializing VPA from checkpoints")
 	feeder.LoadVPAs(ctx)

+	klog.V(3).InfoS("Fetching VPA checkpoints")
+	checkpointList, err := feeder.vpaCheckpointLister.List(labels.Everything())
+	if err != nil {
+		klog.ErrorS(err, "Cannot list VPA checkpoints")
+	}
+
 	namespaces := make(map[string]bool)
 	for _, v := range feeder.clusterState.VPAs() {
 		namespaces[v.ID.Namespace] = true
 	}

 	for namespace := range namespaces {
-		klog.V(3).InfoS("Fetching checkpoints", "namespace", namespace)
-		checkpointList, err := feeder.vpaCheckpointClient.VerticalPodAutoscalerCheckpoints(namespace).List(ctx, metav1.ListOptions{})
-		if err != nil {
-			klog.ErrorS(err, "Cannot list VPA checkpoints", "namespace", namespace)
+		if feeder.shouldIgnoreNamespace(namespace) {
+			klog.V(3).InfoS("Skipping loading VPA Checkpoints from namespace.", "namespace", namespace, "vpaObjectNamespace", feeder.vpaObjectNamespace, "ignoredNamespaces", feeder.ignoredNamespaces)
+			continue
 		}
-		for _, checkpoint := range checkpointList.Items {
-			klog.V(3).InfoS("Loading checkpoint for VPA", "checkpoint", klog.KRef(checkpoint.Namespace, checkpoint.Spec.VPAObjectName), "container", checkpoint.Spec.ContainerName)
-			err = feeder.setVpaCheckpoint(&checkpoint)
-			if err != nil {
-				klog.ErrorS(err, "Error while loading checkpoint")
-			}
+	}
+
+	for _, checkpoint := range checkpointList {
+		klog.V(3).InfoS("Loading checkpoint for VPA", "checkpoint", klog.KRef(checkpoint.Namespace, checkpoint.Spec.VPAObjectName), "container", checkpoint.Spec.ContainerName)
+		err = feeder.setVpaCheckpoint(checkpoint)
+		if err != nil {
+			klog.ErrorS(err, "Error while loading checkpoint")
 		}
 	}
 }
@@ -345,11 +352,12 @@ func (feeder *clusterStateFeeder) shouldIgnoreNamespace(namespace string) bool {

 func (feeder *clusterStateFeeder) cleanupCheckpointsForNamespace(ctx context.Context, namespace string, allVPAKeys map[model.VpaID]bool) error {
 	var err error
-	checkpointList, err := feeder.vpaCheckpointClient.VerticalPodAutoscalerCheckpoints(namespace).List(ctx, metav1.ListOptions{})
+	checkpointList, err := feeder.vpaCheckpointLister.VerticalPodAutoscalerCheckpoints(namespace).List(labels.Everything())
+
 	if err != nil {
 		return err
 	}
-	for _, checkpoint := range checkpointList.Items {
+	for _, checkpoint := range checkpointList {
 		vpaID := model.VpaID{Namespace: checkpoint.Namespace, VpaName: checkpoint.Spec.VPAObjectName}
 		if !allVPAKeys[vpaID] {
 			if errFeeder := feeder.vpaCheckpointClient.VerticalPodAutoscalerCheckpoints(namespace).Delete(ctx, checkpoint.Name, metav1.DeleteOptions{}); errFeeder != nil {
|
@ -573,6 +581,9 @@ func (feeder *clusterStateFeeder) validateTargetRef(ctx context.Context, vpa *vp
|
||||||
if vpa.Spec.TargetRef == nil {
|
if vpa.Spec.TargetRef == nil {
|
||||||
return false, condition{}
|
return false, condition{}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
target := fmt.Sprintf("%s.%s/%s", vpa.Spec.TargetRef.APIVersion, vpa.Spec.TargetRef.Kind, vpa.Spec.TargetRef.Name)
|
||||||
|
|
||||||
k := controllerfetcher.ControllerKeyWithAPIVersion{
|
k := controllerfetcher.ControllerKeyWithAPIVersion{
|
||||||
ControllerKey: controllerfetcher.ControllerKey{
|
ControllerKey: controllerfetcher.ControllerKey{
|
||||||
Namespace: vpa.Namespace,
|
Namespace: vpa.Namespace,
|
||||||
|
@ -583,13 +594,13 @@ func (feeder *clusterStateFeeder) validateTargetRef(ctx context.Context, vpa *vp
|
||||||
}
|
}
|
||||||
top, err := feeder.controllerFetcher.FindTopMostWellKnownOrScalable(ctx, &k)
|
top, err := feeder.controllerFetcher.FindTopMostWellKnownOrScalable(ctx, &k)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return false, condition{conditionType: vpa_types.ConfigUnsupported, delete: false, message: fmt.Sprintf("Error checking if target is a topmost well-known or scalable controller: %s", err)}
|
return false, condition{conditionType: vpa_types.ConfigUnsupported, delete: false, message: fmt.Sprintf("Error checking if target %s is a topmost well-known or scalable controller: %s", target, err)}
|
||||||
}
|
}
|
||||||
if top == nil {
|
if top == nil {
|
||||||
return false, condition{conditionType: vpa_types.ConfigUnsupported, delete: false, message: fmt.Sprintf("Unknown error during checking if target is a topmost well-known or scalable controller: %s", err)}
|
return false, condition{conditionType: vpa_types.ConfigUnsupported, delete: false, message: fmt.Sprintf("Unknown error during checking if target %s is a topmost well-known or scalable controller", target)}
|
||||||
}
|
}
|
||||||
if *top != k {
|
if *top != k {
|
||||||
return false, condition{conditionType: vpa_types.ConfigUnsupported, delete: false, message: "The targetRef controller has a parent but it should point to a topmost well-known or scalable controller"}
|
return false, condition{conditionType: vpa_types.ConfigUnsupported, delete: false, message: fmt.Sprintf("The target %s has a parent controller but it should point to a topmost well-known or scalable controller", target)}
|
||||||
}
|
}
|
||||||
return true, condition{}
|
return true, condition{}
|
||||||
}
|
}
|
||||||
|
|
|
@@ -65,8 +65,8 @@ var (
 	unsupportedConditionTextFromFetcher = "Cannot read targetRef. Reason: targetRef not defined"
 	unsupportedConditionNoExtraText     = "Cannot read targetRef"
 	unsupportedConditionNoTargetRef     = "Cannot read targetRef"
-	unsupportedConditionMudaMudaMuda    = "Error checking if target is a topmost well-known or scalable controller: muda muda muda"
-	unsupportedTargetRefHasParent       = "The targetRef controller has a parent but it should point to a topmost well-known or scalable controller"
+	unsupportedConditionMudaMudaMuda    = "Error checking if target taxonomy.doestar/doseph-doestar is a topmost well-known or scalable controller: muda muda muda"
+	unsupportedTargetRefHasParent       = "The target stardust.dodokind/dotaro has a parent controller but it should point to a topmost well-known or scalable controller"
 )

 const (
@@ -859,11 +859,12 @@ func TestFilterVPAsIgnoreNamespaces(t *testing.T) {
 func TestCanCleanupCheckpoints(t *testing.T) {
 	_, tctx := ktesting.NewTestContext(t)
 	client := fake.NewSimpleClientset()
+	namespace := "testNamespace"

-	_, err := client.CoreV1().Namespaces().Create(tctx, &v1.Namespace{ObjectMeta: metav1.ObjectMeta{Name: "testNamespace"}}, metav1.CreateOptions{})
+	_, err := client.CoreV1().Namespaces().Create(tctx, &v1.Namespace{ObjectMeta: metav1.ObjectMeta{Name: namespace}}, metav1.CreateOptions{})
 	assert.NoError(t, err)

-	vpaBuilder := test.VerticalPodAutoscaler().WithContainer("container").WithNamespace("testNamespace").WithTargetRef(&autoscalingv1.CrossVersionObjectReference{
+	vpaBuilder := test.VerticalPodAutoscaler().WithContainer("container").WithNamespace(namespace).WithTargetRef(&autoscalingv1.CrossVersionObjectReference{
 		Kind:       kind,
 		Name:       name1,
 		APIVersion: apiVersion,

@@ -878,22 +879,19 @@ func TestCanCleanupCheckpoints(t *testing.T) {
 	vpaLister := &test.VerticalPodAutoscalerListerMock{}
 	vpaLister.On("List").Return(vpas, nil)

-	checkpoints := &vpa_types.VerticalPodAutoscalerCheckpointList{
-		Items: []vpa_types.VerticalPodAutoscalerCheckpoint{
-			{
-				ObjectMeta: metav1.ObjectMeta{
-					Namespace: "testNamespace",
-					Name:      "nonExistentVPA",
-				},
-				Spec: vpa_types.VerticalPodAutoscalerCheckpointSpec{
-					VPAObjectName: "nonExistentVPA",
-				},
-			},
+	vpaCheckPoint := &vpa_types.VerticalPodAutoscalerCheckpoint{
+		ObjectMeta: metav1.ObjectMeta{
+			Namespace: namespace,
+			Name:      "nonExistentVPA",
+		},
+		Spec: vpa_types.VerticalPodAutoscalerCheckpointSpec{
+			VPAObjectName: "nonExistentVPA",
 		},
 	}
+	vpacheckpoints := []*vpa_types.VerticalPodAutoscalerCheckpoint{vpaCheckPoint}

 	for _, vpa := range vpas {
-		checkpoints.Items = append(checkpoints.Items, vpa_types.VerticalPodAutoscalerCheckpoint{
+		vpacheckpoints = append(vpacheckpoints, &vpa_types.VerticalPodAutoscalerCheckpoint{
 			ObjectMeta: metav1.ObjectMeta{
 				Namespace: vpa.Namespace,
 				Name:      vpa.Name,
@@ -904,23 +902,29 @@ func TestCanCleanupCheckpoints(t *testing.T) {
 		})
 	}

+	// Create a mock checkpoint client to track deletions
 	checkpointClient := &fakeautoscalingv1.FakeAutoscalingV1{Fake: &core.Fake{}}
-	checkpointClient.AddReactor("list", "verticalpodautoscalercheckpoints", func(action core.Action) (bool, runtime.Object, error) {
-		return true, checkpoints, nil
-	})
-
+	// Track deleted checkpoints
 	deletedCheckpoints := []string{}
 	checkpointClient.AddReactor("delete", "verticalpodautoscalercheckpoints", func(action core.Action) (bool, runtime.Object, error) {
 		deleteAction := action.(core.DeleteAction)
 		deletedCheckpoints = append(deletedCheckpoints, deleteAction.GetName())
-
 		return true, nil, nil
 	})

+	// Create namespace lister mock that will return the checkpoint list
+	checkpointNamespaceLister := &test.VerticalPodAutoscalerCheckPointListerMock{}
+	checkpointNamespaceLister.On("List").Return(vpacheckpoints, nil)
+
+	// Create main checkpoint mock that will return the namespace lister
+	checkpointLister := &test.VerticalPodAutoscalerCheckPointListerMock{}
+	checkpointLister.On("VerticalPodAutoscalerCheckpoints", namespace).Return(checkpointNamespaceLister)
+
 	feeder := clusterStateFeeder{
 		coreClient:          client.CoreV1(),
 		vpaLister:           vpaLister,
 		vpaCheckpointClient: checkpointClient,
+		vpaCheckpointLister: checkpointLister,
 		clusterState:        model.NewClusterState(testGcPeriod),
 		recommenderName:     "default",
 	}
@@ -297,6 +297,7 @@ func run(ctx context.Context, healthCheck *metrics.HealthCheck, commonFlag *comm
 		MetricsClient:       input_metrics.NewMetricsClient(source, commonFlag.VpaObjectNamespace, "default-metrics-client"),
 		VpaCheckpointClient: vpa_clientset.NewForConfigOrDie(config).AutoscalingV1(),
 		VpaLister:           vpa_api_util.NewVpasLister(vpa_clientset.NewForConfigOrDie(config), make(chan struct{}), commonFlag.VpaObjectNamespace),
+		VpaCheckpointLister: vpa_api_util.NewVpaCheckpointLister(vpa_clientset.NewForConfigOrDie(config), make(chan struct{}), commonFlag.VpaObjectNamespace),
 		ClusterState:        clusterState,
 		SelectorFetcher:     target.NewVpaTargetSelectorFetcher(config, kubeClient, factory),
 		MemorySaveMode:      *memorySaver,
@@ -230,7 +230,8 @@ func (u *updater) RunOnce(ctx context.Context) {
 	// to contain only Pods controlled by a VPA in auto, recreate, or inPlaceOrRecreate mode
 	for vpa, livePods := range controlledPods {
 		vpaSize := len(livePods)
-		controlledPodsCounter.Add(vpaSize, vpaSize)
+		updateMode := vpa_api_util.GetUpdateMode(vpa)
+		controlledPodsCounter.Add(vpaSize, updateMode, vpaSize)
 		creatorToSingleGroupStatsMap, podToReplicaCreatorMap, err := u.restrictionFactory.GetCreatorMaps(livePods, vpa)
 		if err != nil {
 			klog.ErrorS(err, "Failed to get creator maps")

@@ -242,7 +243,6 @@ func (u *updater) RunOnce(ctx context.Context) {

 		podsForInPlace := make([]*apiv1.Pod, 0)
 		podsForEviction := make([]*apiv1.Pod, 0)
-		updateMode := vpa_api_util.GetUpdateMode(vpa)

 		if updateMode == vpa_types.UpdateModeInPlaceOrRecreate && features.Enabled(features.InPlaceOrRecreate) {
 			podsForInPlace = u.getPodsUpdateOrder(filterNonInPlaceUpdatablePods(livePods, inPlaceLimiter), vpa)

@@ -253,7 +253,7 @@ func (u *updater) RunOnce(ctx context.Context) {
 				klog.InfoS("Warning: feature gate is not enabled for this updateMode", "featuregate", features.InPlaceOrRecreate, "updateMode", vpa_types.UpdateModeInPlaceOrRecreate)
 			}
 			podsForEviction = u.getPodsUpdateOrder(filterNonEvictablePods(livePods, evictionLimiter), vpa)
-			evictablePodsCounter.Add(vpaSize, len(podsForEviction))
+			evictablePodsCounter.Add(vpaSize, updateMode, len(podsForEviction))
 		}

 		withInPlaceUpdatable := false

@@ -304,7 +304,7 @@ func (u *updater) RunOnce(ctx context.Context) {
 				klog.V(0).InfoS("Eviction failed", "error", evictErr, "pod", klog.KObj(pod))
 			} else {
 				withEvicted = true
-				metrics_updater.AddEvictedPod(vpaSize)
+				metrics_updater.AddEvictedPod(vpaSize, updateMode)
 			}
 		}

@@ -315,10 +315,10 @@ func (u *updater) RunOnce(ctx context.Context) {
 			vpasWithInPlaceUpdatedPodsCounter.Add(vpaSize, 1)
 		}
 		if withEvictable {
-			vpasWithEvictablePodsCounter.Add(vpaSize, 1)
+			vpasWithEvictablePodsCounter.Add(vpaSize, updateMode, 1)
 		}
 		if withEvicted {
-			vpasWithEvictedPodsCounter.Add(vpaSize, 1)
+			vpasWithEvictedPodsCounter.Add(vpaSize, updateMode, 1)
 		}
 	}
 	timer.ObserveStep("EvictPods")
@@ -22,6 +22,7 @@ import (

 	"github.com/prometheus/client_golang/prometheus"

+	vpa_types "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/apis/autoscaling.k8s.io/v1"
 	"k8s.io/autoscaler/vertical-pod-autoscaler/pkg/utils/metrics"
 )

@@ -35,13 +36,20 @@ type SizeBasedGauge struct {
 	gauge *prometheus.GaugeVec
 }

+// UpdateModeAndSizeBasedGauge is a wrapper for incrementally recording values
+// indexed by log2(VPA size) and update mode
+type UpdateModeAndSizeBasedGauge struct {
+	values [metrics.MaxVpaSizeLog]map[vpa_types.UpdateMode]int
+	gauge  *prometheus.GaugeVec
+}
+
 var (
 	controlledCount = prometheus.NewGaugeVec(
 		prometheus.GaugeOpts{
 			Namespace: metricsNamespace,
 			Name:      "controlled_pods_total",
 			Help:      "Number of Pods controlled by VPA updater.",
-		}, []string{"vpa_size_log2"},
+		}, []string{"vpa_size_log2", "update_mode"},
 	)

 	evictableCount = prometheus.NewGaugeVec(

@@ -49,7 +57,7 @@ var (
 			Namespace: metricsNamespace,
 			Name:      "evictable_pods_total",
 			Help:      "Number of Pods matching evicition criteria.",
-		}, []string{"vpa_size_log2"},
+		}, []string{"vpa_size_log2", "update_mode"},
 	)

 	evictedCount = prometheus.NewCounterVec(

@@ -57,7 +65,7 @@ var (
 			Namespace: metricsNamespace,
 			Name:      "evicted_pods_total",
 			Help:      "Number of Pods evicted by Updater to apply a new recommendation.",
-		}, []string{"vpa_size_log2"},
+		}, []string{"vpa_size_log2", "update_mode"},
 	)

 	vpasWithEvictablePodsCount = prometheus.NewGaugeVec(

@@ -65,7 +73,7 @@ var (
 			Namespace: metricsNamespace,
 			Name:      "vpas_with_evictable_pods_total",
 			Help:      "Number of VPA objects with at least one Pod matching evicition criteria.",
-		}, []string{"vpa_size_log2"},
+		}, []string{"vpa_size_log2", "update_mode"},
 	)

 	vpasWithEvictedPodsCount = prometheus.NewGaugeVec(

@@ -73,7 +81,7 @@ var (
 			Namespace: metricsNamespace,
 			Name:      "vpas_with_evicted_pods_total",
 			Help:      "Number of VPA objects with at least one evicted Pod.",
-		}, []string{"vpa_size_log2"},
+		}, []string{"vpa_size_log2", "update_mode"},
 	)

 	inPlaceUpdatableCount = prometheus.NewGaugeVec(
@@ -138,30 +146,41 @@ func newSizeBasedGauge(gauge *prometheus.GaugeVec) *SizeBasedGauge {
 	}
 }

+// newModeAndSizeBasedGauge provides a wrapper for counting items in a loop
+func newModeAndSizeBasedGauge(gauge *prometheus.GaugeVec) *UpdateModeAndSizeBasedGauge {
+	g := &UpdateModeAndSizeBasedGauge{
+		gauge: gauge,
+	}
+	for i := range g.values {
+		g.values[i] = make(map[vpa_types.UpdateMode]int)
+	}
+	return g
+}
+
 // NewControlledPodsCounter returns a wrapper for counting Pods controlled by Updater
-func NewControlledPodsCounter() *SizeBasedGauge {
-	return newSizeBasedGauge(controlledCount)
+func NewControlledPodsCounter() *UpdateModeAndSizeBasedGauge {
+	return newModeAndSizeBasedGauge(controlledCount)
 }

 // NewEvictablePodsCounter returns a wrapper for counting Pods which are matching eviction criteria
-func NewEvictablePodsCounter() *SizeBasedGauge {
-	return newSizeBasedGauge(evictableCount)
+func NewEvictablePodsCounter() *UpdateModeAndSizeBasedGauge {
+	return newModeAndSizeBasedGauge(evictableCount)
 }

 // NewVpasWithEvictablePodsCounter returns a wrapper for counting VPA objects with Pods matching eviction criteria
-func NewVpasWithEvictablePodsCounter() *SizeBasedGauge {
-	return newSizeBasedGauge(vpasWithEvictablePodsCount)
+func NewVpasWithEvictablePodsCounter() *UpdateModeAndSizeBasedGauge {
+	return newModeAndSizeBasedGauge(vpasWithEvictablePodsCount)
 }

 // NewVpasWithEvictedPodsCounter returns a wrapper for counting VPA objects with evicted Pods
-func NewVpasWithEvictedPodsCounter() *SizeBasedGauge {
-	return newSizeBasedGauge(vpasWithEvictedPodsCount)
+func NewVpasWithEvictedPodsCounter() *UpdateModeAndSizeBasedGauge {
+	return newModeAndSizeBasedGauge(vpasWithEvictedPodsCount)
 }

 // AddEvictedPod increases the counter of pods evicted by Updater, by given VPA size
-func AddEvictedPod(vpaSize int) {
+func AddEvictedPod(vpaSize int, mode vpa_types.UpdateMode) {
 	log2 := metrics.GetVpaSizeLog2(vpaSize)
-	evictedCount.WithLabelValues(strconv.Itoa(log2)).Inc()
+	evictedCount.WithLabelValues(strconv.Itoa(log2), string(mode)).Inc()
 }

 // NewInPlaceUpdatablePodsCounter returns a wrapper for counting Pods which are matching in-place update criteria
@@ -203,3 +222,19 @@ func (g *SizeBasedGauge) Observe() {
 		g.gauge.WithLabelValues(strconv.Itoa(log2)).Set(float64(value))
 	}
 }
+
+// Add increases the counter for the given VPA size and VPA update mode.
+func (g *UpdateModeAndSizeBasedGauge) Add(vpaSize int, vpaUpdateMode vpa_types.UpdateMode, value int) {
+	log2 := metrics.GetVpaSizeLog2(vpaSize)
+	g.values[log2][vpaUpdateMode] += value
+}
+
+// Observe stores the recorded values into metrics object associated with the
+// wrapper
+func (g *UpdateModeAndSizeBasedGauge) Observe() {
+	for log2, valueMap := range g.values {
+		for vpaMode, value := range valueMap {
+			g.gauge.WithLabelValues(strconv.Itoa(log2), string(vpaMode)).Set(float64(value))
+		}
+	}
+}
@@ -0,0 +1,297 @@ (new file)
/*
Copyright 2025 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package updater

import (
	"testing"

	"github.com/prometheus/client_golang/prometheus"
	"github.com/prometheus/client_golang/prometheus/testutil"

	vpa_types "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/apis/autoscaling.k8s.io/v1"
)

func TestAddEvictedPod(t *testing.T) {
	testCases := []struct {
		desc    string
		vpaSize int
		mode    vpa_types.UpdateMode
		log2    string
	}{
		{
			desc:    "VPA size 5, mode Auto",
			vpaSize: 5,
			mode:    vpa_types.UpdateModeAuto,
			log2:    "2",
		},
		{
			desc:    "VPA size 10, mode Off",
			vpaSize: 10,
			mode:    vpa_types.UpdateModeOff,
			log2:    "3",
		},
	}

	for _, tc := range testCases {
		t.Run(tc.desc, func(t *testing.T) {
			t.Cleanup(evictedCount.Reset)
			AddEvictedPod(tc.vpaSize, tc.mode)
			val := testutil.ToFloat64(evictedCount.WithLabelValues(tc.log2, string(tc.mode)))
			if val != 1 {
				t.Errorf("Unexpected value for evictedCount metric with labels (%s, %s): got %v, want 1", tc.log2, string(tc.mode), val)
			}
		})
	}
}

func TestAddInPlaceUpdatedPod(t *testing.T) {
	testCases := []struct {
		desc    string
		vpaSize int
		log2    string
	}{
		{
			desc:    "VPA size 10",
			vpaSize: 10,
			log2:    "3",
		},
		{
			desc:    "VPA size 1",
			vpaSize: 1,
			log2:    "0",
		},
	}

	for _, tc := range testCases {
		t.Run(tc.desc, func(t *testing.T) {
			t.Cleanup(inPlaceUpdatedCount.Reset)
			AddInPlaceUpdatedPod(tc.vpaSize)
			val := testutil.ToFloat64(inPlaceUpdatedCount.WithLabelValues(tc.log2))
			if val != 1 {
				t.Errorf("Unexpected value for InPlaceUpdatedPod metric with labels (%s): got %v, want 1", tc.log2, val)
			}
		})
	}
}

func TestRecordFailedInPlaceUpdate(t *testing.T) {
	testCases := []struct {
		desc    string
		vpaSize int
		reason  string
		log2    string
	}{
		{
			desc:    "VPA size 2, some reason",
			vpaSize: 2,
			reason:  "some_reason",
			log2:    "1",
		},
		{
			desc:    "VPA size 20, another reason",
			vpaSize: 20,
			reason:  "another_reason",
			log2:    "4",
		},
	}

	for _, tc := range testCases {
		t.Run(tc.desc, func(t *testing.T) {
			t.Cleanup(failedInPlaceUpdateAttempts.Reset)
			RecordFailedInPlaceUpdate(tc.vpaSize, tc.reason)
			val := testutil.ToFloat64(failedInPlaceUpdateAttempts.WithLabelValues(tc.log2, tc.reason))
			if val != 1 {
				t.Errorf("Unexpected value for FailedInPlaceUpdate metric with labels (%s, %s): got %v, want 1", tc.log2, tc.reason, val)
			}
		})
	}
}

func TestUpdateModeAndSizeBasedGauge(t *testing.T) {
	type addition struct {
		vpaSize int
		mode    vpa_types.UpdateMode
		value   int
	}
	type expectation struct {
		labels []string
		value  float64
	}
	testCases := []struct {
		desc            string
		newCounter      func() *UpdateModeAndSizeBasedGauge
		metric          *prometheus.GaugeVec
		metricName      string
		additions       []addition
		expectedMetrics []expectation
	}{
		{
			desc:       "ControlledPodsCounter",
			newCounter: NewControlledPodsCounter,
			metric:     controlledCount,
			metricName: "vpa_updater_controlled_pods_total",
			additions: []addition{
				{1, vpa_types.UpdateModeAuto, 5},
				{2, vpa_types.UpdateModeOff, 10},
				{2, vpa_types.UpdateModeAuto, 2},
				{2, vpa_types.UpdateModeAuto, 7},
			},
			expectedMetrics: []expectation{
				{[]string{"0" /* log2(1) */, "Auto"}, 5},
				{[]string{"1" /* log2(2) */, "Auto"}, 9},
				{[]string{"1" /* log2(2) */, "Off"}, 10},
			},
		},
		{
			desc:       "EvictablePodsCounter",
			newCounter: NewEvictablePodsCounter,
			metric:     evictableCount,
			metricName: "vpa_updater_evictable_pods_total",
			additions: []addition{
				{4, vpa_types.UpdateModeAuto, 3},
				{1, vpa_types.UpdateModeRecreate, 8},
			},
			expectedMetrics: []expectation{
				{[]string{"2" /* log2(4) */, "Auto"}, 3},
				{[]string{"0" /* log2(1) */, "Recreate"}, 8},
			},
		},
		{
			desc:       "VpasWithEvictablePodsCounter",
			newCounter: NewVpasWithEvictablePodsCounter,
			metric:     vpasWithEvictablePodsCount,
			metricName: "vpa_updater_vpas_with_evictable_pods_total",
			additions: []addition{
				{1, vpa_types.UpdateModeOff, 1},
				{2, vpa_types.UpdateModeAuto, 1},
			},
			expectedMetrics: []expectation{
				{[]string{"0" /* log2(1) */, "Off"}, 1},
				{[]string{"1" /* log2(2) */, "Auto"}, 1},
			},
		},
		{
			desc:       "VpasWithEvictedPodsCounter",
			newCounter: NewVpasWithEvictedPodsCounter,
			metric:     vpasWithEvictedPodsCount,
			metricName: "vpa_updater_vpas_with_evicted_pods_total",
			additions: []addition{
				{1, vpa_types.UpdateModeAuto, 2},
				{1, vpa_types.UpdateModeAuto, 3},
			},
			expectedMetrics: []expectation{
				{[]string{"0" /* log2(1) */, "Auto"}, 5},
			},
		},
	}

	for _, tc := range testCases {
		t.Run(tc.desc, func(t *testing.T) {
			t.Cleanup(tc.metric.Reset)
			counter := tc.newCounter()
			for _, add := range tc.additions {
				counter.Add(add.vpaSize, add.mode, add.value)
			}
			counter.Observe()
			for _, expected := range tc.expectedMetrics {
				val := testutil.ToFloat64(tc.metric.WithLabelValues(expected.labels...))
				if val != expected.value {
					t.Errorf("Unexpected value for metric %s with labels %v: got %v, want %v", tc.metricName, expected.labels, val, expected.value)
				}
			}
		})
	}
}

func TestSizeBasedGauge(t *testing.T) {
	type addition struct {
		vpaSize int
		value   int
	}
	type expectation struct {
		labels []string
		value  float64
	}
	testCases := []struct {
		desc            string
		newCounter      func() *SizeBasedGauge
		metric          *prometheus.GaugeVec
		metricName      string
		additions       []addition
		expectedMetrics []expectation
	}{
		{
			desc:       "InPlaceUpdatablePodsCounter",
			newCounter: NewInPlaceUpdatablePodsCounter,
			metric:     inPlaceUpdatableCount,
			metricName: "vpa_updater_in_place_updatable_pods_total",
			additions: []addition{
				{1, 5},
				{2, 10},
			},
			expectedMetrics: []expectation{
				{[]string{"0" /* log2(1) */}, 5},
				{[]string{"1" /* log2(2) */}, 10},
			},
		},
		{
			desc:       "VpasWithInPlaceUpdatablePodsCounter",
			newCounter: NewVpasWithInPlaceUpdatablePodsCounter,
			metric:     vpasWithInPlaceUpdatablePodsCount,
			metricName: "vpa_updater_vpas_with_in_place_updatable_pods_total",
			additions: []addition{
				{10, 1},
				{20, 1},
			},
			expectedMetrics: []expectation{
				{[]string{"3" /* log2(10) */}, 1},
				{[]string{"4" /* log2(20) */}, 1},
			},
		},
		{
			desc:       "VpasWithInPlaceUpdatedPodsCounter",
			newCounter: NewVpasWithInPlaceUpdatedPodsCounter,
			metric:     vpasWithInPlaceUpdatedPodsCount,
			metricName: "vpa_updater_vpas_with_in_place_updated_pods_total",
			additions: []addition{
				{2, 4},
				{4, 5},
			},
			expectedMetrics: []expectation{
				{[]string{"1" /* log2(2) */}, 4},
				{[]string{"2" /* log2(4) */}, 5},
			},
		},
	}

	for _, tc := range testCases {
		t.Run(tc.desc, func(t *testing.T) {
			t.Cleanup(tc.metric.Reset)
			counter := tc.newCounter()
			for _, add := range tc.additions {
				counter.Add(add.vpaSize, add.value)
			}
			counter.Observe()
			for _, expected := range tc.expectedMetrics {
				val := testutil.ToFloat64(tc.metric.WithLabelValues(expected.labels...))
				if val != expected.value {
					t.Errorf("Unexpected value for metric %s with labels %v: got %v, want %v", tc.metricName, expected.labels, val, expected.value)
				}
			}
		})
	}
}
@@ -200,6 +200,36 @@ func (m *VerticalPodAutoscalerListerMock) Get(name string) (*vpa_types.VerticalP
 	return nil, fmt.Errorf("unimplemented")
 }

+// VerticalPodAutoscalerCheckPointListerMock is a mock of VerticalPodAutoscalerCheckPointLister
+type VerticalPodAutoscalerCheckPointListerMock struct {
+	mock.Mock
+}
+
+// List is a mock implementation of VerticalPodAutoscalerLister.List
+func (m *VerticalPodAutoscalerCheckPointListerMock) List(selector labels.Selector) (ret []*vpa_types.VerticalPodAutoscalerCheckpoint, err error) {
+	args := m.Called()
+	var returnArg []*vpa_types.VerticalPodAutoscalerCheckpoint
+	if args.Get(0) != nil {
+		returnArg = args.Get(0).([]*vpa_types.VerticalPodAutoscalerCheckpoint)
+	}
+	return returnArg, args.Error(1)
+}
+
+// VerticalPodAutoscalerCheckpoints is a mock implementation of returning a lister for namespace.
+func (m *VerticalPodAutoscalerCheckPointListerMock) VerticalPodAutoscalerCheckpoints(namespace string) vpa_lister.VerticalPodAutoscalerCheckpointNamespaceLister {
+	args := m.Called(namespace)
+	var returnArg vpa_lister.VerticalPodAutoscalerCheckpointNamespaceLister
+	if args.Get(0) != nil {
+		returnArg = args.Get(0).(vpa_lister.VerticalPodAutoscalerCheckpointNamespaceLister)
+	}
+	return returnArg
+}
+
+// Get is not implemented for this mock
+func (m *VerticalPodAutoscalerCheckPointListerMock) Get(name string) (*vpa_types.VerticalPodAutoscalerCheckpoint, error) {
+	return nil, fmt.Errorf("unimplemented")
+}
+
 // VerticalPodAutoscalerV1Beta1ListerMock is a mock of VerticalPodAutoscalerLister or
 // VerticalPodAutoscalerNamespaceLister - the crucial List method is the same.
 type VerticalPodAutoscalerV1Beta1ListerMock struct {
@@ -107,6 +107,36 @@ func NewVpasLister(vpaClient *vpa_clientset.Clientset, stopChannel <-chan struct
 	return vpaLister
 }

+// NewVpaCheckpointLister returns VerticalPodAutoscalerCheckpointLister configured to fetch all VPACheckpoint objects from namespace,
+// set namespace to k8sapiv1.NamespaceAll to select all namespaces.
+// The method blocks until vpaCheckpointLister is initially populated.
+func NewVpaCheckpointLister(vpaClient *vpa_clientset.Clientset, stopChannel <-chan struct{}, namespace string) vpa_lister.VerticalPodAutoscalerCheckpointLister {
+	vpaListWatch := cache.NewListWatchFromClient(vpaClient.AutoscalingV1().RESTClient(), "verticalpodautoscalercheckpoints", namespace, fields.Everything())
+	informerOptions := cache.InformerOptions{
+		ObjectType:    &vpa_types.VerticalPodAutoscalerCheckpoint{},
+		ListerWatcher: vpaListWatch,
+		Handler:       &cache.ResourceEventHandlerFuncs{},
+		ResyncPeriod:  1 * time.Hour,
+		Indexers:      cache.Indexers{cache.NamespaceIndex: cache.MetaNamespaceIndexFunc},
+	}
+
+	store, controller := cache.NewInformerWithOptions(informerOptions)
+	indexer, ok := store.(cache.Indexer)
+	if !ok {
+		klog.ErrorS(nil, "Expected Indexer, but got a Store that does not implement Indexer")
+		klog.FlushAndExit(klog.ExitFlushTimeout, 1)
+	}
+	vpaCheckpointLister := vpa_lister.NewVerticalPodAutoscalerCheckpointLister(indexer)
+	go controller.Run(stopChannel)
+	if !cache.WaitForCacheSync(stopChannel, controller.HasSynced) {
+		klog.ErrorS(nil, "Failed to sync VPA checkpoint cache during initialization")
+		klog.FlushAndExit(klog.ExitFlushTimeout, 1)
+	} else {
+		klog.InfoS("Initial VPA checkpoint synced successfully")
+	}
+	return vpaCheckpointLister
+}
+
 // PodMatchesVPA returns true iff the vpaWithSelector matches the Pod.
 func PodMatchesVPA(pod *core.Pod, vpaWithSelector *VpaWithSelector) bool {
 	return PodLabelsMatchVPA(pod.Namespace, labels.Set(pod.GetLabels()), vpaWithSelector.Vpa.Namespace, vpaWithSelector.Selector)