Compare commits

..

No commits in common. "master" and "cluster-autoscaler-chart-9.48.0" have entirely different histories.

32 changed files with 147 additions and 1173 deletions

View File

@ -23,7 +23,7 @@ We'd love to accept your patches! Before we can take them, we have to jump a cou
All changes must be code reviewed. Coding conventions and standards are explained in the official
[developer docs](https://github.com/kubernetes/community/tree/master/contributors/devel). Expect
reviewers to request that you avoid common [go style mistakes](https://go.dev/wiki/CodeReviewComments)
reviewers to request that you avoid common [go style mistakes](https://github.com/golang/go/wiki/CodeReviewComments)
in your PRs.
### Merge Approval

View File

@ -107,20 +107,7 @@ build-in-docker-arch-%: clean-arch-% docker-builder
docker run ${RM_FLAG} -v `pwd`:/gopath/src/k8s.io/autoscaler/cluster-autoscaler/:Z autoscaling-builder:latest \
bash -c 'cd /gopath/src/k8s.io/autoscaler/cluster-autoscaler && BUILD_TAGS=${BUILD_TAGS} LDFLAGS="${LDFLAGS}" make build-arch-$*'
release-extract-version = $(shell cat version/version.go | grep "Version =" | cut -d '"' -f 2)
release-validate:
@if [ -z $(shell git tag --points-at HEAD | grep -e ^cluster-autoscaler-1.[1-9][0-9]*.[0-9][0-9]*$) ]; then \
echo "Can't release from this commit, there is no compatible git tag"; \
exit 1; \
fi
@if [ -z $(shell git tag --points-at HEAD | grep -e $(call release-extract-version)) ]; then \
echo "Can't release from this commit, git tag does not match version/version.go"; \
exit 1; \
fi
release: TAG=v$(call release-extract-version)
release: release-validate $(addprefix build-in-docker-arch-,$(ALL_ARCH)) execute-release
release: $(addprefix build-in-docker-arch-,$(ALL_ARCH)) execute-release
@echo "Full in-docker release ${TAG}${FOR_PROVIDER} completed"
container: container-arch-$(GOARCH)

View File

@ -10,6 +10,5 @@ reviewers:
- feiskyer
- vadasambar
- x13n
- elmiko
labels:
- area/cluster-autoscaler

View File

@ -103,9 +103,6 @@ type Config struct {
// EnableFastDeleteOnFailedProvisioning defines whether to enable the experimental faster VMSS instance deletion on failed provisioning
EnableFastDeleteOnFailedProvisioning bool `json:"enableFastDeleteOnFailedProvisioning,omitempty" yaml:"enableFastDeleteOnFailedProvisioning,omitempty"`
// EnableLabelPredictionsOnTemplate defines whether to enable label predictions on the template when scaling from zero
EnableLabelPredictionsOnTemplate bool `json:"enableLabelPredictionsOnTemplate,omitempty" yaml:"enableLabelPredictionsOnTemplate,omitempty"`
}
// These are only here for backward compatibility. Their equivalent exists in providerazure.Config with a different name.
@ -136,7 +133,6 @@ func BuildAzureConfig(configReader io.Reader) (*Config, error) {
cfg.VMType = providerazureconsts.VMTypeVMSS
cfg.MaxDeploymentsCount = int64(defaultMaxDeploymentsCount)
cfg.StrictCacheUpdates = false
cfg.EnableLabelPredictionsOnTemplate = true
// Config file overrides defaults
if configReader != nil {
@ -312,9 +308,6 @@ func BuildAzureConfig(configReader io.Reader) (*Config, error) {
if _, err = assignBoolFromEnvIfExists(&cfg.EnableFastDeleteOnFailedProvisioning, "AZURE_ENABLE_FAST_DELETE_ON_FAILED_PROVISIONING"); err != nil {
return nil, err
}
if _, err = assignBoolFromEnvIfExists(&cfg.EnableLabelPredictionsOnTemplate, "AZURE_ENABLE_LABEL_PREDICTIONS_ON_TEMPLATE"); err != nil {
return nil, err
}
// Nonstatic defaults
cfg.VMType = strings.ToLower(cfg.VMType)
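
The environment-variable overrides in `BuildAzureConfig` all follow the same pattern: read an optional variable, parse it, and only overwrite the config field if it was set. Below is a minimal sketch of such a helper, assuming a signature matching the calls above; the provider's actual `assignBoolFromEnvIfExists` may differ in details.

```go
package azuresketch

import (
	"fmt"
	"os"
	"strconv"
)

// assignBoolFromEnvIfExists reads an optional boolean environment variable
// into *out and reports whether it was set. Unset or empty variables leave
// the existing default untouched.
func assignBoolFromEnvIfExists(out *bool, name string) (bool, error) {
	raw, found := os.LookupEnv(name)
	if !found || raw == "" {
		return false, nil
	}
	v, err := strconv.ParseBool(raw)
	if err != nil {
		return false, fmt.Errorf("couldn't parse %q from %s as bool: %w", raw, name, err)
	}
	*out = v
	return true, nil
}
```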

View File

@ -89,8 +89,6 @@ type ScaleSet struct {
dedicatedHost bool
enableFastDeleteOnFailedProvisioning bool
enableLabelPredictionsOnTemplate bool
}
// NewScaleSet creates a new NewScaleSet.
@ -110,11 +108,10 @@ func NewScaleSet(spec *dynamic.NodeGroupSpec, az *AzureManager, curSize int64, d
instancesRefreshJitter: az.config.VmssVmsCacheJitter,
},
enableForceDelete: az.config.EnableForceDelete,
enableDynamicInstanceList: az.config.EnableDynamicInstanceList,
enableDetailedCSEMessage: az.config.EnableDetailedCSEMessage,
enableLabelPredictionsOnTemplate: az.config.EnableLabelPredictionsOnTemplate,
dedicatedHost: dedicatedHost,
enableForceDelete: az.config.EnableForceDelete,
enableDynamicInstanceList: az.config.EnableDynamicInstanceList,
enableDetailedCSEMessage: az.config.EnableDetailedCSEMessage,
dedicatedHost: dedicatedHost,
}
if az.config.VmssVirtualMachinesCacheTTLInSeconds != 0 {
@ -665,7 +662,7 @@ func (scaleSet *ScaleSet) TemplateNodeInfo() (*framework.NodeInfo, error) {
if err != nil {
return nil, err
}
node, err := buildNodeFromTemplate(scaleSet.Name, template, scaleSet.manager, scaleSet.enableDynamicInstanceList, scaleSet.enableLabelPredictionsOnTemplate)
node, err := buildNodeFromTemplate(scaleSet.Name, template, scaleSet.manager, scaleSet.enableDynamicInstanceList)
if err != nil {
return nil, err
}

View File

@ -211,7 +211,7 @@ func buildNodeTemplateFromVMPool(vmsPool armcontainerservice.AgentPool, location
}, nil
}
func buildNodeFromTemplate(nodeGroupName string, template NodeTemplate, manager *AzureManager, enableDynamicInstanceList bool, enableLabelPrediction bool) (*apiv1.Node, error) {
func buildNodeFromTemplate(nodeGroupName string, template NodeTemplate, manager *AzureManager, enableDynamicInstanceList bool) (*apiv1.Node, error) {
node := apiv1.Node{}
nodeName := fmt.Sprintf("%s-asg-%d", nodeGroupName, rand.Int63())
@ -272,7 +272,7 @@ func buildNodeFromTemplate(nodeGroupName string, template NodeTemplate, manager
node.Status.Allocatable = node.Status.Capacity
if template.VMSSNodeTemplate != nil {
node = processVMSSTemplate(template, nodeName, node, enableLabelPrediction)
node = processVMSSTemplate(template, nodeName, node)
} else if template.VMPoolNodeTemplate != nil {
node = processVMPoolTemplate(template, nodeName, node)
} else {
@ -298,7 +298,7 @@ func processVMPoolTemplate(template NodeTemplate, nodeName string, node apiv1.No
return node
}
func processVMSSTemplate(template NodeTemplate, nodeName string, node apiv1.Node, enableLabelPrediction bool) apiv1.Node {
func processVMSSTemplate(template NodeTemplate, nodeName string, node apiv1.Node) apiv1.Node {
// NodeLabels
if template.VMSSNodeTemplate.Tags != nil {
for k, v := range template.VMSSNodeTemplate.Tags {
@ -324,50 +324,45 @@ func processVMSSTemplate(template NodeTemplate, nodeName string, node apiv1.Node
labels = extractLabelsFromTags(template.VMSSNodeTemplate.Tags)
}
// This is the best-effort to match AKS system labels,
// this prediction needs to be constantly worked on and maintained to keep up with the changes in AKS
if enableLabelPrediction {
// Add the agentpool label, its value should come from the VMSS poolName tag
// NOTE: The plan is for agentpool label to be deprecated in favor of the aks-prefixed one
// We will have to live with both labels for a while
if node.Labels[legacyPoolNameTag] != "" {
labels[legacyAgentPoolNodeLabelKey] = node.Labels[legacyPoolNameTag]
labels[agentPoolNodeLabelKey] = node.Labels[legacyPoolNameTag]
}
if node.Labels[poolNameTag] != "" {
labels[legacyAgentPoolNodeLabelKey] = node.Labels[poolNameTag]
labels[agentPoolNodeLabelKey] = node.Labels[poolNameTag]
}
// Add the agentpool label, its value should come from the VMSS poolName tag
// NOTE: The plan is for agentpool label to be deprecated in favor of the aks-prefixed one
// We will have to live with both labels for a while
if node.Labels[legacyPoolNameTag] != "" {
labels[legacyAgentPoolNodeLabelKey] = node.Labels[legacyPoolNameTag]
labels[agentPoolNodeLabelKey] = node.Labels[legacyPoolNameTag]
}
if node.Labels[poolNameTag] != "" {
labels[legacyAgentPoolNodeLabelKey] = node.Labels[poolNameTag]
labels[agentPoolNodeLabelKey] = node.Labels[poolNameTag]
}
// Add the storage profile and storage tier labels for vmss node
if template.VMSSNodeTemplate.OSDisk != nil {
// ephemeral
if template.VMSSNodeTemplate.OSDisk.DiffDiskSettings != nil && template.VMSSNodeTemplate.OSDisk.DiffDiskSettings.Option == compute.Local {
labels[legacyStorageProfileNodeLabelKey] = "ephemeral"
labels[storageProfileNodeLabelKey] = "ephemeral"
} else {
labels[legacyStorageProfileNodeLabelKey] = "managed"
labels[storageProfileNodeLabelKey] = "managed"
}
if template.VMSSNodeTemplate.OSDisk.ManagedDisk != nil {
labels[legacyStorageTierNodeLabelKey] = string(template.VMSSNodeTemplate.OSDisk.ManagedDisk.StorageAccountType)
labels[storageTierNodeLabelKey] = string(template.VMSSNodeTemplate.OSDisk.ManagedDisk.StorageAccountType)
}
// Add the storage profile and storage tier labels for vmss node
if template.VMSSNodeTemplate.OSDisk != nil {
// ephemeral
if template.VMSSNodeTemplate.OSDisk.DiffDiskSettings != nil && template.VMSSNodeTemplate.OSDisk.DiffDiskSettings.Option == compute.Local {
labels[legacyStorageProfileNodeLabelKey] = "ephemeral"
labels[storageProfileNodeLabelKey] = "ephemeral"
} else {
labels[legacyStorageProfileNodeLabelKey] = "managed"
labels[storageProfileNodeLabelKey] = "managed"
}
// If we are on GPU-enabled SKUs, append the accelerator
// label so that CA makes better decision when scaling from zero for GPU pools
if isNvidiaEnabledSKU(template.SkuName) {
labels[GPULabel] = "nvidia"
labels[legacyGPULabel] = "nvidia"
if template.VMSSNodeTemplate.OSDisk.ManagedDisk != nil {
labels[legacyStorageTierNodeLabelKey] = string(template.VMSSNodeTemplate.OSDisk.ManagedDisk.StorageAccountType)
labels[storageTierNodeLabelKey] = string(template.VMSSNodeTemplate.OSDisk.ManagedDisk.StorageAccountType)
}
// Add ephemeral-storage value
if template.VMSSNodeTemplate.OSDisk.DiskSizeGB != nil {
node.Status.Capacity[apiv1.ResourceEphemeralStorage] = *resource.NewQuantity(int64(int(*template.VMSSNodeTemplate.OSDisk.DiskSizeGB)*1024*1024*1024), resource.DecimalSI)
klog.V(4).Infof("OS Disk Size from template is: %d", *template.VMSSNodeTemplate.OSDisk.DiskSizeGB)
klog.V(4).Infof("Setting ephemeral storage to: %v", node.Status.Capacity[apiv1.ResourceEphemeralStorage])
}
}
// Add ephemeral-storage value
if template.VMSSNodeTemplate.OSDisk != nil && template.VMSSNodeTemplate.OSDisk.DiskSizeGB != nil {
node.Status.Capacity[apiv1.ResourceEphemeralStorage] = *resource.NewQuantity(int64(int(*template.VMSSNodeTemplate.OSDisk.DiskSizeGB)*1024*1024*1024), resource.DecimalSI)
klog.V(4).Infof("OS Disk Size from template is: %d", *template.VMSSNodeTemplate.OSDisk.DiskSizeGB)
klog.V(4).Infof("Setting ephemeral storage to: %v", node.Status.Capacity[apiv1.ResourceEphemeralStorage])
// If we are on GPU-enabled SKUs, append the accelerator
// label so that CA makes better decision when scaling from zero for GPU pools
if isNvidiaEnabledSKU(template.SkuName) {
labels[GPULabel] = "nvidia"
labels[legacyGPULabel] = "nvidia"
}
// Extract allocatables from tags

View File

@ -291,91 +291,3 @@ func makeTaintSet(taints []apiv1.Taint) map[apiv1.Taint]bool {
}
return set
}
func TestBuildNodeFromTemplateWithLabelPrediction(t *testing.T) {
poolName := "testpool"
testSkuName := "Standard_DS2_v2"
testNodeName := "test-node"
vmss := compute.VirtualMachineScaleSet{
Response: autorest.Response{},
Sku: &compute.Sku{Name: &testSkuName},
Plan: nil,
VirtualMachineScaleSetProperties: &compute.VirtualMachineScaleSetProperties{
VirtualMachineProfile: &compute.VirtualMachineScaleSetVMProfile{
StorageProfile: &compute.VirtualMachineScaleSetStorageProfile{
OsDisk: &compute.VirtualMachineScaleSetOSDisk{
DiffDiskSettings: nil, // This makes it managed
ManagedDisk: &compute.VirtualMachineScaleSetManagedDiskParameters{
StorageAccountType: compute.StorageAccountTypesPremiumLRS,
},
},
},
},
},
Tags: map[string]*string{
"poolName": &poolName,
},
Zones: &[]string{"1", "2"},
Location: to.StringPtr("westus"),
}
template, err := buildNodeTemplateFromVMSS(vmss, map[string]string{}, "")
assert.NoError(t, err)
manager := &AzureManager{}
node, err := buildNodeFromTemplate(testNodeName, template, manager, false, true)
assert.NoError(t, err)
assert.NotNil(t, node)
// Verify label prediction labels are added
assert.Equal(t, poolName, node.Labels["agentpool"])
assert.Equal(t, poolName, node.Labels["kubernetes.azure.com/agentpool"])
assert.Equal(t, "managed", node.Labels["storageprofile"])
assert.Equal(t, "managed", node.Labels["kubernetes.azure.com/storageprofile"])
}
func TestBuildNodeFromTemplateWithEphemeralStorage(t *testing.T) {
poolName := "testpool"
testSkuName := "Standard_DS2_v2"
testNodeName := "test-node"
diskSizeGB := int32(128)
vmss := compute.VirtualMachineScaleSet{
Response: autorest.Response{},
Sku: &compute.Sku{Name: &testSkuName},
Plan: nil,
VirtualMachineScaleSetProperties: &compute.VirtualMachineScaleSetProperties{
VirtualMachineProfile: &compute.VirtualMachineScaleSetVMProfile{
StorageProfile: &compute.VirtualMachineScaleSetStorageProfile{
OsDisk: &compute.VirtualMachineScaleSetOSDisk{
DiskSizeGB: &diskSizeGB,
DiffDiskSettings: nil, // This makes it managed
ManagedDisk: &compute.VirtualMachineScaleSetManagedDiskParameters{
StorageAccountType: compute.StorageAccountTypesPremiumLRS,
},
},
},
},
},
Tags: map[string]*string{
"poolName": &poolName,
},
Zones: &[]string{"1", "2"},
Location: to.StringPtr("westus"),
}
template, err := buildNodeTemplateFromVMSS(vmss, map[string]string{}, "")
assert.NoError(t, err)
manager := &AzureManager{}
node, err := buildNodeFromTemplate(testNodeName, template, manager, false, false)
assert.NoError(t, err)
assert.NotNil(t, node)
// Verify ephemeral storage is set correctly
expectedEphemeralStorage := resource.NewQuantity(int64(diskSizeGB)*1024*1024*1024, resource.DecimalSI)
ephemeralStorage, exists := node.Status.Capacity[apiv1.ResourceEphemeralStorage]
assert.True(t, exists)
assert.Equal(t, expectedEphemeralStorage.String(), ephemeralStorage.String())
}

View File

@ -469,7 +469,7 @@ func (vmPool *VMPool) TemplateNodeInfo() (*framework.NodeInfo, error) {
if err != nil {
return nil, err
}
node, err := buildNodeFromTemplate(vmPool.agentPoolName, template, vmPool.manager, vmPool.manager.config.EnableDynamicInstanceList, false)
node, err := buildNodeFromTemplate(vmPool.agentPoolName, template, vmPool.manager, vmPool.manager.config.EnableDynamicInstanceList)
if err != nil {
return nil, err
}

View File

@ -79,12 +79,6 @@ in the staging namespace, belonging to the purple cluster, with the label owner=
## Connecting cluster-autoscaler to Cluster API management and workload Clusters
> [!IMPORTANT]
> `--cloud-config` is the flag for specifying a mount volume path to the kubernetes configuration (i.e. KUBECONFIG) that the cluster-autoscaler uses to communicate with the cluster-api management cluster for the purpose of scaling machines.
> [!IMPORTANT]
> `--kubeconfig` is the flag for specifying a mount volume path to the kubernetes configuration (i.e. KUBECONFIG) that the cluster-autoscaler uses to communicate with the cluster-api workload cluster for the purpose of watching Nodes and Pods. This flag can be affected by the desired topology for deploying the cluster-autoscaler; please see the diagrams below for more information.
You will also need to provide the path to the kubeconfig(s) for the management
and workload cluster you wish cluster-autoscaler to run against. To specify the
kubeconfig path for the workload cluster to monitor, use the `--kubeconfig`

View File

@ -10,16 +10,16 @@ The cluster autoscaler for Hetzner Cloud scales worker nodes.
`HCLOUD_IMAGE` Defaults to `ubuntu-20.04`, @see https://docs.hetzner.cloud/#images. You can also use an image ID here (e.g. `15512617`), or a label selector associated with a custom snapshot (e.g. `customized_ubuntu=true`). The most recent snapshot will be used in the latter case.
`HCLOUD_CLUSTER_CONFIG` This is the new format replacing
* `HCLOUD_CLOUD_INIT`
* `HCLOUD_IMAGE`
`HCLOUD_CLUSTER_CONFIG` This is the new format replacing
* `HCLOUD_CLOUD_INIT`
* `HCLOUD_IMAGE`
Base64 encoded JSON according to the following structure
```json
{
"imagesForArch": { // These should be the same format as HCLOUD_IMAGE
"arm64": "",
"arm64": "",
"amd64": ""
},
"nodeConfigs": {
@ -28,7 +28,7 @@ The cluster autoscaler for Hetzner Cloud scales worker nodes.
"labels": {
"node.kubernetes.io/role": "autoscaler-node"
},
"taints":
"taints":
[
{
"key": "node.kubernetes.io/role",
@ -47,13 +47,6 @@ Can be useful when you have many different node pools and run into issues of the
**NOTE**: In contrast to `HCLOUD_CLUSTER_CONFIG`, this file is not base64 encoded.
The global `imagesForArch` configuration can be overridden on a per-nodepool basis by adding an `imagesForArch` field to individual nodepool configurations.
The image selection logic works as follows (a short sketch follows this list):
1. If a nodepool has its own `imagesForArch` configuration, it will be used for that specific nodepool
1. If a nodepool doesn't have `imagesForArch` configured, the global `imagesForArch` configuration will be used as a fallback
1. If neither is configured, the legacy `HCLOUD_IMAGE` environment variable will be used
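
For illustration, the fallback order above can be sketched in Go as follows; the `ImageList`, `NodeConfig` and `ClusterConfig` shapes mirror the Hetzner provider types shown elsewhere in this diff, but this is a simplified sketch rather than the provider's exact code:

```go
package hetznersketch

// ImageList holds one image reference per architecture, in the same format as HCLOUD_IMAGE.
type ImageList struct {
	Arm64 string
	Amd64 string
}

// NodeConfig carries the optional per-nodepool imagesForArch override.
type NodeConfig struct {
	ImagesForArch *ImageList
}

// ClusterConfig is the parsed HCLOUD_CLUSTER_CONFIG (new format) plus the legacy image name.
type ClusterConfig struct {
	IsUsingNewFormat bool
	ImagesForArch    ImageList
	NodeConfigs      map[string]*NodeConfig
	LegacyImageName  string
}

// resolveImagesForArch applies the selection order described above.
func resolveImagesForArch(cfg *ClusterConfig, nodePoolID string) ImageList {
	if !cfg.IsUsingNewFormat {
		// 3. Legacy HCLOUD_IMAGE: the same image is used for every architecture.
		return ImageList{Arm64: cfg.LegacyImageName, Amd64: cfg.LegacyImageName}
	}
	// 1. A nodepool-specific imagesForArch wins if present.
	if nc, ok := cfg.NodeConfigs[nodePoolID]; ok && nc.ImagesForArch != nil {
		return *nc.ImagesForArch
	}
	// 2. Otherwise fall back to the global imagesForArch configuration.
	return cfg.ImagesForArch
}
```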
`HCLOUD_NETWORK` Defaults to empty. The ID or name of the network that is used in the cluster, @see https://docs.hetzner.cloud/#networks
@ -112,5 +105,5 @@ git add hcloud-go/
## Debugging
To enable debug logging, set the log level of the autoscaler to at least level 5 via cli flag: `--v=5`
The logs will include all requests and responses made towards the Hetzner API including headers and body.
To enable debug logging, set the log level of the autoscaler to at least level 5 via cli flag: `--v=5`
The logs will include all requests and responses made towards the Hetzner API including headers and body.

View File

@ -77,7 +77,6 @@ type NodeConfig struct {
PlacementGroup string
Taints []apiv1.Taint
Labels map[string]string
ImagesForArch *ImageList
}
// LegacyConfig holds the configuration in the legacy format

View File

@ -528,20 +528,12 @@ func findImage(n *hetznerNodeGroup, serverType *hcloud.ServerType) (*hcloud.Imag
// Select correct image based on server type architecture
imageName := n.manager.clusterConfig.LegacyConfig.ImageName
if n.manager.clusterConfig.IsUsingNewFormat {
// Check for nodepool-specific images first, then fall back to global images
var imagesForArch *ImageList
if nodeConfig, exists := n.manager.clusterConfig.NodeConfigs[n.id]; exists && nodeConfig.ImagesForArch != nil {
imagesForArch = nodeConfig.ImagesForArch
} else {
imagesForArch = &n.manager.clusterConfig.ImagesForArch
}
if serverType.Architecture == hcloud.ArchitectureARM {
imageName = imagesForArch.Arm64
imageName = n.manager.clusterConfig.ImagesForArch.Arm64
}
if serverType.Architecture == hcloud.ArchitectureX86 {
imageName = imagesForArch.Amd64
imageName = n.manager.clusterConfig.ImagesForArch.Amd64
}
}

View File

@ -1,174 +0,0 @@
/*
Copyright 2019 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package hetzner
import (
"testing"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
func TestFindImageWithPerNodepoolConfig(t *testing.T) {
// Test case 1: Nodepool with specific imagesForArch should use those images
t.Run("nodepool with specific imagesForArch", func(t *testing.T) {
manager := &hetznerManager{
clusterConfig: &ClusterConfig{
IsUsingNewFormat: true,
ImagesForArch: ImageList{
Arm64: "global-arm64-image",
Amd64: "global-amd64-image",
},
NodeConfigs: map[string]*NodeConfig{
"pool1": {
ImagesForArch: &ImageList{
Arm64: "pool1-arm64-image",
Amd64: "pool1-amd64-image",
},
},
},
},
}
nodeGroup := &hetznerNodeGroup{
id: "pool1",
manager: manager,
}
// This would normally call the actual API, but we're just testing the logic
// The actual image selection logic is in findImage function
// For this test, we'll verify the configuration is set up correctly
nodeConfig, exists := manager.clusterConfig.NodeConfigs[nodeGroup.id]
require.True(t, exists)
require.NotNil(t, nodeConfig.ImagesForArch)
assert.Equal(t, "pool1-arm64-image", nodeConfig.ImagesForArch.Arm64)
assert.Equal(t, "pool1-amd64-image", nodeConfig.ImagesForArch.Amd64)
})
// Test case 2: Nodepool without specific imagesForArch should fall back to global
t.Run("nodepool without specific imagesForArch", func(t *testing.T) {
manager := &hetznerManager{
clusterConfig: &ClusterConfig{
IsUsingNewFormat: true,
ImagesForArch: ImageList{
Arm64: "global-arm64-image",
Amd64: "global-amd64-image",
},
NodeConfigs: map[string]*NodeConfig{
"pool2": {
// No ImagesForArch specified
},
},
},
}
nodeGroup := &hetznerNodeGroup{
id: "pool2",
manager: manager,
}
nodeConfig, exists := manager.clusterConfig.NodeConfigs[nodeGroup.id]
require.True(t, exists)
assert.Nil(t, nodeConfig.ImagesForArch)
assert.Equal(t, "global-arm64-image", manager.clusterConfig.ImagesForArch.Arm64)
assert.Equal(t, "global-amd64-image", manager.clusterConfig.ImagesForArch.Amd64)
})
// Test case 3: Nodepool with nil ImagesForArch should fall back to global
t.Run("nodepool with nil imagesForArch", func(t *testing.T) {
manager := &hetznerManager{
clusterConfig: &ClusterConfig{
IsUsingNewFormat: true,
ImagesForArch: ImageList{
Arm64: "global-arm64-image",
Amd64: "global-amd64-image",
},
NodeConfigs: map[string]*NodeConfig{
"pool3": {
ImagesForArch: nil, // Explicitly nil
},
},
},
}
nodeGroup := &hetznerNodeGroup{
id: "pool3",
manager: manager,
}
nodeConfig, exists := manager.clusterConfig.NodeConfigs[nodeGroup.id]
require.True(t, exists)
assert.Nil(t, nodeConfig.ImagesForArch)
assert.Equal(t, "global-arm64-image", manager.clusterConfig.ImagesForArch.Arm64)
assert.Equal(t, "global-amd64-image", manager.clusterConfig.ImagesForArch.Amd64)
})
}
func TestImageSelectionLogic(t *testing.T) {
// Test the image selection logic that would be used in findImage function
t.Run("image selection logic", func(t *testing.T) {
manager := &hetznerManager{
clusterConfig: &ClusterConfig{
IsUsingNewFormat: true,
ImagesForArch: ImageList{
Arm64: "global-arm64-image",
Amd64: "global-amd64-image",
},
NodeConfigs: map[string]*NodeConfig{
"pool1": {
ImagesForArch: &ImageList{
Arm64: "pool1-arm64-image",
Amd64: "pool1-amd64-image",
},
},
"pool2": {
// No ImagesForArch specified
},
},
},
}
// Test pool1 (has specific imagesForArch)
nodeConfig, exists := manager.clusterConfig.NodeConfigs["pool1"]
require.True(t, exists)
require.NotNil(t, nodeConfig.ImagesForArch)
var imagesForArch *ImageList
if nodeConfig.ImagesForArch != nil {
imagesForArch = nodeConfig.ImagesForArch
} else {
imagesForArch = &manager.clusterConfig.ImagesForArch
}
assert.Equal(t, "pool1-arm64-image", imagesForArch.Arm64)
assert.Equal(t, "pool1-amd64-image", imagesForArch.Amd64)
// Test pool2 (no specific imagesForArch, should use global)
nodeConfig, exists = manager.clusterConfig.NodeConfigs["pool2"]
require.True(t, exists)
assert.Nil(t, nodeConfig.ImagesForArch)
if nodeConfig.ImagesForArch != nil {
imagesForArch = nodeConfig.ImagesForArch
} else {
imagesForArch = &manager.clusterConfig.ImagesForArch
}
assert.Equal(t, "global-arm64-image", imagesForArch.Arm64)
assert.Equal(t, "global-amd64-image", imagesForArch.Amd64)
})
}

View File

@ -461,6 +461,12 @@ func (m *ociManagerImpl) GetExistingNodePoolSizeViaCompute(np NodePool) (int, er
if !strings.HasPrefix(*item.DisplayName, displayNamePrefix) {
continue
}
// A node pool can fail to scale up if there's no capacity in the region. In that case, the node pool will be
// returned by the API, but it will not actually exist or have an ID, so we don't want to tell the autoscaler about it.
if *item.Id == "" {
klog.V(4).Infof("skipping node as it doesn't have a scaled-up instance")
continue
}
switch item.LifecycleState {
case core.InstanceLifecycleStateStopped, core.InstanceLifecycleStateTerminated:
klog.V(4).Infof("skipping instance is in stopped/terminated state: %q", *item.Id)
@ -519,23 +525,25 @@ func (m *ociManagerImpl) GetNodePoolNodes(np NodePool) ([]cloudprovider.Instance
nodePool, err := m.nodePoolCache.get(np.Id())
if err != nil {
klog.Error(err, "error while performing GetNodePoolNodes call")
return nil, err
}
var instances []cloudprovider.Instance
for _, node := range nodePool.Nodes {
// A node pool can fail to scale up if there's no capacity in the region. In that case, the node pool will be
// returned by the API, but it will not actually exist or have an ID, so we don't want to tell the autoscaler about it.
if *node.Id == "" {
klog.V(4).Infof("skipping node as it doesn't have a scaled-up instance")
continue
}
if node.NodeError != nil {
// We should move away from the approach of determining a node error as a Out of host capacity
// through string comparison. An error code specifically for Out of host capacity must be set
// and returned in the API response.
errorClass := cloudprovider.OtherErrorClass
if *node.NodeError.Code == "LimitExceeded" ||
*node.NodeError.Code == "QuotaExceeded" ||
(*node.NodeError.Code == "InternalError" &&
strings.Contains(*node.NodeError.Message, "Out of host capacity")) {
(*node.NodeError.Code == "InternalServerError" &&
strings.Contains(*node.NodeError.Message, "quota")) {
errorClass = cloudprovider.OutOfResourcesErrorClass
}

View File

@ -6,11 +6,12 @@ package nodepools
import (
"context"
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider/oci/nodepools/consts"
"net/http"
"reflect"
"testing"
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider/oci/nodepools/consts"
apiv1 "k8s.io/api/core/v1"
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider"
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider/oci/vendor-internal/github.com/oracle/oci-go-sdk/v65/common"
@ -119,10 +120,16 @@ func TestGetNodePoolNodes(t *testing.T) {
{
Id: common.String("node8"),
NodeError: &oke.NodeError{
Code: common.String("InternalError"),
Message: common.String("blah blah Out of host capacity blah blah"),
Code: common.String("InternalServerError"),
Message: common.String("blah blah quota exceeded blah blah"),
},
},
{
// This case happens if a node fails to scale up due to lack of capacity in the region.
// It's not a real node, so we shouldn't return it in the list of nodes.
Id: common.String(""),
LifecycleState: oke.NodeLifecycleStateCreating,
},
},
}
@ -179,8 +186,8 @@ func TestGetNodePoolNodes(t *testing.T) {
State: cloudprovider.InstanceCreating,
ErrorInfo: &cloudprovider.InstanceErrorInfo{
ErrorClass: cloudprovider.OutOfResourcesErrorClass,
ErrorCode: "InternalError",
ErrorMessage: "blah blah Out of host capacity blah blah",
ErrorCode: "InternalServerError",
ErrorMessage: "blah blah quota exceeded blah blah",
},
},
},

View File

@ -214,27 +214,6 @@ func (np *nodePool) DecreaseTargetSize(delta int) error {
}
}
klog.V(4).Infof("DECREASE_TARGET_CHECK_VIA_COMPUTE: %v", decreaseTargetCheckViaComputeBool)
np.manager.InvalidateAndRefreshCache()
nodes, err := np.manager.GetNodePoolNodes(np)
if err != nil {
klog.V(4).Error(err, "error while performing GetNodePoolNodes call")
return err
}
// We do not have an OCI API that allows us to delete a node with a compute instance. So we rely on
// the below approach to determine the number running instance in a nodepool from the compute API and
//update the size of the nodepool accordingly. We should move away from this approach once we have an API
// to delete a specific node without a compute instance.
if !decreaseTargetCheckViaComputeBool {
for _, node := range nodes {
if node.Status != nil && node.Status.ErrorInfo != nil {
if node.Status.ErrorInfo.ErrorClass == cloudprovider.OutOfResourcesErrorClass {
klog.Infof("Using Compute to calculate nodepool size as nodepool may contain nodes without a compute instance.")
decreaseTargetCheckViaComputeBool = true
break
}
}
}
}
var nodesLen int
if decreaseTargetCheckViaComputeBool {
nodesLen, err = np.manager.GetExistingNodePoolSizeViaCompute(np)
@ -243,6 +222,12 @@ func (np *nodePool) DecreaseTargetSize(delta int) error {
return err
}
} else {
np.manager.InvalidateAndRefreshCache()
nodes, err := np.manager.GetNodePoolNodes(np)
if err != nil {
klog.V(4).Error(err, "error while performing GetNodePoolNodes call")
return err
}
nodesLen = len(nodes)
}

View File

@ -16,12 +16,12 @@ There are a wide variety of use cases here. Some examples are as follows:
## Configuration options
As using this expander requires communication with another service, users must specify a few options as CLI arguments.
```bash
--grpc-expander-url
```yaml
--grpcExpanderUrl
```
URL of the gRPC Expander server, for CA to communicate with.
```bash
--grpc-expander-cert
```yaml
--grpcExpanderCert
```
Location of the volume mounted certificate of the gRPC server if it is configured to communicate over TLS
@ -32,7 +32,7 @@ service. Note that the `protos/expander.pb.go` generated protobuf code will also
Communication between Cluster Autoscaler and the gRPC Server will occur over native kube-proxy. To use this, note the Service and Namespace the gRPC server is deployed in.
Deploy the gRPC Expander Server as a separate app, listening on a specific port number.
Start Cluster Autoscaler with the `--grpc-expander-url=SERVICE_NAME.NAMESPACE_NAME.svc.cluster.local:PORT_NUMBER` flag, as well as `--grpc-expander-cert` pointed at the location of the volume mounted certificate of the gRPC server.
Start Cluster Autoscaler with the `--grpcExapnderURl=SERVICE_NAME.NAMESPACE_NAME.svc.cluster.local:PORT_NUMBER` flag, as well as `--grpcExpanderCert` pointed at the location of the volume mounted certificate of the gRPC server.
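
The two flags above describe nothing more than a TLS-secured gRPC connection to an in-cluster Service. A standalone Go sketch of what that connection looks like (placeholder service name, namespace, port and certificate path; not the autoscaler's actual wiring):

```go
package main

import (
	"log"

	"google.golang.org/grpc"
	"google.golang.org/grpc/credentials"
)

func main() {
	// Placeholders mirroring the expander URL and certificate flags documented above.
	const expanderURL = "grpc-expander.kube-system.svc.cluster.local:7000"
	const certPath = "/etc/grpc-expander/cert.pem"

	// Build TLS credentials from the volume-mounted certificate of the gRPC server.
	creds, err := credentials.NewClientTLSFromFile(certPath, "")
	if err != nil {
		log.Fatalf("loading expander certificate: %v", err)
	}

	// Dial the expander Service through in-cluster DNS; kube-proxy routes the traffic.
	conn, err := grpc.Dial(expanderURL, grpc.WithTransportCredentials(creds))
	if err != nil {
		log.Fatalf("dialing gRPC expander: %v", err)
	}
	defer conn.Close()
	// A client generated from protos/expander.pb.go would be constructed from conn here.
}
```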
## Details

View File

@ -43,21 +43,6 @@ spec:
- containerPort: 8000
- name: prometheus
containerPort: 8944
livenessProbe:
httpGet:
path: /health-check
port: prometheus
scheme: HTTP
initialDelaySeconds: 5
periodSeconds: 10
failureThreshold: 3
readinessProbe:
httpGet:
path: /health-check
port: prometheus
scheme: HTTP
periodSeconds: 10
failureThreshold: 3
volumes:
- name: tls-certs
secret:

View File

@ -41,18 +41,3 @@ spec:
ports:
- name: prometheus
containerPort: 8942
livenessProbe:
httpGet:
path: /health-check
port: prometheus
scheme: HTTP
initialDelaySeconds: 5
periodSeconds: 10
failureThreshold: 3
readinessProbe:
httpGet:
path: /health-check
port: prometheus
scheme: HTTP
periodSeconds: 10
failureThreshold: 3

View File

@ -41,18 +41,3 @@ spec:
ports:
- name: prometheus
containerPort: 8942
livenessProbe:
httpGet:
path: /health-check
port: prometheus
scheme: HTTP
initialDelaySeconds: 5
periodSeconds: 10
failureThreshold: 3
readinessProbe:
httpGet:
path: /health-check
port: prometheus
scheme: HTTP
periodSeconds: 10
failureThreshold: 3

View File

@ -32,18 +32,3 @@ spec:
ports:
- name: prometheus
containerPort: 8942
livenessProbe:
httpGet:
path: /health-check
port: prometheus
scheme: HTTP
initialDelaySeconds: 5
periodSeconds: 10
failureThreshold: 3
readinessProbe:
httpGet:
path: /health-check
port: prometheus
scheme: HTTP
periodSeconds: 10
failureThreshold: 3

View File

@ -37,18 +37,3 @@ spec:
ports:
- name: prometheus
containerPort: 8943
livenessProbe:
httpGet:
path: /health-check
port: prometheus
scheme: HTTP
initialDelaySeconds: 5
periodSeconds: 10
failureThreshold: 3
readinessProbe:
httpGet:
path: /health-check
port: prometheus
scheme: HTTP
periodSeconds: 10
failureThreshold: 3

View File

@ -1,229 +0,0 @@
# AEP-8026: Allow per-VPA component configuration parameters
<!-- toc -->
- [Summary](#summary)
- [Motivation](#motivation)
- [Goals](#goals)
- [Non-Goals](#non-goals)
- [Proposal](#proposal)
- [Parameter Descriptions](#parameter-descriptions)
- [Container Policy Parameters](#container-policy-parameters)
- [Update Policy Parameters](#update-policy-parameters)
- [Design Details](#design-details)
- [API Changes](#api-changes)
- [Phase 1 (Current Proposal)](#phase-1-current-proposal)
- [Future Extensions](#future-extensions)
- [Feature Enablement and Rollback](#feature-enablement-and-rollback)
- [How can this feature be enabled / disabled in a live cluster?](#how-can-this-feature-be-enabled--disabled-in-a-live-cluster)
- [Kubernetes version compatibility](#kubernetes-version-compatibility)
- [Validation via CEL and Testing](#validation-via-cel-and-testing)
- [Test Plan](#test-plan)
- [Implementation History](#implementation-history)
- [Future Work](#future-work)
- [Alternatives](#alternatives)
- [Multiple VPA Deployments](#multiple-vpa-deployments)
- [Environment-Specific Configuration](#environment-specific-configuration)
<!-- /toc -->
## Summary
Currently, VPA components (recommender, updater, admission controller) are configured through global flags. This makes it challenging to support different workloads with varying resource optimization needs within the same cluster. This proposal introduces the ability to specify configuration parameters at the individual VPA object level, allowing for workload-specific optimization strategies.
## Motivation
Different types of workloads in a Kubernetes cluster often have different resource optimization requirements. For example:
- Batch processing jobs might benefit from aggressive OOM handling and frequent adjustments
- User-facing services might need more conservative growth patterns for stability
- Development environments might need different settings than production
Currently, supporting these different needs requires running multiple VPA component instances with different configurations, which increases operational complexity and resource usage.
### Goals
- Allow specification of component-specific parameters in individual VPA objects
- Support different optimization strategies for different workloads in the same cluster
- Maintain backward compatibility with existing global configuration
- Initially support the following parameters:
- oomBumpUpRatio
- oomMinBumpUp
- memoryAggregationInterval
- memoryAggregationIntervalCount
- evictAfterOOMThreshold
### Non-Goals
- Converting all existing VPA flags to per-object configuration
- Changing the core VPA algorithm or its decision-making process
- Adding new optimization strategies
## Proposal
The configuration will be split into two sections: container-specific recommendations under `containerPolicies` and updater configuration under `updatePolicy`. This structure is designed to be extensible, allowing for additional parameters to be added in future iterations of this enhancement.
```yaml
apiVersion: autoscaling.k8s.io/v1
kind: VerticalPodAutoscaler
metadata:
name: oom-test-vpa
spec:
targetRef:
apiVersion: apps/v1
kind: Deployment
name: oom-test
updatePolicy:
updateMode: Auto
evictAfterOOMThreshold: "5m"
resourcePolicy:
containerPolicies:
- containerName: "*"
oomBumpUpRatio: "1.5"
oomMinBumpUp: 104857600
memoryAggregationInterval: "12h"
memoryAggregationIntervalCount: 5
```
### Parameter Descriptions
#### Container Policy Parameters
* `oomBumpUpRatio` (Quantity):
- Multiplier applied to memory recommendations after OOM events
- Represented as a Quantity (e.g., "1.5")
- Must be greater than 1
- Controls how aggressively memory is increased after container crashes
* `oomMinBumpUp` (bytes):
- Minimum absolute memory increase after OOM events
- Ensures meaningful increases even for small containers
* `memoryAggregationInterval` (duration):
- Time window for aggregating memory usage data
- Affects how quickly VPA responds to memory usage changes
* `memoryAggregationIntervalCount` (integer):
- Number of consecutive memory aggregation intervals
- Used to calculate the total memory aggregation window length
- Total window length = memoryAggregationInterval * memoryAggregationIntervalCount
#### Update Policy Parameters
* `evictAfterOOMThreshold` (duration):
- Time to wait after OOM before considering pod eviction
- Helps prevent rapid eviction cycles while maintaining stability
Each parameter can be configured independently, falling back to global defaults if not specified. Values should be chosen based on workload characteristics and stability requirements.
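
A minimal sketch of that fallback for one parameter, using the values from the YAML example above (the helper and its names are hypothetical; the eventual implementation may differ):

```go
package main

import (
	"fmt"
	"time"
)

// resolveMemoryAggregationInterval returns the per-VPA value when it is set,
// otherwise the component's global default.
func resolveMemoryAggregationInterval(perVPA *time.Duration, globalDefault time.Duration) time.Duration {
	if perVPA != nil {
		return *perVPA
	}
	return globalDefault
}

func main() {
	// From the example VPA: memoryAggregationInterval "12h", memoryAggregationIntervalCount 5.
	interval := 12 * time.Hour
	count := 5

	effective := resolveMemoryAggregationInterval(&interval, 24*time.Hour)
	// Total window length = memoryAggregationInterval * memoryAggregationIntervalCount.
	fmt.Println(effective * time.Duration(count)) // 60h0m0s
}
```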
## Design Details
### API Changes
#### Phase 1 (Current Proposal)
Extend `ContainerResourcePolicy` with:
* `oomBumpUpRatio`
* `oomMinBumpUp`
* `memoryAggregationInterval`
* `memoryAggregationIntervalCount`
Extend `PodUpdatePolicy` with:
* `evictAfterOOMThreshold`
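
A rough sketch of what the Phase 1 additions could look like on the existing API types (field names follow the parameters above; the concrete Go types and validation markers are up to the implementation and may differ):

```go
package vpasketch

import (
	"k8s.io/apimachinery/pkg/api/resource"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

// ContainerResourcePolicy sketch: all new fields are optional pointers and
// fall back to the recommender's global flags when nil.
type ContainerResourcePolicy struct {
	ContainerName string

	OOMBumpUpRatio                 *resource.Quantity // multiplier applied after OOM events, must be > 1
	OOMMinBumpUp                   *int64             // minimum absolute memory increase after OOM, in bytes
	MemoryAggregationInterval      *metav1.Duration   // length of a single memory aggregation window
	MemoryAggregationIntervalCount *int64             // number of consecutive aggregation intervals
}

// PodUpdatePolicy sketch with the single new updater-side field.
type PodUpdatePolicy struct {
	UpdateMode             *string
	EvictAfterOOMThreshold *metav1.Duration // wait after OOM before considering eviction
}
```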
#### Future Extensions
This AEP will be updated as additional parameters are identified for per-object configuration. Potential candidates include:
* `confidenceIntervalCPU`
* `confidenceIntervalMemory`
* `recommendationMarginFraction`
* Other parameters that benefit from workload-specific tuning
### Feature Enablement and Rollback
#### How can this feature be enabled / disabled in a live cluster?
- Feature gate name: `PerVPAConfig`
- Default: Off (Alpha)
- Components depending on the feature gate:
- admission-controller
- recommender
- updater
The feature gate will remain in alpha (default off) until:
- All planned configuration parameters have been implemented and tested
- Performance impact has been thoroughly evaluated
- Documentation is complete for all parameters
Disabling of feature gate `PerVPAConfig` will cause the following to happen:
- Any per-VPA configuration parameters specified in VPA objects will be ignored
- Components will fall back to using their global configuration values
Enabling of feature gate `PerVPAConfig` will cause the following to happen:
- VPA components will honor the per-VPA configuration parameters specified in VPA objects
- Validation will be performed on the configuration parameters
- Configuration parameters will override global defaults for the specific VPA object
### Kubernetes version compatibility
The `PerVPAConfig` feature requires VPA version 1.5.0 or higher. The feature is being introduced as alpha and will follow the standard Kubernetes feature gate graduation process:
- Alpha: v1.5.0 (default off)
- Beta: TBD (default on)
- GA: TBD (default on)
### Validation via CEL and Testing
Initial validation rules (CEL):
* `oomMinBumpUp` > 0
* `memoryAggregationInterval` > 0
* `evictAfterOOMThreshold` > 0
* `memoryAggregationIntervalCount` > 0
Validation via Admission Controller:
Some parameters cannot be validated using Common Expression Language (CEL). This validation is performed within the admission controller instead.
* `oomBumpUpRatio`: validated using the Kubernetes Quantity type; the value must be greater than 1.
Additional validation rules will be added as new parameters are introduced.
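
For example, the admission-controller rule for `oomBumpUpRatio` could be expressed with the Kubernetes `resource.Quantity` comparison API roughly as follows (a sketch, not the actual validation code):

```go
package vpasketch

import (
	"fmt"

	"k8s.io/apimachinery/pkg/api/resource"
)

// validateOOMBumpUpRatio enforces the rule described above: when set, the
// ratio is a Quantity and must be strictly greater than 1.
func validateOOMBumpUpRatio(ratio *resource.Quantity) error {
	if ratio == nil {
		return nil // not set: the global default applies
	}
	if ratio.Cmp(resource.MustParse("1")) <= 0 {
		return fmt.Errorf("oomBumpUpRatio must be greater than 1, got %s", ratio.String())
	}
	return nil
}
```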
E2E tests will be included to verify:
* Different configurations are properly applied and respected by VPA components
* VPA behavior matches expected outcomes for different parameter combinations
* Proper fallback to global configuration when parameters are not specified
### Test Plan
- Unit tests for new API fields and validation
- Integration tests verifying different configurations are properly applied
- E2E tests comparing behavior with different configurations
- Upgrade tests ensuring backward compatibility
## Implementation History
- 2025-04-12: Initial proposal
- Future: Additional parameters will be added based on user feedback and requirements
## Future Work
This enhancement is designed to be extensible. As the VPA evolves and users provide feedback, additional parameters may be added to the per-object configuration. Each new parameter will:
1. Be documented in this AEP
2. Include appropriate validation rules
3. Maintain backward compatibility
4. Follow the same pattern of falling back to global configuration when not specified
The decision to add new parameters will be based on:
- User feedback and requirements
- Performance impact analysis
- Implementation complexity
- Maintenance considerations
## Alternatives
### Multiple VPA Deployments
Continue with current approach of running multiple VPA deployments with different configurations:
- Pros: No API changes needed
- Cons: Higher resource usage, operational complexity
### Environment-Specific Configuration
Use different VPA deployments per environment (dev/staging/prod):
- Pros: Simpler than per-workload configuration
- Cons: Less flexible, doesn't address varying needs within same environment

View File

@ -41,7 +41,6 @@ require (
github.com/inconshreveable/mousetrap v1.1.0 // indirect
github.com/josharian/intern v1.0.0 // indirect
github.com/json-iterator/go v1.1.12 // indirect
github.com/kylelemons/godebug v1.1.0 // indirect
github.com/mailru/easyjson v0.9.0 // indirect
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
github.com/modern-go/reflect2 v1.0.2 // indirect

View File

@ -81,7 +81,6 @@ type ClusterStateFeederFactory struct {
KubeClient kube_client.Interface
MetricsClient metrics.MetricsClient
VpaCheckpointClient vpa_api.VerticalPodAutoscalerCheckpointsGetter
VpaCheckpointLister vpa_lister.VerticalPodAutoscalerCheckpointLister
VpaLister vpa_lister.VerticalPodAutoscalerLister
PodLister v1lister.PodLister
OOMObserver oom.Observer
@ -100,7 +99,6 @@ func (m ClusterStateFeederFactory) Make() *clusterStateFeeder {
metricsClient: m.MetricsClient,
oomChan: m.OOMObserver.GetObservedOomsChannel(),
vpaCheckpointClient: m.VpaCheckpointClient,
vpaCheckpointLister: m.VpaCheckpointLister,
vpaLister: m.VpaLister,
clusterState: m.ClusterState,
specClient: spec.NewSpecClient(m.PodLister),
@ -208,7 +206,6 @@ type clusterStateFeeder struct {
metricsClient metrics.MetricsClient
oomChan <-chan oom.OomInfo
vpaCheckpointClient vpa_api.VerticalPodAutoscalerCheckpointsGetter
vpaCheckpointLister vpa_lister.VerticalPodAutoscalerCheckpointLister
vpaLister vpa_lister.VerticalPodAutoscalerLister
clusterState model.ClusterState
selectorFetcher target.VpaTargetSelectorFetcher
@ -270,29 +267,25 @@ func (feeder *clusterStateFeeder) InitFromCheckpoints(ctx context.Context) {
klog.V(3).InfoS("Initializing VPA from checkpoints")
feeder.LoadVPAs(ctx)
klog.V(3).InfoS("Fetching VPA checkpoints")
checkpointList, err := feeder.vpaCheckpointLister.List(labels.Everything())
if err != nil {
klog.ErrorS(err, "Cannot list VPA checkpoints")
}
namespaces := make(map[string]bool)
for _, v := range feeder.clusterState.VPAs() {
namespaces[v.ID.Namespace] = true
}
for namespace := range namespaces {
if feeder.shouldIgnoreNamespace(namespace) {
klog.V(3).InfoS("Skipping loading VPA Checkpoints from namespace.", "namespace", namespace, "vpaObjectNamespace", feeder.vpaObjectNamespace, "ignoredNamespaces", feeder.ignoredNamespaces)
continue
klog.V(3).InfoS("Fetching checkpoints", "namespace", namespace)
checkpointList, err := feeder.vpaCheckpointClient.VerticalPodAutoscalerCheckpoints(namespace).List(ctx, metav1.ListOptions{})
if err != nil {
klog.ErrorS(err, "Cannot list VPA checkpoints", "namespace", namespace)
}
for _, checkpoint := range checkpointList.Items {
for _, checkpoint := range checkpointList {
klog.V(3).InfoS("Loading checkpoint for VPA", "checkpoint", klog.KRef(checkpoint.Namespace, checkpoint.Spec.VPAObjectName), "container", checkpoint.Spec.ContainerName)
err = feeder.setVpaCheckpoint(checkpoint)
err = feeder.setVpaCheckpoint(&checkpoint)
if err != nil {
klog.ErrorS(err, "Error while loading checkpoint")
}
}
}
}
@ -352,12 +345,11 @@ func (feeder *clusterStateFeeder) shouldIgnoreNamespace(namespace string) bool {
func (feeder *clusterStateFeeder) cleanupCheckpointsForNamespace(ctx context.Context, namespace string, allVPAKeys map[model.VpaID]bool) error {
var err error
checkpointList, err := feeder.vpaCheckpointLister.VerticalPodAutoscalerCheckpoints(namespace).List(labels.Everything())
checkpointList, err := feeder.vpaCheckpointClient.VerticalPodAutoscalerCheckpoints(namespace).List(ctx, metav1.ListOptions{})
if err != nil {
return err
}
for _, checkpoint := range checkpointList {
for _, checkpoint := range checkpointList.Items {
vpaID := model.VpaID{Namespace: checkpoint.Namespace, VpaName: checkpoint.Spec.VPAObjectName}
if !allVPAKeys[vpaID] {
if errFeeder := feeder.vpaCheckpointClient.VerticalPodAutoscalerCheckpoints(namespace).Delete(ctx, checkpoint.Name, metav1.DeleteOptions{}); errFeeder != nil {
@ -581,9 +573,6 @@ func (feeder *clusterStateFeeder) validateTargetRef(ctx context.Context, vpa *vp
if vpa.Spec.TargetRef == nil {
return false, condition{}
}
target := fmt.Sprintf("%s.%s/%s", vpa.Spec.TargetRef.APIVersion, vpa.Spec.TargetRef.Kind, vpa.Spec.TargetRef.Name)
k := controllerfetcher.ControllerKeyWithAPIVersion{
ControllerKey: controllerfetcher.ControllerKey{
Namespace: vpa.Namespace,
@ -594,13 +583,13 @@ func (feeder *clusterStateFeeder) validateTargetRef(ctx context.Context, vpa *vp
}
top, err := feeder.controllerFetcher.FindTopMostWellKnownOrScalable(ctx, &k)
if err != nil {
return false, condition{conditionType: vpa_types.ConfigUnsupported, delete: false, message: fmt.Sprintf("Error checking if target %s is a topmost well-known or scalable controller: %s", target, err)}
return false, condition{conditionType: vpa_types.ConfigUnsupported, delete: false, message: fmt.Sprintf("Error checking if target is a topmost well-known or scalable controller: %s", err)}
}
if top == nil {
return false, condition{conditionType: vpa_types.ConfigUnsupported, delete: false, message: fmt.Sprintf("Unknown error during checking if target %s is a topmost well-known or scalable controller", target)}
return false, condition{conditionType: vpa_types.ConfigUnsupported, delete: false, message: fmt.Sprintf("Unknown error during checking if target is a topmost well-known or scalable controller: %s", err)}
}
if *top != k {
return false, condition{conditionType: vpa_types.ConfigUnsupported, delete: false, message: fmt.Sprintf("The target %s has a parent controller but it should point to a topmost well-known or scalable controller", target)}
return false, condition{conditionType: vpa_types.ConfigUnsupported, delete: false, message: "The targetRef controller has a parent but it should point to a topmost well-known or scalable controller"}
}
return true, condition{}
}

View File

@ -65,8 +65,8 @@ var (
unsupportedConditionTextFromFetcher = "Cannot read targetRef. Reason: targetRef not defined"
unsupportedConditionNoExtraText = "Cannot read targetRef"
unsupportedConditionNoTargetRef = "Cannot read targetRef"
unsupportedConditionMudaMudaMuda = "Error checking if target taxonomy.doestar/doseph-doestar is a topmost well-known or scalable controller: muda muda muda"
unsupportedTargetRefHasParent = "The target stardust.dodokind/dotaro has a parent controller but it should point to a topmost well-known or scalable controller"
unsupportedConditionMudaMudaMuda = "Error checking if target is a topmost well-known or scalable controller: muda muda muda"
unsupportedTargetRefHasParent = "The targetRef controller has a parent but it should point to a topmost well-known or scalable controller"
)
const (
@ -859,12 +859,11 @@ func TestFilterVPAsIgnoreNamespaces(t *testing.T) {
func TestCanCleanupCheckpoints(t *testing.T) {
_, tctx := ktesting.NewTestContext(t)
client := fake.NewSimpleClientset()
namespace := "testNamespace"
_, err := client.CoreV1().Namespaces().Create(tctx, &v1.Namespace{ObjectMeta: metav1.ObjectMeta{Name: namespace}}, metav1.CreateOptions{})
_, err := client.CoreV1().Namespaces().Create(tctx, &v1.Namespace{ObjectMeta: metav1.ObjectMeta{Name: "testNamespace"}}, metav1.CreateOptions{})
assert.NoError(t, err)
vpaBuilder := test.VerticalPodAutoscaler().WithContainer("container").WithNamespace(namespace).WithTargetRef(&autoscalingv1.CrossVersionObjectReference{
vpaBuilder := test.VerticalPodAutoscaler().WithContainer("container").WithNamespace("testNamespace").WithTargetRef(&autoscalingv1.CrossVersionObjectReference{
Kind: kind,
Name: name1,
APIVersion: apiVersion,
@ -879,19 +878,22 @@ func TestCanCleanupCheckpoints(t *testing.T) {
vpaLister := &test.VerticalPodAutoscalerListerMock{}
vpaLister.On("List").Return(vpas, nil)
vpaCheckPoint := &vpa_types.VerticalPodAutoscalerCheckpoint{
ObjectMeta: metav1.ObjectMeta{
Namespace: namespace,
Name: "nonExistentVPA",
},
Spec: vpa_types.VerticalPodAutoscalerCheckpointSpec{
VPAObjectName: "nonExistentVPA",
checkpoints := &vpa_types.VerticalPodAutoscalerCheckpointList{
Items: []vpa_types.VerticalPodAutoscalerCheckpoint{
{
ObjectMeta: metav1.ObjectMeta{
Namespace: "testNamespace",
Name: "nonExistentVPA",
},
Spec: vpa_types.VerticalPodAutoscalerCheckpointSpec{
VPAObjectName: "nonExistentVPA",
},
},
},
}
vpacheckpoints := []*vpa_types.VerticalPodAutoscalerCheckpoint{vpaCheckPoint}
for _, vpa := range vpas {
vpacheckpoints = append(vpacheckpoints, &vpa_types.VerticalPodAutoscalerCheckpoint{
checkpoints.Items = append(checkpoints.Items, vpa_types.VerticalPodAutoscalerCheckpoint{
ObjectMeta: metav1.ObjectMeta{
Namespace: vpa.Namespace,
Name: vpa.Name,
@ -902,29 +904,23 @@ func TestCanCleanupCheckpoints(t *testing.T) {
})
}
// Create a mock checkpoint client to track deletions
checkpointClient := &fakeautoscalingv1.FakeAutoscalingV1{Fake: &core.Fake{}}
// Track deleted checkpoints
checkpointClient.AddReactor("list", "verticalpodautoscalercheckpoints", func(action core.Action) (bool, runtime.Object, error) {
return true, checkpoints, nil
})
deletedCheckpoints := []string{}
checkpointClient.AddReactor("delete", "verticalpodautoscalercheckpoints", func(action core.Action) (bool, runtime.Object, error) {
deleteAction := action.(core.DeleteAction)
deletedCheckpoints = append(deletedCheckpoints, deleteAction.GetName())
return true, nil, nil
})
// Create namespace lister mock that will return the checkpoint list
checkpointNamespaceLister := &test.VerticalPodAutoscalerCheckPointListerMock{}
checkpointNamespaceLister.On("List").Return(vpacheckpoints, nil)
// Create main checkpoint mock that will return the namespace lister
checkpointLister := &test.VerticalPodAutoscalerCheckPointListerMock{}
checkpointLister.On("VerticalPodAutoscalerCheckpoints", namespace).Return(checkpointNamespaceLister)
feeder := clusterStateFeeder{
coreClient: client.CoreV1(),
vpaLister: vpaLister,
vpaCheckpointClient: checkpointClient,
vpaCheckpointLister: checkpointLister,
clusterState: model.NewClusterState(testGcPeriod),
recommenderName: "default",
}

View File

@ -297,7 +297,6 @@ func run(ctx context.Context, healthCheck *metrics.HealthCheck, commonFlag *comm
MetricsClient: input_metrics.NewMetricsClient(source, commonFlag.VpaObjectNamespace, "default-metrics-client"),
VpaCheckpointClient: vpa_clientset.NewForConfigOrDie(config).AutoscalingV1(),
VpaLister: vpa_api_util.NewVpasLister(vpa_clientset.NewForConfigOrDie(config), make(chan struct{}), commonFlag.VpaObjectNamespace),
VpaCheckpointLister: vpa_api_util.NewVpaCheckpointLister(vpa_clientset.NewForConfigOrDie(config), make(chan struct{}), commonFlag.VpaObjectNamespace),
ClusterState: clusterState,
SelectorFetcher: target.NewVpaTargetSelectorFetcher(config, kubeClient, factory),
MemorySaveMode: *memorySaver,

View File

@ -230,8 +230,7 @@ func (u *updater) RunOnce(ctx context.Context) {
// to contain only Pods controlled by a VPA in auto, recreate, or inPlaceOrRecreate mode
for vpa, livePods := range controlledPods {
vpaSize := len(livePods)
updateMode := vpa_api_util.GetUpdateMode(vpa)
controlledPodsCounter.Add(vpaSize, updateMode, vpaSize)
controlledPodsCounter.Add(vpaSize, vpaSize)
creatorToSingleGroupStatsMap, podToReplicaCreatorMap, err := u.restrictionFactory.GetCreatorMaps(livePods, vpa)
if err != nil {
klog.ErrorS(err, "Failed to get creator maps")
@ -243,6 +242,7 @@ func (u *updater) RunOnce(ctx context.Context) {
podsForInPlace := make([]*apiv1.Pod, 0)
podsForEviction := make([]*apiv1.Pod, 0)
updateMode := vpa_api_util.GetUpdateMode(vpa)
if updateMode == vpa_types.UpdateModeInPlaceOrRecreate && features.Enabled(features.InPlaceOrRecreate) {
podsForInPlace = u.getPodsUpdateOrder(filterNonInPlaceUpdatablePods(livePods, inPlaceLimiter), vpa)
@ -253,7 +253,7 @@ func (u *updater) RunOnce(ctx context.Context) {
klog.InfoS("Warning: feature gate is not enabled for this updateMode", "featuregate", features.InPlaceOrRecreate, "updateMode", vpa_types.UpdateModeInPlaceOrRecreate)
}
podsForEviction = u.getPodsUpdateOrder(filterNonEvictablePods(livePods, evictionLimiter), vpa)
evictablePodsCounter.Add(vpaSize, updateMode, len(podsForEviction))
evictablePodsCounter.Add(vpaSize, len(podsForEviction))
}
withInPlaceUpdatable := false
@ -304,7 +304,7 @@ func (u *updater) RunOnce(ctx context.Context) {
klog.V(0).InfoS("Eviction failed", "error", evictErr, "pod", klog.KObj(pod))
} else {
withEvicted = true
metrics_updater.AddEvictedPod(vpaSize, updateMode)
metrics_updater.AddEvictedPod(vpaSize)
}
}
@ -315,10 +315,10 @@ func (u *updater) RunOnce(ctx context.Context) {
vpasWithInPlaceUpdatedPodsCounter.Add(vpaSize, 1)
}
if withEvictable {
vpasWithEvictablePodsCounter.Add(vpaSize, updateMode, 1)
vpasWithEvictablePodsCounter.Add(vpaSize, 1)
}
if withEvicted {
vpasWithEvictedPodsCounter.Add(vpaSize, updateMode, 1)
vpasWithEvictedPodsCounter.Add(vpaSize, 1)
}
}
timer.ObserveStep("EvictPods")

View File

@ -22,7 +22,6 @@ import (
"github.com/prometheus/client_golang/prometheus"
vpa_types "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/apis/autoscaling.k8s.io/v1"
"k8s.io/autoscaler/vertical-pod-autoscaler/pkg/utils/metrics"
)
@ -36,20 +35,13 @@ type SizeBasedGauge struct {
gauge *prometheus.GaugeVec
}
// UpdateModeAndSizeBasedGauge is a wrapper for incrementally recording values
// indexed by log2(VPA size) and update mode
type UpdateModeAndSizeBasedGauge struct {
values [metrics.MaxVpaSizeLog]map[vpa_types.UpdateMode]int
gauge *prometheus.GaugeVec
}
var (
controlledCount = prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Namespace: metricsNamespace,
Name: "controlled_pods_total",
Help: "Number of Pods controlled by VPA updater.",
}, []string{"vpa_size_log2", "update_mode"},
}, []string{"vpa_size_log2"},
)
evictableCount = prometheus.NewGaugeVec(
@ -57,7 +49,7 @@ var (
Namespace: metricsNamespace,
Name: "evictable_pods_total",
Help: "Number of Pods matching evicition criteria.",
}, []string{"vpa_size_log2", "update_mode"},
}, []string{"vpa_size_log2"},
)
evictedCount = prometheus.NewCounterVec(
@ -65,7 +57,7 @@ var (
Namespace: metricsNamespace,
Name: "evicted_pods_total",
Help: "Number of Pods evicted by Updater to apply a new recommendation.",
}, []string{"vpa_size_log2", "update_mode"},
}, []string{"vpa_size_log2"},
)
vpasWithEvictablePodsCount = prometheus.NewGaugeVec(
@ -73,7 +65,7 @@ var (
Namespace: metricsNamespace,
Name: "vpas_with_evictable_pods_total",
Help: "Number of VPA objects with at least one Pod matching evicition criteria.",
}, []string{"vpa_size_log2", "update_mode"},
}, []string{"vpa_size_log2"},
)
vpasWithEvictedPodsCount = prometheus.NewGaugeVec(
@ -81,7 +73,7 @@ var (
Namespace: metricsNamespace,
Name: "vpas_with_evicted_pods_total",
Help: "Number of VPA objects with at least one evicted Pod.",
}, []string{"vpa_size_log2", "update_mode"},
}, []string{"vpa_size_log2"},
)
inPlaceUpdatableCount = prometheus.NewGaugeVec(
@ -146,41 +138,30 @@ func newSizeBasedGauge(gauge *prometheus.GaugeVec) *SizeBasedGauge {
}
}
// newModeAndSizeBasedGauge provides a wrapper for counting items in a loop
func newModeAndSizeBasedGauge(gauge *prometheus.GaugeVec) *UpdateModeAndSizeBasedGauge {
g := &UpdateModeAndSizeBasedGauge{
gauge: gauge,
}
for i := range g.values {
g.values[i] = make(map[vpa_types.UpdateMode]int)
}
return g
}
// NewControlledPodsCounter returns a wrapper for counting Pods controlled by Updater
func NewControlledPodsCounter() *UpdateModeAndSizeBasedGauge {
return newModeAndSizeBasedGauge(controlledCount)
func NewControlledPodsCounter() *SizeBasedGauge {
return newSizeBasedGauge(controlledCount)
}
// NewEvictablePodsCounter returns a wrapper for counting Pods which are matching eviction criteria
func NewEvictablePodsCounter() *UpdateModeAndSizeBasedGauge {
return newModeAndSizeBasedGauge(evictableCount)
func NewEvictablePodsCounter() *SizeBasedGauge {
return newSizeBasedGauge(evictableCount)
}
// NewVpasWithEvictablePodsCounter returns a wrapper for counting VPA objects with Pods matching eviction criteria
func NewVpasWithEvictablePodsCounter() *UpdateModeAndSizeBasedGauge {
return newModeAndSizeBasedGauge(vpasWithEvictablePodsCount)
func NewVpasWithEvictablePodsCounter() *SizeBasedGauge {
return newSizeBasedGauge(vpasWithEvictablePodsCount)
}
// NewVpasWithEvictedPodsCounter returns a wrapper for counting VPA objects with evicted Pods
func NewVpasWithEvictedPodsCounter() *UpdateModeAndSizeBasedGauge {
return newModeAndSizeBasedGauge(vpasWithEvictedPodsCount)
func NewVpasWithEvictedPodsCounter() *SizeBasedGauge {
return newSizeBasedGauge(vpasWithEvictedPodsCount)
}
// AddEvictedPod increases the counter of pods evicted by Updater, by given VPA size
func AddEvictedPod(vpaSize int, mode vpa_types.UpdateMode) {
func AddEvictedPod(vpaSize int) {
log2 := metrics.GetVpaSizeLog2(vpaSize)
evictedCount.WithLabelValues(strconv.Itoa(log2), string(mode)).Inc()
evictedCount.WithLabelValues(strconv.Itoa(log2)).Inc()
}
// NewInPlaceUpdatablePodsCounter returns a wrapper for counting Pods which are matching in-place update criteria
@ -222,19 +203,3 @@ func (g *SizeBasedGauge) Observe() {
g.gauge.WithLabelValues(strconv.Itoa(log2)).Set(float64(value))
}
}
// Add increases the counter for the given VPA size and VPA update mode.
func (g *UpdateModeAndSizeBasedGauge) Add(vpaSize int, vpaUpdateMode vpa_types.UpdateMode, value int) {
log2 := metrics.GetVpaSizeLog2(vpaSize)
g.values[log2][vpaUpdateMode] += value
}
// Observe stores the recorded values into the metrics object associated with
// the wrapper
func (g *UpdateModeAndSizeBasedGauge) Observe() {
for log2, valueMap := range g.values {
for vpaMode, value := range valueMap {
g.gauge.WithLabelValues(strconv.Itoa(log2), string(vpaMode)).Set(float64(value))
}
}
}
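For context on the Add/Observe pattern above: the wrapper accumulates per-bucket counts while the updater iterates over pods, then flushes them to the Prometheus gauge in a single Observe call, so each cycle overwrites the previous values instead of double-counting. A minimal sketch of that usage against the mode-and-size variant whose methods appear above; the pods slice and the vpaFor/vpaSizeOf/updateModeOf helpers are hypothetical stand-ins, not names from the real updater:

controlledPods := NewControlledPodsCounter()
for _, pod := range pods {
	vpa := vpaFor(pod)                                       // hypothetical: resolve the VPA matching this pod
	controlledPods.Add(vpaSizeOf(vpa), updateModeOf(vpa), 1) // bucket by VPA size (log2) and update mode
}
controlledPods.Observe() // write the accumulated counts to the controlled_pods_total gauge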

View File

@ -1,297 +0,0 @@
/*
Copyright 2025 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package updater
import (
"testing"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/testutil"
vpa_types "k8s.io/autoscaler/vertical-pod-autoscaler/pkg/apis/autoscaling.k8s.io/v1"
)
func TestAddEvictedPod(t *testing.T) {
testCases := []struct {
desc string
vpaSize int
mode vpa_types.UpdateMode
log2 string
}{
{
desc: "VPA size 5, mode Auto",
vpaSize: 5,
mode: vpa_types.UpdateModeAuto,
log2: "2",
},
{
desc: "VPA size 10, mode Off",
vpaSize: 10,
mode: vpa_types.UpdateModeOff,
log2: "3",
},
}
for _, tc := range testCases {
t.Run(tc.desc, func(t *testing.T) {
t.Cleanup(evictedCount.Reset)
AddEvictedPod(tc.vpaSize, tc.mode)
val := testutil.ToFloat64(evictedCount.WithLabelValues(tc.log2, string(tc.mode)))
if val != 1 {
t.Errorf("Unexpected value for evictedCount metric with labels (%s, %s): got %v, want 1", tc.log2, string(tc.mode), val)
}
})
}
}
func TestAddInPlaceUpdatedPod(t *testing.T) {
testCases := []struct {
desc string
vpaSize int
log2 string
}{
{
desc: "VPA size 10",
vpaSize: 10,
log2: "3",
},
{
desc: "VPA size 1",
vpaSize: 1,
log2: "0",
},
}
for _, tc := range testCases {
t.Run(tc.desc, func(t *testing.T) {
t.Cleanup(inPlaceUpdatedCount.Reset)
AddInPlaceUpdatedPod(tc.vpaSize)
val := testutil.ToFloat64(inPlaceUpdatedCount.WithLabelValues(tc.log2))
if val != 1 {
t.Errorf("Unexpected value for InPlaceUpdatedPod metric with labels (%s): got %v, want 1", tc.log2, val)
}
})
}
}
func TestRecordFailedInPlaceUpdate(t *testing.T) {
testCases := []struct {
desc string
vpaSize int
reason string
log2 string
}{
{
desc: "VPA size 2, some reason",
vpaSize: 2,
reason: "some_reason",
log2: "1",
},
{
desc: "VPA size 20, another reason",
vpaSize: 20,
reason: "another_reason",
log2: "4",
},
}
for _, tc := range testCases {
t.Run(tc.desc, func(t *testing.T) {
t.Cleanup(failedInPlaceUpdateAttempts.Reset)
RecordFailedInPlaceUpdate(tc.vpaSize, tc.reason)
val := testutil.ToFloat64(failedInPlaceUpdateAttempts.WithLabelValues(tc.log2, tc.reason))
if val != 1 {
t.Errorf("Unexpected value for FailedInPlaceUpdate metric with labels (%s, %s): got %v, want 1", tc.log2, tc.reason, val)
}
})
}
}
func TestUpdateModeAndSizeBasedGauge(t *testing.T) {
type addition struct {
vpaSize int
mode vpa_types.UpdateMode
value int
}
type expectation struct {
labels []string
value float64
}
testCases := []struct {
desc string
newCounter func() *UpdateModeAndSizeBasedGauge
metric *prometheus.GaugeVec
metricName string
additions []addition
expectedMetrics []expectation
}{
{
desc: "ControlledPodsCounter",
newCounter: NewControlledPodsCounter,
metric: controlledCount,
metricName: "vpa_updater_controlled_pods_total",
additions: []addition{
{1, vpa_types.UpdateModeAuto, 5},
{2, vpa_types.UpdateModeOff, 10},
{2, vpa_types.UpdateModeAuto, 2},
{2, vpa_types.UpdateModeAuto, 7},
},
expectedMetrics: []expectation{
{[]string{"0" /* log2(1) */, "Auto"}, 5},
{[]string{"1" /* log2(2) */, "Auto"}, 9},
{[]string{"1" /* log2(2) */, "Off"}, 10},
},
},
{
desc: "EvictablePodsCounter",
newCounter: NewEvictablePodsCounter,
metric: evictableCount,
metricName: "vpa_updater_evictable_pods_total",
additions: []addition{
{4, vpa_types.UpdateModeAuto, 3},
{1, vpa_types.UpdateModeRecreate, 8},
},
expectedMetrics: []expectation{
{[]string{"2" /* log2(4) */, "Auto"}, 3},
{[]string{"0" /* log2(1) */, "Recreate"}, 8},
},
},
{
desc: "VpasWithEvictablePodsCounter",
newCounter: NewVpasWithEvictablePodsCounter,
metric: vpasWithEvictablePodsCount,
metricName: "vpa_updater_vpas_with_evictable_pods_total",
additions: []addition{
{1, vpa_types.UpdateModeOff, 1},
{2, vpa_types.UpdateModeAuto, 1},
},
expectedMetrics: []expectation{
{[]string{"0" /* log2(1) */, "Off"}, 1},
{[]string{"1" /* log2(2) */, "Auto"}, 1},
},
},
{
desc: "VpasWithEvictedPodsCounter",
newCounter: NewVpasWithEvictedPodsCounter,
metric: vpasWithEvictedPodsCount,
metricName: "vpa_updater_vpas_with_evicted_pods_total",
additions: []addition{
{1, vpa_types.UpdateModeAuto, 2},
{1, vpa_types.UpdateModeAuto, 3},
},
expectedMetrics: []expectation{
{[]string{"0" /* log2(1) */, "Auto"}, 5},
},
},
}
for _, tc := range testCases {
t.Run(tc.desc, func(t *testing.T) {
t.Cleanup(tc.metric.Reset)
counter := tc.newCounter()
for _, add := range tc.additions {
counter.Add(add.vpaSize, add.mode, add.value)
}
counter.Observe()
for _, expected := range tc.expectedMetrics {
val := testutil.ToFloat64(tc.metric.WithLabelValues(expected.labels...))
if val != expected.value {
t.Errorf("Unexpected value for metric %s with labels %v: got %v, want %v", tc.metricName, expected.labels, val, expected.value)
}
}
})
}
}
func TestSizeBasedGauge(t *testing.T) {
type addition struct {
vpaSize int
value int
}
type expectation struct {
labels []string
value float64
}
testCases := []struct {
desc string
newCounter func() *SizeBasedGauge
metric *prometheus.GaugeVec
metricName string
additions []addition
expectedMetrics []expectation
}{
{
desc: "InPlaceUpdatablePodsCounter",
newCounter: NewInPlaceUpdatablePodsCounter,
metric: inPlaceUpdatableCount,
metricName: "vpa_updater_in_place_updatable_pods_total",
additions: []addition{
{1, 5},
{2, 10},
},
expectedMetrics: []expectation{
{[]string{"0" /* log2(1) */}, 5},
{[]string{"1" /* log2(2) */}, 10},
},
},
{
desc: "VpasWithInPlaceUpdatablePodsCounter",
newCounter: NewVpasWithInPlaceUpdatablePodsCounter,
metric: vpasWithInPlaceUpdatablePodsCount,
metricName: "vpa_updater_vpas_with_in_place_updatable_pods_total",
additions: []addition{
{10, 1},
{20, 1},
},
expectedMetrics: []expectation{
{[]string{"3" /* log2(10) */}, 1},
{[]string{"4" /* log2(20) */}, 1},
},
},
{
desc: "VpasWithInPlaceUpdatedPodsCounter",
newCounter: NewVpasWithInPlaceUpdatedPodsCounter,
metric: vpasWithInPlaceUpdatedPodsCount,
metricName: "vpa_updater_vpas_with_in_place_updated_pods_total",
additions: []addition{
{2, 4},
{4, 5},
},
expectedMetrics: []expectation{
{[]string{"1" /* log2(2) */}, 4},
{[]string{"2" /* log2(4) */}, 5},
},
},
}
for _, tc := range testCases {
t.Run(tc.desc, func(t *testing.T) {
t.Cleanup(tc.metric.Reset)
counter := tc.newCounter()
for _, add := range tc.additions {
counter.Add(add.vpaSize, add.value)
}
counter.Observe()
for _, expected := range tc.expectedMetrics {
val := testutil.ToFloat64(tc.metric.WithLabelValues(expected.labels...))
if val != expected.value {
t.Errorf("Unexpected value for metric %s with labels %v: got %v, want %v", tc.metricName, expected.labels, val, expected.value)
}
}
})
}
}
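The "log2(N)" expectations in the tests above assume that metrics.GetVpaSizeLog2 buckets a VPA by the floor of the base-2 logarithm of its pod count (1 -> 0, 2 -> 1, 5 -> 2, 10 -> 3, 20 -> 4). A self-contained sketch of that assumed bucketing, not the actual helper from the metrics package:

package main

import (
	"fmt"
	"math"
)

// vpaSizeLog2 mirrors the bucketing the tests above rely on:
// the floor of log2 of the number of pods the VPA controls.
func vpaSizeLog2(size int) int {
	if size <= 1 {
		return 0
	}
	return int(math.Log2(float64(size)))
}

func main() {
	for _, n := range []int{1, 2, 5, 10, 20} {
		fmt.Printf("vpa size %d -> bucket %d\n", n, vpaSizeLog2(n)) // 0, 1, 2, 3, 4
	}
}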

View File

@ -200,36 +200,6 @@ func (m *VerticalPodAutoscalerListerMock) Get(name string) (*vpa_types.VerticalP
return nil, fmt.Errorf("unimplemented")
}
// VerticalPodAutoscalerCheckPointListerMock is a mock of VerticalPodAutoscalerCheckPointLister
type VerticalPodAutoscalerCheckPointListerMock struct {
mock.Mock
}
// List is a mock implementation of VerticalPodAutoscalerLister.List
func (m *VerticalPodAutoscalerCheckPointListerMock) List(selector labels.Selector) (ret []*vpa_types.VerticalPodAutoscalerCheckpoint, err error) {
args := m.Called()
var returnArg []*vpa_types.VerticalPodAutoscalerCheckpoint
if args.Get(0) != nil {
returnArg = args.Get(0).([]*vpa_types.VerticalPodAutoscalerCheckpoint)
}
return returnArg, args.Error(1)
}
// VerticalPodAutoscalerCheckpoints is a mock implementation of returning a lister for namespace.
func (m *VerticalPodAutoscalerCheckPointListerMock) VerticalPodAutoscalerCheckpoints(namespace string) vpa_lister.VerticalPodAutoscalerCheckpointNamespaceLister {
args := m.Called(namespace)
var returnArg vpa_lister.VerticalPodAutoscalerCheckpointNamespaceLister
if args.Get(0) != nil {
returnArg = args.Get(0).(vpa_lister.VerticalPodAutoscalerCheckpointNamespaceLister)
}
return returnArg
}
// Get is not implemented for this mock
func (m *VerticalPodAutoscalerCheckPointListerMock) Get(name string) (*vpa_types.VerticalPodAutoscalerCheckpoint, error) {
return nil, fmt.Errorf("unimplemented")
}
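For reference, a testify mock like the checkpoint lister mock above is typically wired up by registering an expectation with On, letting the code under test call List, and verifying with AssertExpectations. A sketch under the assumption that this runs inside some test function with t *testing.T in scope:

mockLister := &VerticalPodAutoscalerCheckPointListerMock{}
expected := []*vpa_types.VerticalPodAutoscalerCheckpoint{{}}
mockLister.On("List").Return(expected, nil) // stub the List call
got, err := mockLister.List(labels.Everything())
if err != nil || len(got) != 1 {
	t.Errorf("unexpected result from mocked List: %v, %v", got, err)
}
mockLister.AssertExpectations(t)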
// VerticalPodAutoscalerV1Beta1ListerMock is a mock of VerticalPodAutoscalerLister or
// VerticalPodAutoscalerNamespaceLister - the crucial List method is the same.
type VerticalPodAutoscalerV1Beta1ListerMock struct {

View File

@ -107,36 +107,6 @@ func NewVpasLister(vpaClient *vpa_clientset.Clientset, stopChannel <-chan struct
return vpaLister
}
// NewVpaCheckpointLister returns a VerticalPodAutoscalerCheckpointLister configured to fetch all VPACheckpoint objects from the given namespace;
// set namespace to k8sapiv1.NamespaceAll to select all namespaces.
// The method blocks until the vpaCheckpointLister is initially populated.
func NewVpaCheckpointLister(vpaClient *vpa_clientset.Clientset, stopChannel <-chan struct{}, namespace string) vpa_lister.VerticalPodAutoscalerCheckpointLister {
vpaListWatch := cache.NewListWatchFromClient(vpaClient.AutoscalingV1().RESTClient(), "verticalpodautoscalercheckpoints", namespace, fields.Everything())
informerOptions := cache.InformerOptions{
ObjectType: &vpa_types.VerticalPodAutoscalerCheckpoint{},
ListerWatcher: vpaListWatch,
Handler: &cache.ResourceEventHandlerFuncs{},
ResyncPeriod: 1 * time.Hour,
Indexers: cache.Indexers{cache.NamespaceIndex: cache.MetaNamespaceIndexFunc},
}
store, controller := cache.NewInformerWithOptions(informerOptions)
indexer, ok := store.(cache.Indexer)
if !ok {
klog.ErrorS(nil, "Expected Indexer, but got a Store that does not implement Indexer")
klog.FlushAndExit(klog.ExitFlushTimeout, 1)
}
vpaCheckpointLister := vpa_lister.NewVerticalPodAutoscalerCheckpointLister(indexer)
go controller.Run(stopChannel)
if !cache.WaitForCacheSync(stopChannel, controller.HasSynced) {
klog.ErrorS(nil, "Failed to sync VPA checkpoint cache during initialization")
klog.FlushAndExit(klog.ExitFlushTimeout, 1)
} else {
klog.InfoS("Initial VPA checkpoint synced successfully")
}
return vpaCheckpointLister
}
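A sketch of how NewVpaCheckpointLister is typically consumed; vpaClient is assumed to be an already-configured *vpa_clientset.Clientset, and the namespace constant follows the k8sapiv1 alias used in the comment above:

stopCh := make(chan struct{})
defer close(stopCh)

// Blocks until the checkpoint cache is synced, per the comment above.
checkpointLister := NewVpaCheckpointLister(vpaClient, stopCh, k8sapiv1.NamespaceAll)

checkpoints, err := checkpointLister.List(labels.Everything())
if err != nil {
	klog.ErrorS(err, "Failed to list VPA checkpoints")
}
klog.InfoS("Listed VPA checkpoints", "count", len(checkpoints))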
// PodMatchesVPA returns true iff the vpaWithSelector matches the Pod.
func PodMatchesVPA(pod *core.Pod, vpaWithSelector *VpaWithSelector) bool {
return PodLabelsMatchVPA(pod.Namespace, labels.Set(pod.GetLabels()), vpaWithSelector.Vpa.Namespace, vpaWithSelector.Selector)