Account for kernel reserved memory in capacity calculations

Jacek Kaniuk 2019-01-30 15:52:02 +01:00
parent 52e2cf4e46
commit f054c53c46
25 changed files with 831 additions and 524 deletions

View File

@ -26,6 +26,7 @@ import (
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider"
"k8s.io/autoscaler/cluster-autoscaler/config/dynamic"
"k8s.io/autoscaler/cluster-autoscaler/utils/units"
apiv1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/util/wait"
@ -478,7 +479,7 @@ func (m *gceManagerImpl) getCpuAndMemoryForMachineType(machineType string, zone
}
m.cache.AddMachineToCache(machineType, zone, machine)
}
return machine.GuestCpus, machine.MemoryMb * bytesPerMB, nil
return machine.GuestCpus, machine.MemoryMb * units.MiB, nil
}
func parseCustomMachineType(machineType string) (cpu, mem int64, err error) {
@ -492,6 +493,6 @@ func parseCustomMachineType(machineType string) (cpu, mem int64, err error) {
return 0, 0, fmt.Errorf("failed to parse all params in %s", machineType)
}
// Mb to bytes
mem = mem * bytesPerMB
mem = mem * units.MiB
return
}

View File

@ -24,6 +24,7 @@ import (
"time"
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider"
"k8s.io/autoscaler/cluster-autoscaler/utils/units"
. "k8s.io/autoscaler/cluster-autoscaler/utils/test"
@ -1092,14 +1093,14 @@ func TestGetCpuAndMemoryForMachineType(t *testing.T) {
cpu, mem, err := g.getCpuAndMemoryForMachineType("custom-8-2", zoneB)
assert.NoError(t, err)
assert.Equal(t, int64(8), cpu)
assert.Equal(t, int64(2*bytesPerMB), mem)
assert.Equal(t, int64(2*units.MiB), mem)
mock.AssertExpectationsForObjects(t, server)
// Standard machine type found in cache.
cpu, mem, err = g.getCpuAndMemoryForMachineType("n1-standard-1", zoneB)
assert.NoError(t, err)
assert.Equal(t, int64(1), cpu)
assert.Equal(t, int64(1*bytesPerMB), mem)
assert.Equal(t, int64(1*units.MiB), mem)
mock.AssertExpectationsForObjects(t, server)
// Standard machine type not found in cache.
@ -1107,14 +1108,14 @@ func TestGetCpuAndMemoryForMachineType(t *testing.T) {
cpu, mem, err = g.getCpuAndMemoryForMachineType("n1-standard-2", zoneB)
assert.NoError(t, err)
assert.Equal(t, int64(2), cpu)
assert.Equal(t, int64(3840*bytesPerMB), mem)
assert.Equal(t, int64(3840*units.MiB), mem)
mock.AssertExpectationsForObjects(t, server)
// Standard machine type cached.
cpu, mem, err = g.getCpuAndMemoryForMachineType("n1-standard-2", zoneB)
assert.NoError(t, err)
assert.Equal(t, int64(2), cpu)
assert.Equal(t, int64(3840*bytesPerMB), mem)
assert.Equal(t, int64(3840*units.MiB), mem)
mock.AssertExpectationsForObjects(t, server)
// Standard machine type not found in the zone.
@ -1129,7 +1130,7 @@ func TestParseCustomMachineType(t *testing.T) {
cpu, mem, err := parseCustomMachineType("custom-2-2816")
assert.NoError(t, err)
assert.Equal(t, int64(2), cpu)
assert.Equal(t, int64(2816*bytesPerMB), mem)
assert.Equal(t, int64(2816*units.MiB), mem)
cpu, mem, err = parseCustomMachineType("other-a2-2816")
assert.Error(t, err)
cpu, mem, err = parseCustomMachineType("other-2-2816")

View File

@ -147,7 +147,7 @@ func getBasePrice(resources apiv1.ResourceList, startTime time.Time, endTime tim
cpu := resources[apiv1.ResourceCPU]
mem := resources[apiv1.ResourceMemory]
price += float64(cpu.MilliValue()) / 1000.0 * cpuPricePerHour * hours
price += float64(mem.Value()) / float64(units.Gigabyte) * memoryPricePerHourPerGb * hours
price += float64(mem.Value()) / float64(units.GiB) * memoryPricePerHourPerGb * hours
return price
}

View File

@ -24,6 +24,7 @@ import (
"k8s.io/apimachinery/pkg/api/resource"
"k8s.io/autoscaler/cluster-autoscaler/utils/gpu"
. "k8s.io/autoscaler/cluster-autoscaler/utils/test"
"k8s.io/autoscaler/cluster-autoscaler/utils/units"
"github.com/stretchr/testify/assert"
)
@ -46,13 +47,13 @@ func TestGetNodePrice(t *testing.T) {
now := time.Now()
// regular
node1 := BuildTestNode("sillyname1", 8000, 30*1024*1024*1024)
node1 := BuildTestNode("sillyname1", 8000, 30*units.GiB)
node1.Labels = labels1
price1, err := model.NodePrice(node1, now, now.Add(time.Hour))
assert.NoError(t, err)
// preemptible
node2 := BuildTestNode("sillyname2", 8000, 30*1024*1024*1024)
node2 := BuildTestNode("sillyname2", 8000, 30*units.GiB)
node2.Labels = labels2
price2, err := model.NodePrice(node2, now, now.Add(time.Hour))
assert.NoError(t, err)
@ -60,7 +61,7 @@ func TestGetNodePrice(t *testing.T) {
assert.True(t, price1 > 3*price2)
// custom node
node3 := BuildTestNode("sillyname3", 8000, 30*1024*1024*1024)
node3 := BuildTestNode("sillyname3", 8000, 30*units.GiB)
price3, err := model.NodePrice(node3, now, now.Add(time.Hour))
assert.NoError(t, err)
// custom nodes should be slightly more expensive than regular.
@ -68,13 +69,13 @@ func TestGetNodePrice(t *testing.T) {
assert.True(t, price1*1.2 > price3)
// regular with gpu
node4 := BuildTestNode("sillyname4", 8000, 30*1024*1024*1024)
node4 := BuildTestNode("sillyname4", 8000, 30*units.GiB)
node4.Status.Capacity[gpu.ResourceNvidiaGPU] = *resource.NewQuantity(1, resource.DecimalSI)
node4.Labels = labels1
price4, err := model.NodePrice(node4, now, now.Add(time.Hour))
// preemptible with gpu
node5 := BuildTestNode("sillyname5", 8000, 30*1024*1024*1024)
node5 := BuildTestNode("sillyname5", 8000, 30*units.GiB)
node5.Labels = labels2
node5.Status.Capacity[gpu.ResourceNvidiaGPU] = *resource.NewQuantity(1, resource.DecimalSI)
price5, err := model.NodePrice(node5, now, now.Add(time.Hour))
@ -86,7 +87,7 @@ func TestGetNodePrice(t *testing.T) {
assert.True(t, price4 > 2*price1)
// small custom node
node6 := BuildTestNode("sillyname6", 1000, 3750*1024*1024)
node6 := BuildTestNode("sillyname6", 1000, 3750*units.MiB)
price6, err := model.NodePrice(node6, now, now.Add(time.Hour))
assert.NoError(t, err)
// 8 times smaller node should be 8 times less expensive.
@ -94,8 +95,8 @@ func TestGetNodePrice(t *testing.T) {
}
func TestGetPodPrice(t *testing.T) {
pod1 := BuildTestPod("a1", 100, 500*1024*1024)
pod2 := BuildTestPod("a2", 2*100, 2*500*1024*1024)
pod1 := BuildTestPod("a1", 100, 500*units.MiB)
pod2 := BuildTestPod("a2", 2*100, 2*500*units.MiB)
model := &GcePriceModel{}
now := time.Now()

View File

@ -0,0 +1,53 @@
/*
Copyright 2016 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package gce
// There should be no imports as it is used standalone in e2e tests
const (
// MiB - mebibyte size (2^20)
MiB = 1024 * 1024
// GiB - gibibyte size (2^30)
GiB = 1024 * 1024 * 1024
// KubeletEvictionHardMemory is subtracted from capacity
// when calculating allocatable (on top of kube-reserved).
// Equals kubelet "evictionHard: {memory.available}"
// We don't have a good place to get it from, but it has been hard-coded
// to 100Mi since at least k8s 1.4.
KubeletEvictionHardMemory = 100 * MiB
// Kernel reserved memory is subtracted when calculating total memory.
kernelReservedRatio = 64
kernelReservedMemory = 16 * MiB
// Reserved memory for software IO TLB
swiotlbReservedMemory = 64 * MiB
swiotlbThresholdMemory = 3 * GiB
)
// CalculateKernelReserved computes how much memory the Linux kernel will reserve.
// TODO(jkaniuk): account for crashkernel reservation on RHEL / CentOS
func CalculateKernelReserved(physicalMemory int64) int64 {
// Account for memory reserved by kernel
reserved := int64(physicalMemory / kernelReservedRatio)
reserved += kernelReservedMemory
// Account for software IO TLB allocation if memory requires 64-bit addressing
if physicalMemory > swiotlbThresholdMemory {
reserved += swiotlbReservedMemory
}
return reserved
}
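For reference, a minimal standalone sketch of how the new helper behaves; it is not part of the commit and assumes the gce package import path used elsewhere in this change. The expected values mirror the test cases in the next file below.

package main

import (
	"fmt"

	"k8s.io/autoscaler/cluster-autoscaler/cloudprovider/gce"
)

const (
	miB = 1024 * 1024
	giB = 1024 * miB
)

func main() {
	// 2 GiB machine: 2048/64 MiB + 16 MiB kernel, below the 3 GiB swiotlb threshold -> 48 MiB.
	fmt.Println(gce.CalculateKernelReserved(2*giB) / miB) // 48
	// 4 GiB machine: 4096/64 MiB + 16 MiB kernel + 64 MiB swiotlb -> 144 MiB.
	fmt.Println(gce.CalculateKernelReserved(4*giB) / miB) // 144
}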

View File

@ -0,0 +1,63 @@
/*
Copyright 2016 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package gce
import (
"fmt"
"testing"
"github.com/stretchr/testify/assert"
)
func TestCalculateKernelReserved(t *testing.T) {
type testCase struct {
physicalMemory int64
reservedMemory int64
}
testCases := []testCase{
{
physicalMemory: 256 * MiB,
reservedMemory: 4*MiB + kernelReservedMemory,
},
{
physicalMemory: 2 * GiB,
reservedMemory: 32*MiB + kernelReservedMemory,
},
{
physicalMemory: 3 * GiB,
reservedMemory: 48*MiB + kernelReservedMemory,
},
{
physicalMemory: 3.25 * GiB,
reservedMemory: 52*MiB + kernelReservedMemory + swiotlbReservedMemory,
},
{
physicalMemory: 4 * GiB,
reservedMemory: 64*MiB + kernelReservedMemory + swiotlbReservedMemory,
},
{
physicalMemory: 128 * GiB,
reservedMemory: 2*GiB + kernelReservedMemory + swiotlbReservedMemory,
},
}
for idx, tc := range testCases {
t.Run(fmt.Sprintf("%v", idx), func(t *testing.T) {
reserved := CalculateKernelReserved(tc.physicalMemory)
assert.Equal(t, tc.reservedMemory, reserved)
})
}
}

View File

@ -22,12 +22,11 @@ import (
"regexp"
"strings"
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider"
gce "google.golang.org/api/compute/v1"
apiv1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider"
"k8s.io/autoscaler/cluster-autoscaler/utils/gpu"
kubeletapis "k8s.io/kubernetes/pkg/kubelet/apis"
@ -35,17 +34,6 @@ import (
"k8s.io/klog"
)
const (
mbPerGB = 1000
bytesPerMB = 1000 * 1000
millicoresPerCore = 1000
// Kubelet "evictionHard: {memory.available}" is subtracted from
// capacity when calculating allocatable (on top of kube-reserved).
// We don't have a good place to get it from, but it has been hard-coded
// to 100Mi since at least k8s 1.4.
kubeletEvictionHardMemory = 100 * 1024 * 1024
)
// GceTemplateBuilder builds templates for GCE nodes.
type GceTemplateBuilder struct{}
@ -59,13 +47,14 @@ func (t *GceTemplateBuilder) getAcceleratorCount(accelerators []*gce.Accelerator
return count
}
// BuildCapacity builds a list of resource capacities for a node.
func (t *GceTemplateBuilder) BuildCapacity(machineType string, accelerators []*gce.AcceleratorConfig, zone string, cpu int64, mem int64) (apiv1.ResourceList, error) {
// BuildCapacity builds a list of resource capacities for the given hardware (cpu, memory, accelerators).
func (t *GceTemplateBuilder) BuildCapacity(cpu int64, mem int64, accelerators []*gce.AcceleratorConfig) (apiv1.ResourceList, error) {
capacity := apiv1.ResourceList{}
// TODO: get a real value.
capacity[apiv1.ResourcePods] = *resource.NewQuantity(110, resource.DecimalSI)
capacity[apiv1.ResourceCPU] = *resource.NewQuantity(cpu, resource.DecimalSI)
capacity[apiv1.ResourceMemory] = *resource.NewQuantity(mem, resource.DecimalSI)
memTotal := mem - CalculateKernelReserved(mem)
capacity[apiv1.ResourceMemory] = *resource.NewQuantity(memTotal, resource.DecimalSI)
if accelerators != nil && len(accelerators) > 0 {
capacity[gpu.ResourceNvidiaGPU] = *resource.NewQuantity(t.getAcceleratorCount(accelerators), resource.DecimalSI)
@ -91,33 +80,21 @@ func (t *GceTemplateBuilder) BuildAllocatableFromKubeEnv(capacity apiv1.Resource
if err != nil {
return nil, err
}
if quantity, found := reserved[apiv1.ResourceMemory]; found {
reserved[apiv1.ResourceMemory] = *resource.NewQuantity(quantity.Value()+kubeletEvictionHardMemory, resource.BinarySI)
}
return t.getAllocatable(capacity, reserved), nil
return t.CalculateAllocatable(capacity, reserved), nil
}
// BuildAllocatableFromCapacity builds node allocatable based only on node capacity.
// Calculates reserved as a ratio of capacity. See calculateReserved for more details
func (t *GceTemplateBuilder) BuildAllocatableFromCapacity(capacity apiv1.ResourceList) apiv1.ResourceList {
memoryReserved := memoryReservedMB(capacity.Memory().Value() / bytesPerMB)
cpuReserved := cpuReservedMillicores(capacity.Cpu().MilliValue())
reserved := apiv1.ResourceList{}
reserved[apiv1.ResourceCPU] = *resource.NewMilliQuantity(cpuReserved, resource.DecimalSI)
// Duplicating an upstream bug treating MB as MiB (we need to predict the end result accurately).
memoryReserved = memoryReserved * 1024 * 1024
memoryReserved += kubeletEvictionHardMemory
reserved[apiv1.ResourceMemory] = *resource.NewQuantity(memoryReserved, resource.BinarySI)
return t.getAllocatable(capacity, reserved)
}
func (t *GceTemplateBuilder) getAllocatable(capacity, reserved apiv1.ResourceList) apiv1.ResourceList {
// CalculateAllocatable computes allocatable resources by subtracting kube-reserved values
// and the kubelet eviction memory buffer from the corresponding capacity.
func (t *GceTemplateBuilder) CalculateAllocatable(capacity, kubeReserved apiv1.ResourceList) apiv1.ResourceList {
allocatable := apiv1.ResourceList{}
for key, value := range capacity {
quantity := *value.Copy()
if reservedQuantity, found := reserved[key]; found {
if reservedQuantity, found := kubeReserved[key]; found {
quantity.Sub(reservedQuantity)
}
if key == apiv1.ResourceMemory {
quantity = *resource.NewQuantity(quantity.Value()-KubeletEvictionHardMemory, resource.BinarySI)
}
allocatable[key] = quantity
}
return allocatable
@ -139,7 +116,7 @@ func (t *GceTemplateBuilder) BuildNodeFromTemplate(mig Mig, template *gce.Instan
Labels: map[string]string{},
}
capacity, err := t.BuildCapacity(template.Properties.MachineType, template.Properties.GuestAccelerators, mig.GceRef().Zone, cpu, mem)
capacity, err := t.BuildCapacity(cpu, mem, template.Properties.GuestAccelerators)
if err != nil {
return nil, err
}
@ -346,81 +323,3 @@ func buildTaints(kubeEnvTaints map[string]string) ([]apiv1.Taint, error) {
}
return taints, nil
}
type allocatableBracket struct {
threshold int64
marginalReservedRate float64
}
func memoryReservedMB(memoryCapacityMB int64) int64 {
if memoryCapacityMB <= 1*mbPerGB {
// do not set any memory reserved for nodes with less than 1 Gb of capacity
return 0
}
return calculateReserved(memoryCapacityMB, []allocatableBracket{
{
threshold: 0,
marginalReservedRate: 0.25,
},
{
threshold: 4 * mbPerGB,
marginalReservedRate: 0.2,
},
{
threshold: 8 * mbPerGB,
marginalReservedRate: 0.1,
},
{
threshold: 16 * mbPerGB,
marginalReservedRate: 0.06,
},
{
threshold: 128 * mbPerGB,
marginalReservedRate: 0.02,
},
})
}
func cpuReservedMillicores(cpuCapacityMillicores int64) int64 {
return calculateReserved(cpuCapacityMillicores, []allocatableBracket{
{
threshold: 0,
marginalReservedRate: 0.06,
},
{
threshold: 1 * millicoresPerCore,
marginalReservedRate: 0.01,
},
{
threshold: 2 * millicoresPerCore,
marginalReservedRate: 0.005,
},
{
threshold: 4 * millicoresPerCore,
marginalReservedRate: 0.0025,
},
})
}
// calculateReserved calculates reserved using capacity and a series of
// brackets as follows: the marginalReservedRate applies to all capacity
// greater than the bracket, but less than the next bracket. For example, if
// the first bracket is threshold: 0, rate:0.1, and the second bracket has
// threshold: 100, rate: 0.4, a capacity of 100 results in a reserved of
// 100*0.1 = 10, but a capacity of 200 results in a reserved of
// 10 + (200-100)*.4 = 50. Using brackets with marginal rates ensures that as
// capacity increases, reserved always increases, and never decreases.
func calculateReserved(capacity int64, brackets []allocatableBracket) int64 {
var reserved float64
for i, bracket := range brackets {
c := capacity
if i < len(brackets)-1 && brackets[i+1].threshold < capacity {
c = brackets[i+1].threshold
}
additionalReserved := float64(c-bracket.threshold) * bracket.marginalReservedRate
if additionalReserved > 0 {
reserved += additionalReserved
}
}
return int64(reserved)
}
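Taken together, the new flow in this file is: capacity memory = physical memory - CalculateKernelReserved(physical memory), and allocatable = capacity - kube-reserved - the 100 MiB KubeletEvictionHardMemory buffer. A minimal sketch, not part of the commit, with arbitrary kube-reserved values chosen purely for illustration:

package main

import (
	"fmt"

	apiv1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/api/resource"
	"k8s.io/autoscaler/cluster-autoscaler/cloudprovider/gce"
)

func main() {
	const giB = 1024 * 1024 * 1024
	tb := &gce.GceTemplateBuilder{}

	// Capacity for 2 vCPUs and 4 GiB of physical memory: memory is reported as
	// 4096 MiB - 144 MiB kernel reservation = 3952 MiB; cpu is passed through.
	capacity, err := tb.BuildCapacity(2, 4*giB, nil)
	if err != nil {
		panic(err)
	}

	// Allocatable subtracts kube-reserved (illustrative values here) and the
	// 100 MiB KubeletEvictionHardMemory buffer applied inside CalculateAllocatable:
	// 3952 MiB - 256 MiB - 100 MiB = 3596 MiB.
	kubeReserved := apiv1.ResourceList{
		apiv1.ResourceCPU:    *resource.NewMilliQuantity(70, resource.DecimalSI),
		apiv1.ResourceMemory: *resource.NewQuantity(256*1024*1024, resource.BinarySI),
	}
	allocatable := tb.CalculateAllocatable(capacity, kubeReserved)

	fmt.Println(capacity.Memory().Value(), allocatable.Memory().Value())
}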

View File

@ -18,10 +18,12 @@ package gce
import (
"fmt"
"strings"
"testing"
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider"
gpuUtils "k8s.io/autoscaler/cluster-autoscaler/utils/gpu"
"k8s.io/autoscaler/cluster-autoscaler/utils/units"
gce "google.golang.org/api/compute/v1"
apiv1 "k8s.io/api/core/v1"
@ -34,104 +36,93 @@ import (
func TestBuildNodeFromTemplateSetsResources(t *testing.T) {
type testCase struct {
kubeEnv string
name string
machineType string
accelerators []*gce.AcceleratorConfig
mig Mig
capacityCpu int64
capacityMemory int64
allocatableCpu string
allocatableMemory string
gpuCount int64
expectedErr bool
scenario string
kubeEnv string
accelerators []*gce.AcceleratorConfig
mig Mig
physicalCpu int64
physicalMemory int64
kubeReserved bool
reservedCpu string
reservedMemory string
expectedGpuCount int64
expectedErr bool
}
testCases := []testCase{{
kubeEnv: "ENABLE_NODE_PROBLEM_DETECTOR: 'daemonset'\n" +
"NODE_LABELS: a=b,c=d,cloud.google.com/gke-nodepool=pool-3,cloud.google.com/gke-preemptible=true\n" +
"DNS_SERVER_IP: '10.0.0.10'\n" +
fmt.Sprintf("KUBELET_TEST_ARGS: --experimental-allocatable-ignore-eviction --kube-reserved=cpu=1000m,memory=%v\n", 1024*1024) +
"NODE_TAINTS: 'dedicated=ml:NoSchedule,test=dev:PreferNoSchedule,a=b:c'\n",
name: "nodeName",
machineType: "custom-8-2",
accelerators: []*gce.AcceleratorConfig{
{AcceleratorType: "nvidia-tesla-k80", AcceleratorCount: 3},
{AcceleratorType: "nvidia-tesla-p100", AcceleratorCount: 8},
},
mig: &gceMig{
gceRef: GceRef{
Name: "some-name",
Project: "some-proj",
Zone: "us-central1-b",
},
},
capacityCpu: 8,
capacityMemory: 200 * 1024 * 1024,
allocatableCpu: "7000m",
allocatableMemory: fmt.Sprintf("%v", 99*1024*1024),
gpuCount: 11,
expectedErr: false,
},
testCases := []testCase{
{
scenario: "kube-reserved present in kube-env",
kubeEnv: "ENABLE_NODE_PROBLEM_DETECTOR: 'daemonset'\n" +
"NODE_LABELS: a=b,c=d,cloud.google.com/gke-nodepool=pool-3,cloud.google.com/gke-preemptible=true\n" +
"DNS_SERVER_IP: '10.0.0.10'\n" +
fmt.Sprintf("KUBELET_TEST_ARGS: --experimental-allocatable-ignore-eviction --kube-reserved=cpu=1000m,memory=%v\n", 1*units.MiB) +
"NODE_TAINTS: 'dedicated=ml:NoSchedule,test=dev:PreferNoSchedule,a=b:c'\n",
accelerators: []*gce.AcceleratorConfig{
{AcceleratorType: "nvidia-tesla-k80", AcceleratorCount: 3},
{AcceleratorType: "nvidia-tesla-p100", AcceleratorCount: 8},
},
physicalCpu: 8,
physicalMemory: 200 * units.MiB,
kubeReserved: true,
reservedCpu: "1000m",
reservedMemory: fmt.Sprintf("%v", 1*units.MiB),
expectedGpuCount: 11,
expectedErr: false,
},
{
scenario: "no kube-reserved in kube-env",
kubeEnv: "ENABLE_NODE_PROBLEM_DETECTOR: 'daemonset'\n" +
"NODE_LABELS: a=b,c=d,cloud.google.com/gke-nodepool=pool-3,cloud.google.com/gke-preemptible=true\n" +
"DNS_SERVER_IP: '10.0.0.10'\n" +
"NODE_TAINTS: 'dedicated=ml:NoSchedule,test=dev:PreferNoSchedule,a=b:c'\n",
name: "nodeName",
machineType: "custom-8-2",
mig: &gceMig{
gceRef: GceRef{
Name: "some-name",
Project: "some-proj",
Zone: "us-central1-b",
},
},
capacityCpu: 8,
capacityMemory: 2 * 1024 * 1024,
allocatableCpu: "8000m",
allocatableMemory: fmt.Sprintf("%v", 2*1024*1024),
expectedErr: false,
physicalCpu: 8,
physicalMemory: 200 * units.MiB,
kubeReserved: false,
expectedGpuCount: 11,
expectedErr: false,
}, {
scenario: "totally messed up kube-env",
kubeEnv: "This kube-env is totally messed up",
name: "nodeName",
machineType: "custom-8-2",
mig: &gceMig{
gceRef: GceRef{
Name: "some-name",
Project: "some-proj",
Zone: "us-central1-b",
},
},
expectedErr: true,
},
}
for _, tc := range testCases {
tb := &GceTemplateBuilder{}
template := &gce.InstanceTemplate{
Name: tc.name,
Properties: &gce.InstanceProperties{
GuestAccelerators: tc.accelerators,
Metadata: &gce.Metadata{
Items: []*gce.MetadataItems{{Key: "kube-env", Value: &tc.kubeEnv}},
t.Run(tc.scenario, func(t *testing.T) {
tb := &GceTemplateBuilder{}
mig := &gceMig{
gceRef: GceRef{
Name: "some-name",
Project: "some-proj",
Zone: "us-central1-b",
},
MachineType: tc.machineType,
},
}
node, err := tb.BuildNodeFromTemplate(tc.mig, template, tc.capacityCpu, tc.capacityMemory)
if tc.expectedErr {
assert.Error(t, err)
} else {
assert.NoError(t, err)
podsQuantity, _ := resource.ParseQuantity("110")
capacity, err := makeResourceList(fmt.Sprintf("%dm", tc.capacityCpu*1000), fmt.Sprintf("%v", tc.capacityMemory), tc.gpuCount)
capacity[apiv1.ResourcePods] = podsQuantity
assert.NoError(t, err)
allocatable, err := makeResourceList(tc.allocatableCpu, tc.allocatableMemory, tc.gpuCount)
allocatable[apiv1.ResourcePods] = podsQuantity
assert.NoError(t, err)
assertEqualResourceLists(t, "Capacity", capacity, node.Status.Capacity)
assertEqualResourceLists(t, "Allocatable", allocatable, node.Status.Allocatable)
}
}
template := &gce.InstanceTemplate{
Name: "node-name",
Properties: &gce.InstanceProperties{
GuestAccelerators: tc.accelerators,
Metadata: &gce.Metadata{
Items: []*gce.MetadataItems{{Key: "kube-env", Value: &tc.kubeEnv}},
},
MachineType: "irrelevant-type",
},
}
node, err := tb.BuildNodeFromTemplate(mig, template, tc.physicalCpu, tc.physicalMemory)
if tc.expectedErr {
assert.Error(t, err)
} else {
assert.NoError(t, err)
capacity, err := tb.BuildCapacity(tc.physicalCpu, tc.physicalMemory, tc.accelerators)
assert.NoError(t, err)
assertEqualResourceLists(t, "Capacity", capacity, node.Status.Capacity)
if !tc.kubeReserved {
assertEqualResourceLists(t, "Allocatable", capacity, node.Status.Allocatable)
} else {
reserved, err := makeResourceList(tc.reservedCpu, tc.reservedMemory, 0)
assert.NoError(t, err)
allocatable := tb.CalculateAllocatable(capacity, reserved)
assertEqualResourceLists(t, "Allocatable", allocatable, node.Status.Allocatable)
}
}
})
}
}
@ -150,6 +141,51 @@ func TestBuildGenericLabels(t *testing.T) {
assert.Equal(t, cloudprovider.DefaultOS, labels[kubeletapis.LabelOS])
}
func TestCalculateAllocatable(t *testing.T) {
type testCase struct {
scenario string
capacityCpu string
reservedCpu string
allocatableCpu string
capacityMemory string
reservedMemory string
allocatableMemory string
}
testCases := []testCase{
{
scenario: "no reservations",
capacityCpu: "8",
reservedCpu: "0",
allocatableCpu: "8",
capacityMemory: fmt.Sprintf("%v", 200*units.MiB),
reservedMemory: "0",
allocatableMemory: fmt.Sprintf("%v", 200*units.MiB-KubeletEvictionHardMemory),
},
{
scenario: "reserved cpu and memory",
capacityCpu: "8",
reservedCpu: "1000m",
allocatableCpu: "7000m",
capacityMemory: fmt.Sprintf("%v", 200*units.MiB),
reservedMemory: fmt.Sprintf("%v", 50*units.MiB),
allocatableMemory: fmt.Sprintf("%v", 150*units.MiB-KubeletEvictionHardMemory),
},
}
for _, tc := range testCases {
t.Run(tc.scenario, func(t *testing.T) {
tb := GceTemplateBuilder{}
capacity, err := makeResourceList(tc.capacityCpu, tc.capacityMemory, 0)
assert.NoError(t, err)
reserved, err := makeResourceList(tc.reservedCpu, tc.reservedMemory, 0)
assert.NoError(t, err)
expectedAllocatable, err := makeResourceList(tc.allocatableCpu, tc.allocatableMemory, 0)
assert.NoError(t, err)
allocatable := tb.CalculateAllocatable(capacity, reserved)
assertEqualResourceLists(t, "Allocatable", expectedAllocatable, allocatable)
})
}
}
func TestBuildAllocatableFromKubeEnv(t *testing.T) {
type testCase struct {
kubeEnv string
@ -233,36 +269,38 @@ func TestGetAcceleratorCount(t *testing.T) {
}
}
func TestBuildAllocatableFromCapacity(t *testing.T) {
func TestBuildCapacityMemory(t *testing.T) {
type testCase struct {
capacityCpu string
capacityMemory string
allocatableCpu string
allocatableMemory string
gpuCount int64
physicalMemory int64
capacityMemory int64
physicalCpu int64
}
testCases := []testCase{{
capacityCpu: "16000m",
capacityMemory: fmt.Sprintf("%v", 1*mbPerGB*bytesPerMB),
allocatableCpu: "15890m",
// Below threshold for reserving memory
allocatableMemory: fmt.Sprintf("%v", 1*mbPerGB*bytesPerMB-kubeletEvictionHardMemory),
gpuCount: 1,
}, {
capacityCpu: "500m",
capacityMemory: fmt.Sprintf("%v", 1.1*mbPerGB*bytesPerMB),
allocatableCpu: "470m",
// Final 1024*1024 because we're duplicating upstream bug using MB as MiB
allocatableMemory: fmt.Sprintf("%v", 1.1*mbPerGB*bytesPerMB-0.25*1.1*mbPerGB*1024*1024-kubeletEvictionHardMemory),
}}
for _, tc := range testCases {
tb := GceTemplateBuilder{}
capacity, err := makeResourceList(tc.capacityCpu, tc.capacityMemory, tc.gpuCount)
assert.NoError(t, err)
expectedAllocatable, err := makeResourceList(tc.allocatableCpu, tc.allocatableMemory, tc.gpuCount)
assert.NoError(t, err)
allocatable := tb.BuildAllocatableFromCapacity(capacity)
assertEqualResourceLists(t, "Allocatable", expectedAllocatable, allocatable)
testCases := []testCase{
{
physicalMemory: 2 * units.GiB,
capacityMemory: 2*units.GiB - 32*units.MiB - kernelReservedMemory,
physicalCpu: 1,
},
{
physicalMemory: 4 * units.GiB,
capacityMemory: 4*units.GiB - 64*units.MiB - kernelReservedMemory - swiotlbReservedMemory,
physicalCpu: 2,
},
{
physicalMemory: 128 * units.GiB,
capacityMemory: 128*units.GiB - 2*units.GiB - kernelReservedMemory - swiotlbReservedMemory,
physicalCpu: 32,
},
}
for idx, tc := range testCases {
t.Run(fmt.Sprintf("%v", idx), func(t *testing.T) {
tb := GceTemplateBuilder{}
capacity, err := tb.BuildCapacity(tc.physicalCpu, tc.physicalMemory, make([]*gce.AcceleratorConfig, 0))
assert.NoError(t, err)
expected, err := makeResourceList2(tc.physicalCpu, tc.capacityMemory, 0, 110)
assert.NoError(t, err)
assertEqualResourceLists(t, "Capacity", capacity, expected)
})
}
}
@ -531,52 +569,6 @@ func TestParseKubeReserved(t *testing.T) {
}
}
func TestCalculateReserved(t *testing.T) {
type testCase struct {
name string
function func(capacity int64) int64
capacity int64
expectedReserved int64
}
testCases := []testCase{
{
name: "zero memory capacity",
function: memoryReservedMB,
capacity: 0,
expectedReserved: 0,
},
{
name: "between memory thresholds",
function: memoryReservedMB,
capacity: 2 * mbPerGB,
expectedReserved: 500, // 0.5 Gb
},
{
name: "at a memory threshold boundary",
function: memoryReservedMB,
capacity: 8 * mbPerGB,
expectedReserved: 1800, // 1.8 Gb
},
{
name: "exceeds highest memory threshold",
function: memoryReservedMB,
capacity: 200 * mbPerGB,
expectedReserved: 10760, // 10.8 Gb
},
{
name: "cpu sanity check",
function: cpuReservedMillicores,
capacity: 4 * millicoresPerCore,
expectedReserved: 80,
},
}
for _, tc := range testCases {
if actualReserved := tc.function(tc.capacity); actualReserved != tc.expectedReserved {
t.Errorf("Test case: %s, Got f(%d Mb) = %d. Want %d", tc.name, tc.capacity, actualReserved, tc.expectedReserved)
}
}
}
func makeTaintSet(taints []apiv1.Taint) map[apiv1.Taint]bool {
set := make(map[apiv1.Taint]bool)
for _, taint := range taints {
@ -607,6 +599,38 @@ func makeResourceList(cpu string, memory string, gpu int64) (apiv1.ResourceList,
return result, nil
}
func assertEqualResourceLists(t *testing.T, name string, expected, actual apiv1.ResourceList) {
assert.True(t, quota.V1Equals(expected, actual), "%q unequal:\nExpected:%v\nActual:%v", name, expected, actual)
func makeResourceList2(cpu int64, memory int64, gpu int64, pods int64) (apiv1.ResourceList, error) {
result := apiv1.ResourceList{}
result[apiv1.ResourceCPU] = *resource.NewQuantity(cpu, resource.DecimalSI)
result[apiv1.ResourceMemory] = *resource.NewQuantity(memory, resource.BinarySI)
if gpu > 0 {
result[gpuUtils.ResourceNvidiaGPU] = *resource.NewQuantity(gpu, resource.DecimalSI)
}
if pods > 0 {
result[apiv1.ResourcePods] = *resource.NewQuantity(pods, resource.DecimalSI)
}
return result, nil
}
func assertEqualResourceLists(t *testing.T, name string, expected, actual apiv1.ResourceList) {
t.Helper()
assert.True(t, quota.V1Equals(expected, actual),
"%q unequal:\nExpected: %v\nActual: %v", name, stringifyResourceList(expected), stringifyResourceList(actual))
}
func stringifyResourceList(resourceList apiv1.ResourceList) string {
resourceNames := []apiv1.ResourceName{
apiv1.ResourcePods, apiv1.ResourceCPU, gpuUtils.ResourceNvidiaGPU, apiv1.ResourceMemory, apiv1.ResourceEphemeralStorage}
var results []string
for _, name := range resourceNames {
quantity, found := resourceList[name]
if found {
value := quantity.Value()
if name == apiv1.ResourceCPU {
value = quantity.MilliValue()
}
results = append(results, fmt.Sprintf("%v: %v", string(name), value))
}
}
return strings.Join(results, ", ")
}

View File

@ -117,10 +117,10 @@ func buildResourceLimiter(cluster *gke_api_beta.Cluster) *cloudprovider.Resource
// GKE API provides memory in GB, but ResourceLimiter expects them in bytes
if _, found := minLimits[cloudprovider.ResourceNameMemory]; found {
minLimits[cloudprovider.ResourceNameMemory] = minLimits[cloudprovider.ResourceNameMemory] * units.Gigabyte
minLimits[cloudprovider.ResourceNameMemory] = minLimits[cloudprovider.ResourceNameMemory] * units.GiB
}
if _, found := maxLimits[cloudprovider.ResourceNameMemory]; found {
maxLimits[cloudprovider.ResourceNameMemory] = maxLimits[cloudprovider.ResourceNameMemory] * units.Gigabyte
maxLimits[cloudprovider.ResourceNameMemory] = maxLimits[cloudprovider.ResourceNameMemory] * units.GiB
}
return cloudprovider.NewResourceLimiter(minLimits, maxLimits)

View File

@ -29,6 +29,7 @@ import (
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider/gce"
"k8s.io/autoscaler/cluster-autoscaler/config/dynamic"
"k8s.io/autoscaler/cluster-autoscaler/utils/errors"
"k8s.io/autoscaler/cluster-autoscaler/utils/units"
apiv1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/util/wait"
@ -622,7 +623,7 @@ func (m *gkeManagerImpl) getCpuAndMemoryForMachineType(machineType string, zone
}
m.cache.AddMachineToCache(machineType, zone, machine)
}
return machine.GuestCpus, machine.MemoryMb * bytesPerMB, nil
return machine.GuestCpus, machine.MemoryMb * units.MiB, nil
}
func parseCustomMachineType(machineType string) (cpu, mem int64, err error) {
@ -636,6 +637,6 @@ func parseCustomMachineType(machineType string) (cpu, mem int64, err error) {
return 0, 0, fmt.Errorf("failed to parse all params in %s", machineType)
}
// Mb to bytes
mem = mem * bytesPerMB
mem = mem * units.MiB
return
}

View File

@ -24,6 +24,7 @@ import (
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider/gce"
"k8s.io/autoscaler/cluster-autoscaler/utils/gpu"
"k8s.io/autoscaler/cluster-autoscaler/utils/units"
. "k8s.io/autoscaler/cluster-autoscaler/utils/test"
@ -1158,14 +1159,14 @@ func TestGetCpuAndMemoryForMachineType(t *testing.T) {
cpu, mem, err := g.getCpuAndMemoryForMachineType("custom-8-2", zoneB)
assert.NoError(t, err)
assert.Equal(t, int64(8), cpu)
assert.Equal(t, int64(2*bytesPerMB), mem)
assert.Equal(t, int64(2*units.MiB), mem)
mock.AssertExpectationsForObjects(t, server)
// Standard machine type found in cache.
cpu, mem, err = g.getCpuAndMemoryForMachineType("n1-standard-1", zoneB)
assert.NoError(t, err)
assert.Equal(t, int64(1), cpu)
assert.Equal(t, int64(1*bytesPerMB), mem)
assert.Equal(t, int64(1*units.MiB), mem)
mock.AssertExpectationsForObjects(t, server)
// Standard machine type not found in cache.
@ -1173,14 +1174,14 @@ func TestGetCpuAndMemoryForMachineType(t *testing.T) {
cpu, mem, err = g.getCpuAndMemoryForMachineType("n1-standard-2", zoneB)
assert.NoError(t, err)
assert.Equal(t, int64(2), cpu)
assert.Equal(t, int64(3840*bytesPerMB), mem)
assert.Equal(t, int64(3840*units.MiB), mem)
mock.AssertExpectationsForObjects(t, server)
// Standard machine type cached.
cpu, mem, err = g.getCpuAndMemoryForMachineType("n1-standard-2", zoneB)
assert.NoError(t, err)
assert.Equal(t, int64(2), cpu)
assert.Equal(t, int64(3840*bytesPerMB), mem)
assert.Equal(t, int64(3840*units.MiB), mem)
mock.AssertExpectationsForObjects(t, server)
// Standard machine type not found in the zone.
@ -1195,7 +1196,7 @@ func TestParseCustomMachineType(t *testing.T) {
cpu, mem, err := parseCustomMachineType("custom-2-2816")
assert.NoError(t, err)
assert.Equal(t, int64(2), cpu)
assert.Equal(t, int64(2816*bytesPerMB), mem)
assert.Equal(t, int64(2816*units.MiB), mem)
cpu, mem, err = parseCustomMachineType("other-a2-2816")
assert.Error(t, err)
cpu, mem, err = parseCustomMachineType("other-2-2816")

View File

@ -0,0 +1,120 @@
/*
Copyright 2016 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package gke
// There should be no imports as it is used standalone in e2e tests
const (
// MiB - mebibyte size (2^20)
MiB = 1024 * 1024
// Duplicating an upstream bug treating GB as 1000*MiB (we need to predict the end result accurately).
mbPerGB = 1000
millicoresPerCore = 1000
)
// PredictKubeReservedMemory calculates kube-reserved memory based on physical memory
func PredictKubeReservedMemory(physicalMemory int64) int64 {
return memoryReservedMiB(physicalMemory/MiB) * MiB
}
// PredictKubeReservedCpuMillicores calculates kube-reserved cpu based on physical cpu
func PredictKubeReservedCpuMillicores(physicalCpuMillicores int64) int64 {
return cpuReservedMillicores(physicalCpuMillicores)
}
type allocatableBracket struct {
threshold int64
marginalReservedRate float64
}
func memoryReservedMiB(memoryCapacityMiB int64) int64 {
if memoryCapacityMiB <= mbPerGB {
if memoryCapacityMiB <= 0 {
return 0
}
// The minimum reservation required for proper node operation is 255 MiB.
// For any node with less than 1 GB of memory use the minimum. Nodes with
// more memory will use the existing reservation thresholds.
return 255
}
return calculateReserved(memoryCapacityMiB, []allocatableBracket{
{
threshold: 0,
marginalReservedRate: 0.25,
},
{
threshold: 4 * mbPerGB,
marginalReservedRate: 0.2,
},
{
threshold: 8 * mbPerGB,
marginalReservedRate: 0.1,
},
{
threshold: 16 * mbPerGB,
marginalReservedRate: 0.06,
},
{
threshold: 128 * mbPerGB,
marginalReservedRate: 0.02,
},
})
}
func cpuReservedMillicores(cpuCapacityMillicores int64) int64 {
return calculateReserved(cpuCapacityMillicores, []allocatableBracket{
{
threshold: 0,
marginalReservedRate: 0.06,
},
{
threshold: 1 * millicoresPerCore,
marginalReservedRate: 0.01,
},
{
threshold: 2 * millicoresPerCore,
marginalReservedRate: 0.005,
},
{
threshold: 4 * millicoresPerCore,
marginalReservedRate: 0.0025,
},
})
}
// calculateReserved calculates reserved using capacity and a series of
// brackets as follows: the marginalReservedRate applies to all capacity
// greater than the bracket, but less than the next bracket. For example, if
// the first bracket is threshold: 0, rate:0.1, and the second bracket has
// threshold: 100, rate: 0.4, a capacity of 100 results in a reserved of
// 100*0.1 = 10, but a capacity of 200 results in a reserved of
// 10 + (200-100)*.4 = 50. Using brackets with marginal rates ensures that as
// capacity increases, reserved always increases, and never decreases.
func calculateReserved(capacity int64, brackets []allocatableBracket) int64 {
var reserved float64
for i, bracket := range brackets {
c := capacity
if i < len(brackets)-1 && brackets[i+1].threshold < capacity {
c = brackets[i+1].threshold
}
additionalReserved := float64(c-bracket.threshold) * bracket.marginalReservedRate
if additionalReserved > 0 {
reserved += additionalReserved
}
}
return int64(reserved)
}
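A short sketch working through the bracket arithmetic above; it is not part of the commit, and the gke import path is assumed from the repository layout. The expected outputs match the test cases in the next file below.

package main

import (
	"fmt"

	"k8s.io/autoscaler/cluster-autoscaler/cloudprovider/gke"
)

func main() {
	const miB = 1024 * 1024
	// 8000 MiB of memory: 0.25*4000 MiB + 0.2*4000 MiB = 1800 MiB kube-reserved.
	fmt.Println(gke.PredictKubeReservedMemory(8000*miB) / miB) // 1800
	// 4 cores (4000 millicores): 0.06*1000 + 0.01*1000 + 0.005*2000 = 80 millicores.
	fmt.Println(gke.PredictKubeReservedCpuMillicores(4000)) // 80
}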

View File

@ -0,0 +1,125 @@
/*
Copyright 2016 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package gke
import (
"testing"
)
func TestPredictKubeReserved(t *testing.T) {
type testCase struct {
name string
function func(capacity int64) int64
capacity int64
expectedReserved int64
}
testCases := []testCase{
{
name: "zero memory capacity",
function: PredictKubeReservedMemory,
capacity: 0,
expectedReserved: 0,
},
{
name: "f1-micro",
function: PredictKubeReservedMemory,
capacity: 600 * MiB,
expectedReserved: 255 * MiB,
},
{
name: "between memory thresholds",
function: PredictKubeReservedMemory,
capacity: 2000 * MiB,
expectedReserved: 500 * MiB,
},
{
name: "at a memory threshold boundary",
function: PredictKubeReservedMemory,
capacity: 8000 * MiB,
expectedReserved: 1800 * MiB,
},
{
name: "exceeds highest memory threshold",
function: PredictKubeReservedMemory,
capacity: 200 * 1000 * MiB,
expectedReserved: 10760 * MiB,
},
{
name: "cpu sanity check",
function: PredictKubeReservedCpuMillicores,
capacity: 4000,
expectedReserved: 80,
},
}
for _, tc := range testCases {
if actualReserved := tc.function(tc.capacity); actualReserved != tc.expectedReserved {
t.Errorf("Test case: %s, Got f(%d Mb) = %d. Want %d", tc.name, tc.capacity, actualReserved, tc.expectedReserved)
}
}
}
func TestCalculateReserved(t *testing.T) {
type testCase struct {
name string
function func(capacity int64) int64
capacity int64
expectedReserved int64
}
testCases := []testCase{
{
name: "zero memory capacity",
function: memoryReservedMiB,
capacity: 0,
expectedReserved: 0,
},
{
name: "f1-micro",
function: memoryReservedMiB,
capacity: 600,
expectedReserved: 255,
},
{
name: "between memory thresholds",
function: memoryReservedMiB,
capacity: 2000,
expectedReserved: 500,
},
{
name: "at a memory threshold boundary",
function: memoryReservedMiB,
capacity: 8000,
expectedReserved: 1800,
},
{
name: "exceeds highest memory threshold",
function: memoryReservedMiB,
capacity: 200 * 1000,
expectedReserved: 10760,
},
{
name: "cpu sanity check",
function: cpuReservedMillicores,
capacity: 4 * millicoresPerCore,
expectedReserved: 80,
},
}
for _, tc := range testCases {
if actualReserved := tc.function(tc.capacity); actualReserved != tc.expectedReserved {
t.Errorf("Test case: %s, Got f(%d Mb) = %d. Want %d", tc.name, tc.capacity, actualReserved, tc.expectedReserved)
}
}
}

View File

@ -20,25 +20,14 @@ import (
"fmt"
"math/rand"
apiv1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider"
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider/gce"
apiv1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/autoscaler/cluster-autoscaler/utils/gpu"
)
const (
mbPerGB = 1000
bytesPerMB = 1000 * 1000
millicoresPerCore = 1000
// Kubelet "evictionHard: {memory.available}" is subtracted from
// capacity when calculating allocatable (on top of kube-reserved).
// We don't have a good place to get it from, but it has been hard-coded
// to 100Mi since at least k8s 1.4.
kubeletEvictionHardMemory = 100 * 1024 * 1024
)
// GkeTemplateBuilder builds templates for GKE cloud provider.
type GkeTemplateBuilder struct {
gce.GceTemplateBuilder
@ -60,7 +49,7 @@ func (t *GkeTemplateBuilder) BuildNodeFromMigSpec(mig *GkeMig, cpu int64, mem in
Labels: map[string]string{},
}
capacity, err := t.BuildCapacity(mig.Spec().MachineType, nil, mig.GceRef().Zone, cpu, mem)
capacity, err := t.BuildCapacity(cpu, mem, nil)
if err != nil {
return nil, err
}
@ -69,9 +58,11 @@ func (t *GkeTemplateBuilder) BuildNodeFromMigSpec(mig *GkeMig, cpu int64, mem in
capacity[gpu.ResourceNvidiaGPU] = gpuRequest.DeepCopy()
}
kubeReserved := t.BuildKubeReserved(cpu, mem)
node.Status = apiv1.NodeStatus{
Capacity: capacity,
Allocatable: t.BuildAllocatableFromCapacity(capacity),
Allocatable: t.CalculateAllocatable(capacity, kubeReserved),
}
labels, err := buildLabelsForAutoprovisionedMig(mig, nodeName)
@ -87,6 +78,17 @@ func (t *GkeTemplateBuilder) BuildNodeFromMigSpec(mig *GkeMig, cpu int64, mem in
return &node, nil
}
// BuildKubeReserved builds kube-reserved resources based on the node's physical resources.
// See calculateReserved for more details.
func (t *GkeTemplateBuilder) BuildKubeReserved(cpu, physicalMemory int64) apiv1.ResourceList {
cpuReservedMillicores := PredictKubeReservedCpuMillicores(cpu * 1000)
memoryReserved := PredictKubeReservedMemory(physicalMemory)
reserved := apiv1.ResourceList{}
reserved[apiv1.ResourceCPU] = *resource.NewMilliQuantity(cpuReservedMillicores, resource.DecimalSI)
reserved[apiv1.ResourceMemory] = *resource.NewQuantity(memoryReserved, resource.BinarySI)
return reserved
}
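// Illustrative only (not part of this change): for a node with 16 vCPUs and
// 1 GB (10^9 bytes) of memory, BuildKubeReserved yields cpu=110m (60m for the
// first core, 10m for the second, 10m across cores 3-4, 30m across cores 5-16)
// and memory=255Mi, the minimum reservation for nodes under the 1000 MiB
// threshold; this matches the TestBuildKubeReserved case further down in this diff.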
func buildLabelsForAutoprovisionedMig(mig *GkeMig, nodeName string) (map[string]string, error) {
// GenericLabels
labels, err := gce.BuildGenericLabels(mig.GceRef(), mig.Spec().MachineType, nodeName)

View File

@ -18,11 +18,13 @@ package gke
import (
"fmt"
"strings"
"testing"
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider"
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider/gce"
gpuUtils "k8s.io/autoscaler/cluster-autoscaler/utils/gpu"
"k8s.io/autoscaler/cluster-autoscaler/utils/units"
gce_api "google.golang.org/api/compute/v1"
apiv1 "k8s.io/api/core/v1"
@ -35,104 +37,93 @@ import (
func TestBuildNodeFromTemplateSetsResources(t *testing.T) {
type testCase struct {
kubeEnv string
name string
machineType string
accelerators []*gce_api.AcceleratorConfig
mig gce.Mig
capacityCpu int64
capacityMemory int64
allocatableCpu string
allocatableMemory string
gpuCount int64
expectedErr bool
scenario string
kubeEnv string
accelerators []*gce_api.AcceleratorConfig
mig gce.Mig
physicalCpu int64
physicalMemory int64
kubeReserved bool
reservedCpu string
reservedMemory string
expectedGpuCount int64
expectedErr bool
}
testCases := []testCase{{
kubeEnv: "ENABLE_NODE_PROBLEM_DETECTOR: 'daemonset'\n" +
"NODE_LABELS: a=b,c=d,cloud.google.com/gke-nodepool=pool-3,cloud.google.com/gke-preemptible=true\n" +
"DNS_SERVER_IP: '10.0.0.10'\n" +
fmt.Sprintf("KUBELET_TEST_ARGS: --experimental-allocatable-ignore-eviction --kube-reserved=cpu=1000m,memory=%v\n", 1024*1024) +
"NODE_TAINTS: 'dedicated=ml:NoSchedule,test=dev:PreferNoSchedule,a=b:c'\n",
name: "nodeName",
machineType: "custom-8-2",
accelerators: []*gce_api.AcceleratorConfig{
{AcceleratorType: "nvidia-tesla-k80", AcceleratorCount: 3},
{AcceleratorType: "nvidia-tesla-p100", AcceleratorCount: 8},
},
mig: &GkeMig{
gceRef: gce.GceRef{
Name: "some-name",
Project: "some-proj",
Zone: "us-central1-b",
},
},
capacityCpu: 8,
capacityMemory: 200 * 1024 * 1024,
allocatableCpu: "7000m",
allocatableMemory: fmt.Sprintf("%v", 99*1024*1024),
gpuCount: 11,
expectedErr: false,
},
testCases := []testCase{
{
scenario: "kube-reserved present in kube-env",
kubeEnv: "ENABLE_NODE_PROBLEM_DETECTOR: 'daemonset'\n" +
"NODE_LABELS: a=b,c=d,cloud.google.com/gke-nodepool=pool-3,cloud.google.com/gke-preemptible=true\n" +
"DNS_SERVER_IP: '10.0.0.10'\n" +
fmt.Sprintf("KUBELET_TEST_ARGS: --experimental-allocatable-ignore-eviction --kube-reserved=cpu=1000m,memory=%v\n", 1*units.MiB) +
"NODE_TAINTS: 'dedicated=ml:NoSchedule,test=dev:PreferNoSchedule,a=b:c'\n",
accelerators: []*gce_api.AcceleratorConfig{
{AcceleratorType: "nvidia-tesla-k80", AcceleratorCount: 3},
{AcceleratorType: "nvidia-tesla-p100", AcceleratorCount: 8},
},
physicalCpu: 8,
physicalMemory: 200 * units.MiB,
kubeReserved: true,
reservedCpu: "1000m",
reservedMemory: fmt.Sprintf("%v", 1*units.MiB),
expectedGpuCount: 11,
expectedErr: false,
},
{
scenario: "no kube-reserved in kube-env",
kubeEnv: "ENABLE_NODE_PROBLEM_DETECTOR: 'daemonset'\n" +
"NODE_LABELS: a=b,c=d,cloud.google.com/gke-nodepool=pool-3,cloud.google.com/gke-preemptible=true\n" +
"DNS_SERVER_IP: '10.0.0.10'\n" +
"NODE_TAINTS: 'dedicated=ml:NoSchedule,test=dev:PreferNoSchedule,a=b:c'\n",
name: "nodeName",
machineType: "custom-8-2",
mig: &GkeMig{
gceRef: gce.GceRef{
Name: "some-name",
Project: "some-proj",
Zone: "us-central1-b",
},
},
capacityCpu: 8,
capacityMemory: 2 * 1024 * 1024,
allocatableCpu: "8000m",
allocatableMemory: fmt.Sprintf("%v", 2*1024*1024),
expectedErr: false,
physicalCpu: 8,
physicalMemory: 200 * units.MiB,
kubeReserved: false,
expectedGpuCount: 11,
expectedErr: false,
}, {
scenario: "totally messed up kube-env",
kubeEnv: "This kube-env is totally messed up",
name: "nodeName",
machineType: "custom-8-2",
mig: &GkeMig{
gceRef: gce.GceRef{
Name: "some-name",
Project: "some-proj",
Zone: "us-central1-b",
},
},
expectedErr: true,
},
}
for _, tc := range testCases {
tb := &GkeTemplateBuilder{}
template := &gce_api.InstanceTemplate{
Name: tc.name,
Properties: &gce_api.InstanceProperties{
GuestAccelerators: tc.accelerators,
Metadata: &gce_api.Metadata{
Items: []*gce_api.MetadataItems{{Key: "kube-env", Value: &tc.kubeEnv}},
t.Run(tc.scenario, func(t *testing.T) {
tb := &GkeTemplateBuilder{}
mig := &GkeMig{
gceRef: gce.GceRef{
Name: "some-name",
Project: "some-proj",
Zone: "us-central1-b",
},
MachineType: tc.machineType,
},
}
node, err := tb.BuildNodeFromTemplate(tc.mig, template, tc.capacityCpu, tc.capacityMemory)
if tc.expectedErr {
assert.Error(t, err)
} else {
assert.NoError(t, err)
podsQuantity, _ := resource.ParseQuantity("110")
capacity, err := makeResourceList(fmt.Sprintf("%dm", tc.capacityCpu*1000), fmt.Sprintf("%v", tc.capacityMemory), tc.gpuCount)
capacity[apiv1.ResourcePods] = podsQuantity
assert.NoError(t, err)
allocatable, err := makeResourceList(tc.allocatableCpu, tc.allocatableMemory, tc.gpuCount)
allocatable[apiv1.ResourcePods] = podsQuantity
assert.NoError(t, err)
assertEqualResourceLists(t, "Capacity", capacity, node.Status.Capacity)
assertEqualResourceLists(t, "Allocatable", allocatable, node.Status.Allocatable)
}
}
template := &gce_api.InstanceTemplate{
Name: "node-name",
Properties: &gce_api.InstanceProperties{
GuestAccelerators: tc.accelerators,
Metadata: &gce_api.Metadata{
Items: []*gce_api.MetadataItems{{Key: "kube-env", Value: &tc.kubeEnv}},
},
MachineType: "irrelevant-type",
},
}
node, err := tb.BuildNodeFromTemplate(mig, template, tc.physicalCpu, tc.physicalMemory)
if tc.expectedErr {
assert.Error(t, err)
} else {
assert.NoError(t, err)
capacity, err := tb.BuildCapacity(tc.physicalCpu, tc.physicalMemory, tc.accelerators)
assert.NoError(t, err)
assertEqualResourceLists(t, "Capacity", capacity, node.Status.Capacity)
if !tc.kubeReserved {
assertEqualResourceLists(t, "Allocatable", capacity, node.Status.Allocatable)
} else {
reserved, err := makeResourceList(tc.reservedCpu, tc.reservedMemory, 0)
assert.NoError(t, err)
allocatable := tb.CalculateAllocatable(capacity, reserved)
assertEqualResourceLists(t, "Allocatable", allocatable, node.Status.Allocatable)
}
}
})
}
}
@ -237,36 +228,32 @@ func TestBuildAllocatableFromKubeEnv(t *testing.T) {
}
}
func TestBuildAllocatableFromCapacity(t *testing.T) {
func TestBuildKubeReserved(t *testing.T) {
type testCase struct {
capacityCpu string
capacityMemory string
allocatableCpu string
allocatableMemory string
gpuCount int64
physicalCpu int64
reservedCpu string
physicalMemory int64
reservedMemory string
}
testCases := []testCase{{
capacityCpu: "16000m",
capacityMemory: fmt.Sprintf("%v", 1*mbPerGB*bytesPerMB),
allocatableCpu: "15890m",
physicalCpu: 16,
reservedCpu: "110m",
// Below the 1 GB threshold, so the minimum 255 MiB reservation applies
allocatableMemory: fmt.Sprintf("%v", 1*mbPerGB*bytesPerMB-kubeletEvictionHardMemory),
gpuCount: 1,
physicalMemory: units.GB,
reservedMemory: fmt.Sprintf("%v", 255*units.MiB),
}, {
capacityCpu: "500m",
capacityMemory: fmt.Sprintf("%v", 1.1*mbPerGB*bytesPerMB),
allocatableCpu: "470m",
// Final 1024*1024 because we're duplicating upstream bug using MB as MiB
allocatableMemory: fmt.Sprintf("%v", 1.1*mbPerGB*bytesPerMB-0.25*1.1*mbPerGB*1024*1024-kubeletEvictionHardMemory),
physicalCpu: 1,
reservedCpu: "60m",
// 10760Mi = 0.25*4000Mi + 0.2*4000Mi + 0.1*8000Mi + 0.06*112000Mi + 0.02*72000Mi
physicalMemory: 200 * 1000 * units.MiB,
reservedMemory: fmt.Sprintf("%v", 10760*units.MiB),
}}
for _, tc := range testCases {
tb := GkeTemplateBuilder{}
capacity, err := makeResourceList(tc.capacityCpu, tc.capacityMemory, tc.gpuCount)
expectedReserved, err := makeResourceList(tc.reservedCpu, tc.reservedMemory, 0)
assert.NoError(t, err)
expectedAllocatable, err := makeResourceList(tc.allocatableCpu, tc.allocatableMemory, tc.gpuCount)
assert.NoError(t, err)
allocatable := tb.BuildAllocatableFromCapacity(capacity)
assertEqualResourceLists(t, "Allocatable", expectedAllocatable, allocatable)
kubeReserved := tb.BuildKubeReserved(tc.physicalCpu, tc.physicalMemory)
assertEqualResourceLists(t, "Kube reserved", expectedReserved, kubeReserved)
}
}
@ -293,5 +280,24 @@ func makeResourceList(cpu string, memory string, gpu int64) (apiv1.ResourceList,
}
func assertEqualResourceLists(t *testing.T, name string, expected, actual apiv1.ResourceList) {
assert.True(t, quota.V1Equals(expected, actual), "%q unequal:\nExpected:%v\nActual:%v", name, expected, actual)
t.Helper()
assert.True(t, quota.V1Equals(expected, actual),
"%q unequal:\nExpected: %v\nActual: %v", name, stringifyResourceList(expected), stringifyResourceList(actual))
}
func stringifyResourceList(resourceList apiv1.ResourceList) string {
resourceNames := []apiv1.ResourceName{
apiv1.ResourcePods, apiv1.ResourceCPU, gpuUtils.ResourceNvidiaGPU, apiv1.ResourceMemory, apiv1.ResourceEphemeralStorage}
var results []string
for _, name := range resourceNames {
quantity, found := resourceList[name]
if found {
value := quantity.Value()
if name == apiv1.ResourceCPU {
value = quantity.MilliValue()
}
results = append(results, fmt.Sprintf("%v: %v", string(name), value))
}
}
return strings.Join(results, ", ")
}

View File

@ -819,7 +819,7 @@ var defaultScaleDownOptions = config.AutoscalingOptions{
MinCoresTotal: 0,
MinMemoryTotal: 0,
MaxCoresTotal: config.DefaultMaxClusterCores,
MaxMemoryTotal: config.DefaultMaxClusterMemory * units.Gigabyte,
MaxMemoryTotal: config.DefaultMaxClusterMemory * units.GiB,
}
func TestScaleDownEmptyMultipleNodeGroups(t *testing.T) {
@ -864,13 +864,13 @@ func TestScaleDownEmptyMinCoresLimitHit(t *testing.T) {
func TestScaleDownEmptyMinMemoryLimitHit(t *testing.T) {
options := defaultScaleDownOptions
options.MinMemoryTotal = 4000 * MB
options.MinMemoryTotal = 4000 * MiB
config := &scaleTestConfig{
nodes: []nodeConfig{
{"n1", 2000, 1000 * MB, 0, true, "ng1"},
{"n2", 1000, 1000 * MB, 0, true, "ng1"},
{"n3", 1000, 1000 * MB, 0, true, "ng1"},
{"n4", 1000, 3000 * MB, 0, true, "ng1"},
{"n1", 2000, 1000 * MiB, 0, true, "ng1"},
{"n2", 1000, 1000 * MiB, 0, true, "ng1"},
{"n3", 1000, 1000 * MiB, 0, true, "ng1"},
{"n4", 1000, 3000 * MiB, 0, true, "ng1"},
},
options: options,
expectedScaleDowns: []string{"n1", "n2"},
@ -894,12 +894,12 @@ func TestScaleDownEmptyMinGpuLimitHit(t *testing.T) {
}
config := &scaleTestConfig{
nodes: []nodeConfig{
{"n1", 1000, 1000 * MB, 1, true, "ng1"},
{"n2", 1000, 1000 * MB, 1, true, "ng1"},
{"n3", 1000, 1000 * MB, 1, true, "ng1"},
{"n4", 1000, 1000 * MB, 1, true, "ng1"},
{"n5", 1000, 1000 * MB, 1, true, "ng1"},
{"n6", 1000, 1000 * MB, 1, true, "ng1"},
{"n1", 1000, 1000 * MiB, 1, true, "ng1"},
{"n2", 1000, 1000 * MiB, 1, true, "ng1"},
{"n3", 1000, 1000 * MiB, 1, true, "ng1"},
{"n4", 1000, 1000 * MiB, 1, true, "ng1"},
{"n5", 1000, 1000 * MiB, 1, true, "ng1"},
{"n6", 1000, 1000 * MiB, 1, true, "ng1"},
},
options: options,
expectedScaleDowns: []string{"n1", "n2"},
@ -1200,13 +1200,13 @@ func getCountOfChan(c chan string) int {
func TestCalculateCoresAndMemoryTotal(t *testing.T) {
nodeConfigs := []nodeConfig{
{"n1", 2000, 7500 * MB, 0, true, "ng1"},
{"n2", 2000, 7500 * MB, 0, true, "ng1"},
{"n3", 2000, 7500 * MB, 0, true, "ng1"},
{"n4", 12000, 8000 * MB, 0, true, "ng1"},
{"n5", 16000, 7500 * MB, 0, true, "ng1"},
{"n6", 8000, 6000 * MB, 0, true, "ng1"},
{"n7", 6000, 16000 * MB, 0, true, "ng1"},
{"n1", 2000, 7500 * MiB, 0, true, "ng1"},
{"n2", 2000, 7500 * MiB, 0, true, "ng1"},
{"n3", 2000, 7500 * MiB, 0, true, "ng1"},
{"n4", 12000, 8000 * MiB, 0, true, "ng1"},
{"n5", 16000, 7500 * MiB, 0, true, "ng1"},
{"n6", 8000, 6000 * MiB, 0, true, "ng1"},
{"n7", 6000, 16000 * MiB, 0, true, "ng1"},
}
nodes := make([]*apiv1.Node, len(nodeConfigs))
for i, n := range nodeConfigs {
@ -1226,7 +1226,7 @@ func TestCalculateCoresAndMemoryTotal(t *testing.T) {
coresTotal, memoryTotal := calculateScaleDownCoresMemoryTotal(nodes, time.Now())
assert.Equal(t, int64(42), coresTotal)
assert.Equal(t, int64(44000*MB), memoryTotal)
assert.Equal(t, int64(44000*MiB), memoryTotal)
}
func TestFilterOutMasters(t *testing.T) {

View File

@ -46,7 +46,7 @@ import (
var defaultOptions = config.AutoscalingOptions{
EstimatorName: estimator.BinpackingEstimatorName,
MaxCoresTotal: config.DefaultMaxClusterCores,
MaxMemoryTotal: config.DefaultMaxClusterMemory * units.Gigabyte,
MaxMemoryTotal: config.DefaultMaxClusterMemory * units.GiB,
MinCoresTotal: 0,
MinMemoryTotal: 0,
}
@ -120,24 +120,22 @@ func TestScaleUpMaxCoresLimitHitWithNotAutoscaledGroup(t *testing.T) {
simpleScaleUpTest(t, config)
}
const MB = 1024 * 1024
func TestScaleUpMaxMemoryLimitHit(t *testing.T) {
options := defaultOptions
options.MaxMemoryTotal = 1300 * MB
options.MaxMemoryTotal = 1300 * MiB
config := &scaleTestConfig{
nodes: []nodeConfig{
{"n1", 2000, 100 * MB, 0, true, "ng1"},
{"n2", 4000, 1000 * MB, 0, true, "ng2"},
{"n1", 2000, 100 * MiB, 0, true, "ng1"},
{"n2", 4000, 1000 * MiB, 0, true, "ng2"},
},
pods: []podConfig{
{"p1", 1000, 0, 0, "n1"},
{"p2", 3000, 0, 0, "n2"},
},
extraPods: []podConfig{
{"p-new-1", 2000, 100 * MB, 0, ""},
{"p-new-2", 2000, 100 * MB, 0, ""},
{"p-new-3", 2000, 100 * MB, 0, ""},
{"p-new-1", 2000, 100 * MiB, 0, ""},
{"p-new-2", 2000, 100 * MiB, 0, ""},
{"p-new-3", 2000, 100 * MiB, 0, ""},
},
scaleUpOptionToChoose: groupSizeChange{groupName: "ng1", sizeChange: 3},
expectedFinalScaleUp: groupSizeChange{groupName: "ng1", sizeChange: 2},
@ -149,20 +147,20 @@ func TestScaleUpMaxMemoryLimitHit(t *testing.T) {
func TestScaleUpMaxMemoryLimitHitWithNotAutoscaledGroup(t *testing.T) {
options := defaultOptions
options.MaxMemoryTotal = 1300 * MB
options.MaxMemoryTotal = 1300 * MiB
config := &scaleTestConfig{
nodes: []nodeConfig{
{"n1", 2000, 100 * MB, 0, true, "ng1"},
{"n2", 4000, 1000 * MB, 0, true, ""},
{"n1", 2000, 100 * MiB, 0, true, "ng1"},
{"n2", 4000, 1000 * MiB, 0, true, ""},
},
pods: []podConfig{
{"p1", 1000, 0, 0, "n1"},
{"p2", 3000, 0, 0, "n2"},
},
extraPods: []podConfig{
{"p-new-1", 2000, 100 * MB, 0, ""},
{"p-new-2", 2000, 100 * MB, 0, ""},
{"p-new-3", 2000, 100 * MB, 0, ""},
{"p-new-1", 2000, 100 * MiB, 0, ""},
{"p-new-2", 2000, 100 * MiB, 0, ""},
{"p-new-3", 2000, 100 * MiB, 0, ""},
},
scaleUpOptionToChoose: groupSizeChange{groupName: "ng1", sizeChange: 3},
expectedFinalScaleUp: groupSizeChange{groupName: "ng1", sizeChange: 2},
@ -177,17 +175,17 @@ func TestScaleUpCapToMaxTotalNodesLimit(t *testing.T) {
options.MaxNodesTotal = 3
config := &scaleTestConfig{
nodes: []nodeConfig{
{"n1", 2000, 100 * MB, 0, true, "ng1"},
{"n2", 4000, 1000 * MB, 0, true, "ng2"},
{"n1", 2000, 100 * MiB, 0, true, "ng1"},
{"n2", 4000, 1000 * MiB, 0, true, "ng2"},
},
pods: []podConfig{
{"p1", 1000, 0, 0, "n1"},
{"p2", 3000, 0, 0, "n2"},
},
extraPods: []podConfig{
{"p-new-1", 4000, 100 * MB, 0, ""},
{"p-new-2", 4000, 100 * MB, 0, ""},
{"p-new-3", 4000, 100 * MB, 0, ""},
{"p-new-1", 4000, 100 * MiB, 0, ""},
{"p-new-2", 4000, 100 * MiB, 0, ""},
{"p-new-3", 4000, 100 * MiB, 0, ""},
},
scaleUpOptionToChoose: groupSizeChange{groupName: "ng2", sizeChange: 3},
expectedFinalScaleUp: groupSizeChange{groupName: "ng2", sizeChange: 1},
@ -202,17 +200,17 @@ func TestScaleUpCapToMaxTotalNodesLimitWithNotAutoscaledGroup(t *testing.T) {
options.MaxNodesTotal = 3
config := &scaleTestConfig{
nodes: []nodeConfig{
{"n1", 2000, 100 * MB, 0, true, ""},
{"n2", 4000, 1000 * MB, 0, true, "ng2"},
{"n1", 2000, 100 * MiB, 0, true, ""},
{"n2", 4000, 1000 * MiB, 0, true, "ng2"},
},
pods: []podConfig{
{"p1", 1000, 0, 0, "n1"},
{"p2", 3000, 0, 0, "n2"},
},
extraPods: []podConfig{
{"p-new-1", 4000, 100 * MB, 0, ""},
{"p-new-2", 4000, 100 * MB, 0, ""},
{"p-new-3", 4000, 100 * MB, 0, ""},
{"p-new-1", 4000, 100 * MiB, 0, ""},
{"p-new-2", 4000, 100 * MiB, 0, ""},
{"p-new-3", 4000, 100 * MiB, 0, ""},
},
scaleUpOptionToChoose: groupSizeChange{groupName: "ng2", sizeChange: 3},
expectedFinalScaleUp: groupSizeChange{groupName: "ng2", sizeChange: 1},
@ -227,15 +225,15 @@ func TestWillConsiderGpuAndStandardPoolForPodWhichDoesNotRequireGpu(t *testing.T
options.MaxNodesTotal = 100
config := &scaleTestConfig{
nodes: []nodeConfig{
{"gpu-node-1", 2000, 1000 * MB, 1, true, "gpu-pool"},
{"std-node-1", 2000, 1000 * MB, 0, true, "std-pool"},
{"gpu-node-1", 2000, 1000 * MiB, 1, true, "gpu-pool"},
{"std-node-1", 2000, 1000 * MiB, 0, true, "std-pool"},
},
pods: []podConfig{
{"gpu-pod-1", 2000, 1000 * MB, 1, "gpu-node-1"},
{"std-pod-1", 2000, 1000 * MB, 0, "std-node-1"},
{"gpu-pod-1", 2000, 1000 * MiB, 1, "gpu-node-1"},
{"std-pod-1", 2000, 1000 * MiB, 0, "std-node-1"},
},
extraPods: []podConfig{
{"extra-std-pod", 2000, 1000 * MB, 0, ""},
{"extra-std-pod", 2000, 1000 * MiB, 0, ""},
},
expectedScaleUpOptions: []groupSizeChange{
{groupName: "std-pool", sizeChange: 1},
@ -254,15 +252,15 @@ func TestWillConsiderOnlyGpuPoolForPodWhichDoesRequiresGpu(t *testing.T) {
options.MaxNodesTotal = 100
config := &scaleTestConfig{
nodes: []nodeConfig{
{"gpu-node-1", 2000, 1000 * MB, 1, true, "gpu-pool"},
{"std-node-1", 2000, 1000 * MB, 0, true, "std-pool"},
{"gpu-node-1", 2000, 1000 * MiB, 1, true, "gpu-pool"},
{"std-node-1", 2000, 1000 * MiB, 0, true, "std-pool"},
},
pods: []podConfig{
{"gpu-pod-1", 2000, 1000 * MB, 1, "gpu-node-1"},
{"std-pod-1", 2000, 1000 * MB, 0, "std-node-1"},
{"gpu-pod-1", 2000, 1000 * MiB, 1, "gpu-node-1"},
{"std-pod-1", 2000, 1000 * MiB, 0, "std-node-1"},
},
extraPods: []podConfig{
{"extra-gpu-pod", 2000, 1000 * MB, 1, ""},
{"extra-gpu-pod", 2000, 1000 * MiB, 1, ""},
},
expectedScaleUpOptions: []groupSizeChange{
{groupName: "gpu-pool", sizeChange: 1},
@ -280,21 +278,21 @@ func TestWillConsiderAllPoolsWhichFitTwoPodsRequiringGpus(t *testing.T) {
options.MaxNodesTotal = 100
config := &scaleTestConfig{
nodes: []nodeConfig{
{"gpu-1-node-1", 2000, 1000 * MB, 1, true, "gpu-1-pool"},
{"gpu-2-node-1", 2000, 1000 * MB, 2, true, "gpu-2-pool"},
{"gpu-4-node-1", 2000, 1000 * MB, 4, true, "gpu-4-pool"},
{"std-node-1", 2000, 1000 * MB, 0, true, "std-pool"},
{"gpu-1-node-1", 2000, 1000 * MiB, 1, true, "gpu-1-pool"},
{"gpu-2-node-1", 2000, 1000 * MiB, 2, true, "gpu-2-pool"},
{"gpu-4-node-1", 2000, 1000 * MiB, 4, true, "gpu-4-pool"},
{"std-node-1", 2000, 1000 * MiB, 0, true, "std-pool"},
},
pods: []podConfig{
{"gpu-pod-1", 2000, 1000 * MB, 1, "gpu-1-node-1"},
{"gpu-pod-2", 2000, 1000 * MB, 2, "gpu-2-node-1"},
{"gpu-pod-3", 2000, 1000 * MB, 4, "gpu-4-node-1"},
{"std-pod-1", 2000, 1000 * MB, 0, "std-node-1"},
{"gpu-pod-1", 2000, 1000 * MiB, 1, "gpu-1-node-1"},
{"gpu-pod-2", 2000, 1000 * MiB, 2, "gpu-2-node-1"},
{"gpu-pod-3", 2000, 1000 * MiB, 4, "gpu-4-node-1"},
{"std-pod-1", 2000, 1000 * MiB, 0, "std-node-1"},
},
extraPods: []podConfig{
{"extra-gpu-pod-1", 1, 1 * MB, 1, ""}, // CPU and mem negligible
{"extra-gpu-pod-2", 1, 1 * MB, 1, ""}, // CPU and mem negligible
{"extra-gpu-pod-3", 1, 1 * MB, 1, ""}, // CPU and mem negligible
{"extra-gpu-pod-1", 1, 1 * MiB, 1, ""}, // CPU and mem negligible
{"extra-gpu-pod-2", 1, 1 * MiB, 1, ""}, // CPU and mem negligible
{"extra-gpu-pod-3", 1, 1 * MiB, 1, ""}, // CPU and mem negligible
},
expectedScaleUpOptions: []groupSizeChange{
{groupName: "gpu-1-pool", sizeChange: 3},


@ -43,6 +43,8 @@ import (
schedulercache "k8s.io/kubernetes/pkg/scheduler/cache"
)
const MiB = 1024 * 1024
func TestPodSchedulableMap(t *testing.T) {
rc1 := apiv1.ReplicationController{
ObjectMeta: metav1.ObjectMeta{
@ -661,19 +663,19 @@ func TestConfigurePredicateCheckerForLoop(t *testing.T) {
}
func TestGetNodeResource(t *testing.T) {
node := BuildTestNode("n1", 1000, 2*MB)
node := BuildTestNode("n1", 1000, 2*MiB)
cores := getNodeResource(node, apiv1.ResourceCPU)
assert.Equal(t, int64(1), cores)
memory := getNodeResource(node, apiv1.ResourceMemory)
assert.Equal(t, int64(2*MB), memory)
assert.Equal(t, int64(2*MiB), memory)
unknownResourceValue := getNodeResource(node, "unknown resource")
assert.Equal(t, int64(0), unknownResourceValue)
// if we have no resources in capacity we expect getNodeResource to return 0
nodeWithMissingCapacity := BuildTestNode("n1", 1000, 2*MB)
nodeWithMissingCapacity := BuildTestNode("n1", 1000, 2*MiB)
nodeWithMissingCapacity.Status.Capacity = apiv1.ResourceList{}
cores = getNodeResource(nodeWithMissingCapacity, apiv1.ResourceCPU)
@ -683,7 +685,7 @@ func TestGetNodeResource(t *testing.T) {
assert.Equal(t, int64(0), memory)
// if we have negative values in resources we expect getNodeResource to return 0
nodeWithNegativeCapacity := BuildTestNode("n1", -1000, -2*MB)
nodeWithNegativeCapacity := BuildTestNode("n1", -1000, -2*MiB)
nodeWithNegativeCapacity.Status.Capacity = apiv1.ResourceList{}
cores = getNodeResource(nodeWithNegativeCapacity, apiv1.ResourceCPU)
@ -695,14 +697,14 @@ func TestGetNodeResource(t *testing.T) {
}
func TestGetNodeCoresAndMemory(t *testing.T) {
node := BuildTestNode("n1", 2000, 2048*MB)
node := BuildTestNode("n1", 2000, 2048*MiB)
cores, memory := getNodeCoresAndMemory(node)
assert.Equal(t, int64(2), cores)
assert.Equal(t, int64(2048*MB), memory)
assert.Equal(t, int64(2048*MiB), memory)
// if we have no cpu/memory defined in capacity we expect getNodeCoresAndMemory to return 0s
nodeWithMissingCapacity := BuildTestNode("n1", 1000, 2*MB)
nodeWithMissingCapacity := BuildTestNode("n1", 1000, 2*MiB)
nodeWithMissingCapacity.Status.Capacity = apiv1.ResourceList{}
cores, memory = getNodeCoresAndMemory(nodeWithMissingCapacity)


@ -21,6 +21,7 @@ import (
apiv1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
"k8s.io/autoscaler/cluster-autoscaler/utils/units"
schedulercache "k8s.io/kubernetes/pkg/scheduler/cache"
"github.com/stretchr/testify/assert"
@ -45,7 +46,7 @@ func makePod(cpuPerPod, memoryPerPod int64) *apiv1.Pod {
func TestEstimate(t *testing.T) {
cpuPerPod := int64(500)
memoryPerPod := int64(1000 * 1024 * 1024)
memoryPerPod := int64(1000 * units.MiB)
pod := makePod(cpuPerPod, memoryPerPod)
pods := []*apiv1.Pod{}
@ -81,7 +82,7 @@ func TestEstimate(t *testing.T) {
func TestEstimateWithComing(t *testing.T) {
cpuPerPod := int64(500)
memoryPerPod := int64(1000 * 1024 * 1024)
memoryPerPod := int64(1000 * units.MiB)
pod := makePod(cpuPerPod, memoryPerPod)
pods := []*apiv1.Pod{}
@ -119,7 +120,7 @@ func TestEstimateWithComing(t *testing.T) {
func TestEstimateWithPorts(t *testing.T) {
cpuPerPod := int64(500)
memoryPerPod := int64(1000 * 1024 * 1024)
memoryPerPod := int64(1000 * units.MiB)
pod := makePod(cpuPerPod, memoryPerPod)
pod.Spec.Containers[0].Ports = []apiv1.ContainerPort{


@ -24,6 +24,7 @@ import (
"k8s.io/apimachinery/pkg/api/resource"
"k8s.io/autoscaler/cluster-autoscaler/simulator"
. "k8s.io/autoscaler/cluster-autoscaler/utils/test"
"k8s.io/autoscaler/cluster-autoscaler/utils/units"
schedulercache "k8s.io/kubernetes/pkg/scheduler/cache"
"github.com/stretchr/testify/assert"
@ -33,7 +34,7 @@ func TestBinpackingEstimate(t *testing.T) {
estimator := NewBinpackingNodeEstimator(simulator.NewTestPredicateChecker())
cpuPerPod := int64(350)
memoryPerPod := int64(1000 * 1024 * 1024)
memoryPerPod := int64(1000 * units.MiB)
pod := makePod(cpuPerPod, memoryPerPod)
pods := make([]*apiv1.Pod, 0)
@ -62,7 +63,7 @@ func TestBinpackingEstimateComingNodes(t *testing.T) {
estimator := NewBinpackingNodeEstimator(simulator.NewTestPredicateChecker())
cpuPerPod := int64(350)
memoryPerPod := int64(1000 * 1024 * 1024)
memoryPerPod := int64(1000 * units.MiB)
pod := makePod(cpuPerPod, memoryPerPod)
pods := make([]*apiv1.Pod, 0)
@ -92,7 +93,7 @@ func TestBinpackingEstimateWithPorts(t *testing.T) {
estimator := NewBinpackingNodeEstimator(simulator.NewTestPredicateChecker())
cpuPerPod := int64(200)
memoryPerPod := int64(1000 * 1024 * 1024)
memoryPerPod := int64(1000 * units.MiB)
pod := makePod(cpuPerPod, memoryPerPod)
pod.Spec.Containers[0].Ports = []apiv1.ContainerPort{
{


@ -24,6 +24,7 @@ import (
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
kube_util "k8s.io/autoscaler/cluster-autoscaler/utils/kubernetes"
"k8s.io/autoscaler/cluster-autoscaler/utils/units"
)
// SimplePreferredNodeProvider returns preferred node based on the cluster size.
@ -46,22 +47,21 @@ func (spnp *SimplePreferredNodeProvider) Node() (*apiv1.Node, error) {
}
size := len(nodes)
mb := int64(1024 * 1024)
cpu := int64(1000)
// Double the node size every time the cluster size increases 3x.
if size <= 2 {
return buildNode(1*cpu, 3750*mb), nil
return buildNode(1*cpu, 3750*units.MiB), nil
} else if size <= 6 {
return buildNode(2*cpu, 7500*mb), nil
return buildNode(2*cpu, 7500*units.MiB), nil
} else if size <= 20 {
return buildNode(4*cpu, 15000*mb), nil
return buildNode(4*cpu, 15000*units.MiB), nil
} else if size <= 60 {
return buildNode(8*cpu, 30000*mb), nil
return buildNode(8*cpu, 30000*units.MiB), nil
} else if size <= 200 {
return buildNode(16*cpu, 60000*mb), nil
return buildNode(16*cpu, 60000*units.MiB), nil
}
return buildNode(32*cpu, 120000*mb), nil
return buildNode(32*cpu, 120000*units.MiB), nil
}
func buildNode(millicpu int64, mem int64) *apiv1.Node {

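The preferred-node tiers above follow the rule stated in the comment: the node size doubles each time the cluster size roughly triples. A minimal, self-contained sketch restating those tiers for illustration; the preferredSize helper is hypothetical and not part of this change:

package main

import "fmt"

// MiB mirrors units.MiB so the example stands alone.
const MiB = 1024 * 1024

// preferredSize restates the tiers used by SimplePreferredNodeProvider:
// it returns (millicpu, memory in bytes) for a given cluster size.
func preferredSize(clusterSize int) (int64, int64) {
	cpu := int64(1000)
	switch {
	case clusterSize <= 2:
		return 1 * cpu, 3750 * MiB
	case clusterSize <= 6:
		return 2 * cpu, 7500 * MiB
	case clusterSize <= 20:
		return 4 * cpu, 15000 * MiB
	case clusterSize <= 60:
		return 8 * cpu, 30000 * MiB
	case clusterSize <= 200:
		return 16 * cpu, 60000 * MiB
	}
	return 32 * cpu, 120000 * MiB
}

func main() {
	// Print the preferred shape just below and above a few tier boundaries.
	for _, size := range []int{2, 3, 21, 201} {
		millicpu, mem := preferredSize(size)
		fmt.Printf("cluster=%d -> %d mCPU, %d MiB\n", size, millicpu, mem/MiB)
	}
}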

@ -27,6 +27,7 @@ import (
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider"
"k8s.io/autoscaler/cluster-autoscaler/expander"
"k8s.io/autoscaler/cluster-autoscaler/utils/gpu"
"k8s.io/autoscaler/cluster-autoscaler/utils/units"
schedulercache "k8s.io/kubernetes/pkg/scheduler/cache"
"k8s.io/klog"
@ -47,11 +48,11 @@ type priceBased struct {
var (
// defaultPreferredNode is the node that is preferred if PreferredNodeProvider fails.
// 4 cpu, 16 GiB ram.
defaultPreferredNode = buildNode(4*1000, 4*4*1024*1024*1024)
defaultPreferredNode = buildNode(4*1000, 4*4*units.GiB)
// priceStabilizationPod is the pod cost to stabilize node_cost/pod_cost ratio a bit.
// 0.5 cpu, 500 MiB ram
priceStabilizationPod = buildPod("stabilize", 500, 500*1024*1024)
priceStabilizationPod = buildPod("stabilize", 500, 500*units.MiB)
// Penalty given to node groups that are yet to be created.
// TODO: make it a flag


@ -22,6 +22,7 @@ import (
"time"
"k8s.io/autoscaler/cluster-autoscaler/expander"
"k8s.io/autoscaler/cluster-autoscaler/utils/units"
apiv1 "k8s.io/api/core/v1"
testprovider "k8s.io/autoscaler/cluster-autoscaler/cloudprovider/test"
@ -115,7 +116,7 @@ func TestPriceExpander(t *testing.T) {
},
},
&testPreferredNodeProvider{
preferred: buildNode(2000, 1024*1024*1024),
preferred: buildNode(2000, units.GiB),
},
SimpleNodeUnfitness,
).BestOption(options, nodeInfosForGroups).Debug, "ng1")
@ -134,7 +135,7 @@ func TestPriceExpander(t *testing.T) {
},
},
&testPreferredNodeProvider{
preferred: buildNode(4000, 1024*1024*1024),
preferred: buildNode(4000, units.GiB),
},
SimpleNodeUnfitness,
).BestOption(options, nodeInfosForGroups).Debug, "ng2")
@ -169,7 +170,7 @@ func TestPriceExpander(t *testing.T) {
},
},
&testPreferredNodeProvider{
preferred: buildNode(4000, 1024*1024*1024),
preferred: buildNode(4000, units.GiB),
},
SimpleNodeUnfitness,
).BestOption(options1b, nodeInfosForGroups).Debug, "ng1")
@ -188,7 +189,7 @@ func TestPriceExpander(t *testing.T) {
},
},
&testPreferredNodeProvider{
preferred: buildNode(2000, 1024*1024*1024),
preferred: buildNode(2000, units.GiB),
},
SimpleNodeUnfitness,
).BestOption(options, nodeInfosForGroups).Debug, "ng2")
@ -224,7 +225,7 @@ func TestPriceExpander(t *testing.T) {
},
},
&testPreferredNodeProvider{
preferred: buildNode(2000, 1024*1024*1024),
preferred: buildNode(2000, units.GiB),
},
SimpleNodeUnfitness,
).BestOption(options2, nodeInfosForGroups).Debug, "ng2")
@ -236,7 +237,7 @@ func TestPriceExpander(t *testing.T) {
nodePrice: map[string]float64{},
},
&testPreferredNodeProvider{
preferred: buildNode(2000, 1024*1024*1024),
preferred: buildNode(2000, units.GiB),
},
SimpleNodeUnfitness,
).BestOption(options2, nodeInfosForGroups))
@ -280,7 +281,7 @@ func TestPriceExpander(t *testing.T) {
},
},
&testPreferredNodeProvider{
preferred: buildNode(2000, 1024*1024*1024),
preferred: buildNode(2000, units.GiB),
},
SimpleNodeUnfitness,
).BestOption(options3, nodeInfosForGroups).Debug, "ng2")
@ -300,7 +301,7 @@ func TestPriceExpander(t *testing.T) {
},
},
&testPreferredNodeProvider{
preferred: buildNode(2000, 1024*1024*1024),
preferred: buildNode(2000, units.GiB),
},
SimpleNodeUnfitness,
).BestOption(options3, nodeInfosForGroups).Debug, "ng3")


@ -174,8 +174,8 @@ func createAutoscalingOptions() config.AutoscalingOptions {
klog.Fatalf("Failed to parse flags: %v", err)
}
// Convert memory limits to bytes.
minMemoryTotal = minMemoryTotal * units.Gigabyte
maxMemoryTotal = maxMemoryTotal * units.Gigabyte
minMemoryTotal = minMemoryTotal * units.GiB
maxMemoryTotal = maxMemoryTotal * units.GiB
parsedGpuTotal, err := parseMultipleGpuLimits(*gpuTotal)
if err != nil {

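For context on the flag conversion above: the memory limits are passed on the command line in whole gibibytes and multiplied by units.GiB to get bytes. A small sketch under that assumption; the literal flag values below are hypothetical:

package main

import "fmt"

// GiB mirrors units.GiB (2^30) so the example stands alone.
const GiB = 1024 * 1024 * 1024

func main() {
	// Hypothetical flag values, expressed in GiB as given on the command line.
	minMemoryTotal := int64(0)
	maxMemoryTotal := int64(1300)

	// Convert memory limits to bytes, as createAutoscalingOptions now does with units.GiB.
	minMemoryTotal = minMemoryTotal * GiB
	maxMemoryTotal = maxMemoryTotal * GiB

	fmt.Println(minMemoryTotal, maxMemoryTotal) // 0 1395864371200
}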

@ -17,6 +17,12 @@ limitations under the License.
package units
const (
// Gigabyte is 2^30 bytes.
Gigabyte = 1024 * 1024 * 1024
// GB - GigaByte size (10^9)
GB = 1000 * 1000 * 1000
// GiB - GibiByte size (2^30)
GiB = 1024 * 1024 * 1024
// MB - MegaByte size (10^6)
MB = 1000 * 1000
// MiB - MebiByte size (2^20)
MiB = 1024 * 1024
)
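The new constants separate decimal units (MB, GB) from binary ones (MiB, GiB); the removed Gigabyte constant, despite its decimal-sounding name, held the binary value 2^30. A short sketch of the difference, using a hypothetical machine type that reports 3840 binary megabytes of memory:

package main

import "fmt"

// Local copies of the constants above, to keep the example self-contained.
const (
	MB  = 1000 * 1000
	MiB = 1024 * 1024
	GiB = 1024 * 1024 * 1024
)

func main() {
	memoryMb := int64(3840) // memory as reported by the cloud API, in binary megabytes

	fmt.Println(memoryMb * MiB)              // 4026531840 bytes (binary interpretation)
	fmt.Println(memoryMb * MB)               // 3840000000 bytes (decimal interpretation)
	fmt.Println(float64(memoryMb*MiB) / GiB) // 3.75 GiB
}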