Adding support for ephemeral storage in build node from template, which allows scale from 0 for pods requesting ephemeral storage.

This commit is contained in:
Jayant Jain 2020-12-15 12:46:42 +00:00
parent 7af23baf76
commit bc5a20ea4e
4 changed files with 487 additions and 74 deletions

View File

@ -16,7 +16,15 @@ limitations under the License.
package gce
import klog "k8s.io/klog/v2"
import (
"fmt"
"math"
"strconv"
"strings"
"k8s.io/apimachinery/pkg/api/resource"
klog "k8s.io/klog/v2"
)
// There should be no imports as it is used standalone in e2e tests
@ -26,12 +34,22 @@ const (
// GiB - GibiByte size (2^30)
GiB = 1024 * 1024 * 1024
// KubeletEvictionHardMemory is subtracted from capacity
// MemoryEvictionHardTag tag passed by kubelet used to determine evictionHard values
MemoryEvictionHardTag = "memory.available"
// EphemeralStorageEvictionHardTag tag passed by kubelet used to determine evictionHard values
EphemeralStorageEvictionHardTag = "nodefs.available"
// defaultKubeletEvictionHardMemory is subtracted from capacity
// when calculating allocatable (on top of kube-reserved).
// Equals kubelet "evictionHard: {memory.available}"
// We don't have a good place to get it from, but it has been hard-coded
// to 100Mi since at least k8s 1.4.
KubeletEvictionHardMemory = 100 * MiB
// Equals kubelet "evictionHard: {MemoryEvictionHardTag}"
// It is hardcoded as a fallback when it is not passed by kubelet.
defaultKubeletEvictionHardMemory = 100 * MiB
// defaultKubeletEvictionHardEphemeralStorageRatio is the ratio of disk size to be blocked for eviction
// subtracted from capacity when calculating allocatable (on top of kube-reserved).
// Equals kubelet "evictionHard: {EphemeralStorageEvictionHardTag}"
// It is hardcoded as a fallback when it is not passed by kubelet.
defaultKubeletEvictionHardEphemeralStorageRatio = 0.1
// Kernel reserved memory is subtracted when calculating total memory.
kernelReservedRatio = 64
@ -41,6 +59,12 @@ const (
swiotlbThresholdMemory = 3 * GiB
)
// EvictionHard is the struct used to keep parsed values for eviction
// thresholds read from kubelet's evictionHard settings.
type EvictionHard struct {
	// MemoryEvictionQuantity is the parsed "memory.available" threshold in bytes.
	MemoryEvictionQuantity int64
	// EphemeralStorageEvictionRatio is the parsed "nodefs.available" threshold
	// expressed as a ratio (0.0-1.0) of the disk size.
	EphemeralStorageEvictionRatio float64
}
// CalculateKernelReserved computes how much memory Linux kernel will reserve.
// TODO(jkaniuk): account for crashkernel reservation on RHEL / CentOS
func CalculateKernelReserved(physicalMemory int64, os OperatingSystem) int64 {
@ -57,7 +81,111 @@ func CalculateKernelReserved(physicalMemory int64, os OperatingSystem) int64 {
case OperatingSystemWindows:
return 0
default:
klog.Errorf("CalculateKernelReserved called for unknown operatin system %v", os)
klog.Errorf("CalculateKernelReserved called for unknown operating system %v", os)
return 0
}
}
// ParseEvictionHardOrGetDefault tries to parse evictionHard map, else fills defaults to be used.
// Malformed or out-of-range values never fail the call: each field independently
// falls back to its hardcoded default so allocatable can always be computed.
func ParseEvictionHardOrGetDefault(evictionHard map[string]string) *EvictionHard {
	// No kubelet eviction settings available at all: return the defaults.
	if evictionHard == nil {
		return &EvictionHard{
			MemoryEvictionQuantity:        defaultKubeletEvictionHardMemory,
			EphemeralStorageEvictionRatio: defaultKubeletEvictionHardEphemeralStorageRatio,
		}
	}
	evictionReturn := EvictionHard{}

	// Memory: parse a resource quantity (e.g. "100Mi"); fall back to the default
	// on a missing tag, a parse error, or a value not representable as int64.
	memory, found := evictionHard[MemoryEvictionHardTag]
	if !found {
		klog.V(4).Info("evictionHard memory tag not found, using default")
		evictionReturn.MemoryEvictionQuantity = defaultKubeletEvictionHardMemory
	} else if memQuantity, err := resource.ParseQuantity(memory); err != nil {
		klog.Errorf("unable to parse eviction quantity for memory %q: %v; using default", memory, err)
		evictionReturn.MemoryEvictionQuantity = defaultKubeletEvictionHardMemory
	} else if value, possible := memQuantity.AsInt64(); !possible {
		klog.Errorf("unable to represent eviction quantity for memory %q as int64; using default", memory)
		evictionReturn.MemoryEvictionQuantity = defaultKubeletEvictionHardMemory
	} else {
		evictionReturn.MemoryEvictionQuantity = value
	}

	// Ephemeral storage: parse a percentage (e.g. "10%"); fall back to the
	// default on a missing tag, a parse error, or a ratio outside [0, 1].
	ephRatio, found := evictionHard[EphemeralStorageEvictionHardTag]
	if !found {
		klog.V(4).Info("evictionHard ephemeral storage tag not found, using default")
		evictionReturn.EphemeralStorageEvictionRatio = defaultKubeletEvictionHardEphemeralStorageRatio
	} else if value, err := parsePercentageToRatio(ephRatio); err != nil {
		klog.Errorf("unable to parse eviction ratio for ephemeral storage: %v", err)
		evictionReturn.EphemeralStorageEvictionRatio = defaultKubeletEvictionHardEphemeralStorageRatio
	} else if value < 0.0 || value > 1.0 {
		klog.Errorf("eviction ratio for ephemeral storage outside [0, 1]: %v; using default", value)
		evictionReturn.EphemeralStorageEvictionRatio = defaultKubeletEvictionHardEphemeralStorageRatio
	} else {
		evictionReturn.EphemeralStorageEvictionRatio = value
	}
	return &evictionReturn
}
// GetKubeletEvictionHardForMemory calculates the evictionHard value for Memory.
func GetKubeletEvictionHardForMemory(evictionHard *EvictionHard) int64 {
if evictionHard == nil {
return defaultKubeletEvictionHardMemory
}
return evictionHard.MemoryEvictionQuantity
}
// GetKubeletEvictionHardForEphemeralStorage calculates the evictionHard value for Ephemeral Storage.
func GetKubeletEvictionHardForEphemeralStorage(diskSize int64, evictionHard *EvictionHard) float64 {
if diskSize <= 0 {
return 0
}
if evictionHard == nil {
return defaultKubeletEvictionHardEphemeralStorageRatio * float64(diskSize)
}
return evictionHard.EphemeralStorageEvictionRatio * float64(diskSize)
}
// parsePercentageToRatio converts a percentage string such as "10%" into a
// ratio (0.1). The input must end with exactly one trailing '%' sign;
// otherwise an error is returned.
func parsePercentageToRatio(percentString string) (float64, error) {
	i := strings.Index(percentString, "%")
	if i < 0 || i != len(percentString)-1 {
		// Report the function's actual name and the bad input for debuggability.
		return 0, fmt.Errorf("parsePercentageToRatio: percentage sign not found in %q", percentString)
	}
	percentVal, err := strconv.ParseFloat(percentString[:i], 64)
	if err != nil {
		return 0, err
	}
	return percentVal / 100, nil
}
// CalculateOSReservedEphemeralStorage estimates how much ephemeral storage OS will reserve and eviction threshold
// for the given disk size (bytes) and OS distribution. Returns 0 for an
// unknown distribution.
func CalculateOSReservedEphemeralStorage(diskSize int64, osDistribution OperatingSystemDistribution) int64 {
	switch osDistribution {
	case OperatingSystemDistributionCOS:
		storage := int64(math.Ceil(0.015635*float64(diskSize))) + int64(math.Ceil(4.148*GiB)) // os partition estimation
		storage += int64(math.Min(100*MiB, math.Ceil(0.001*float64(diskSize))))               // over-provisioning buffer
		return storage
	case OperatingSystemDistributionUbuntu:
		storage := int64(math.Ceil(0.03083*float64(diskSize))) + int64(math.Ceil(0.171*GiB)) // os partition estimation
		storage += int64(math.Min(100*MiB, math.Ceil(0.001*float64(diskSize))))              // over-provisioning buffer
		return storage
	case OperatingSystemDistributionWindowsLTSC, OperatingSystemDistributionWindowsSAC:
		storage := int64(math.Ceil(0.1133 * GiB)) // os partition estimation
		storage += int64(math.Ceil(0.010 * GiB))  // over-provisioning buffer
		return storage
	default:
		// Log the real function name (was "CalculateReservedAndEvictionEphemeralStorage").
		klog.Errorf("CalculateOSReservedEphemeralStorage called for unknown os distribution %v", osDistribution)
		return 0
	}
}

View File

@ -18,6 +18,7 @@ package gce
import (
"fmt"
"math"
"math/rand"
"regexp"
"strings"
@ -28,6 +29,7 @@ import (
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider"
"k8s.io/autoscaler/cluster-autoscaler/utils/gpu"
"k8s.io/autoscaler/cluster-autoscaler/utils/units"
kubeletapis "k8s.io/kubernetes/pkg/kubelet/apis"
"github.com/ghodss/yaml"
@ -52,7 +54,7 @@ func (t *GceTemplateBuilder) getAcceleratorCount(accelerators []*gce.Accelerator
}
// BuildCapacity builds a list of resource capacities given list of hardware.
func (t *GceTemplateBuilder) BuildCapacity(cpu int64, mem int64, accelerators []*gce.AcceleratorConfig, os OperatingSystem, pods *int64) (apiv1.ResourceList, error) {
func (t *GceTemplateBuilder) BuildCapacity(cpu int64, mem int64, accelerators []*gce.AcceleratorConfig, os OperatingSystem, osDistribution OperatingSystemDistribution, ephemeralStorage int64, pods *int64) (apiv1.ResourceList, error) {
capacity := apiv1.ResourceList{}
if pods == nil {
capacity[apiv1.ResourcePods] = *resource.NewQuantity(110, resource.DecimalSI)
@ -68,6 +70,11 @@ func (t *GceTemplateBuilder) BuildCapacity(cpu int64, mem int64, accelerators []
capacity[gpu.ResourceNvidiaGPU] = *resource.NewQuantity(t.getAcceleratorCount(accelerators), resource.DecimalSI)
}
if ephemeralStorage > 0 {
storageTotal := ephemeralStorage - CalculateOSReservedEphemeralStorage(ephemeralStorage, osDistribution)
capacity[apiv1.ResourceEphemeralStorage] = *resource.NewQuantity(int64(math.Max(float64(storageTotal), 0)), resource.DecimalSI)
}
return capacity, nil
}
@ -79,7 +86,7 @@ func (t *GceTemplateBuilder) BuildCapacity(cpu int64, mem int64, accelerators []
// the kubelet for its operation. Allocated resources are capacity minus reserved.
// If we fail to extract the reserved resources from kubeEnv (e.g it is in a
// wrong format or does not contain kubelet arguments), we return an error.
func (t *GceTemplateBuilder) BuildAllocatableFromKubeEnv(capacity apiv1.ResourceList, kubeEnv string) (apiv1.ResourceList, error) {
func (t *GceTemplateBuilder) BuildAllocatableFromKubeEnv(capacity apiv1.ResourceList, kubeEnv string, evictionHard *EvictionHard) (apiv1.ResourceList, error) {
kubeReserved, err := extractKubeReservedFromKubeEnv(kubeEnv)
if err != nil {
return nil, err
@ -88,12 +95,12 @@ func (t *GceTemplateBuilder) BuildAllocatableFromKubeEnv(capacity apiv1.Resource
if err != nil {
return nil, err
}
return t.CalculateAllocatable(capacity, reserved), nil
return t.CalculateAllocatable(capacity, reserved, evictionHard), nil
}
// CalculateAllocatable computes allocatable resources subtracting kube reserved values
// and kubelet eviction memory buffer from corresponding capacity.
func (t *GceTemplateBuilder) CalculateAllocatable(capacity, kubeReserved apiv1.ResourceList) apiv1.ResourceList {
func (t *GceTemplateBuilder) CalculateAllocatable(capacity apiv1.ResourceList, kubeReserved apiv1.ResourceList, evictionHard *EvictionHard) apiv1.ResourceList {
allocatable := apiv1.ResourceList{}
for key, value := range capacity {
quantity := value.DeepCopy()
@ -101,7 +108,10 @@ func (t *GceTemplateBuilder) CalculateAllocatable(capacity, kubeReserved apiv1.R
quantity.Sub(reservedQuantity)
}
if key == apiv1.ResourceMemory {
quantity = *resource.NewQuantity(quantity.Value()-KubeletEvictionHardMemory, resource.BinarySI)
quantity = *resource.NewQuantity(quantity.Value()-GetKubeletEvictionHardForMemory(evictionHard), resource.BinarySI)
}
if key == apiv1.ResourceEphemeralStorage {
quantity = *resource.NewQuantity(quantity.Value()-int64(GetKubeletEvictionHardForEphemeralStorage(value.Value(), evictionHard)), resource.BinarySI)
}
allocatable[key] = quantity
}
@ -150,7 +160,18 @@ func (t *GceTemplateBuilder) BuildNodeFromTemplate(mig Mig, template *gce.Instan
return nil, fmt.Errorf("could not obtain os from kube-env from template metadata")
}
capacity, err := t.BuildCapacity(cpu, mem, template.Properties.GuestAccelerators, os, pods)
osDistribution := extractOperatingSystemDistributionFromKubeEnv(kubeEnvValue)
if osDistribution == OperatingSystemDistributionUnknown {
return nil, fmt.Errorf("could not obtain os-distribution from kube-env from template metadata")
}
ephemeralStorage, err := getEphemeralStorageFromInstanceTemplateProperties(template.Properties)
if err != nil {
klog.Errorf("could not fetch ephemeral storage from instance template. %s", err)
return nil, err
}
capacity, err := t.BuildCapacity(cpu, mem, template.Properties.GuestAccelerators, os, osDistribution, ephemeralStorage, pods)
if err != nil {
return nil, err
}
@ -174,7 +195,14 @@ func (t *GceTemplateBuilder) BuildNodeFromTemplate(mig Mig, template *gce.Instan
}
node.Spec.Taints = append(node.Spec.Taints, kubeEnvTaints...)
if allocatable, err := t.BuildAllocatableFromKubeEnv(node.Status.Capacity, kubeEnvValue); err == nil {
// Extract Eviction Hard
evictionHardFromKubeEnv, err := extractEvictionHardFromKubeEnv(kubeEnvValue)
if err != nil || len(evictionHardFromKubeEnv) == 0 {
klog.Warning("unable to get evictionHardFromKubeEnv values, continuing without it.")
}
evictionHard := ParseEvictionHardOrGetDefault(evictionHardFromKubeEnv)
if allocatable, err := t.BuildAllocatableFromKubeEnv(node.Status.Capacity, kubeEnvValue, evictionHard); err == nil {
nodeAllocatable = allocatable
}
}
@ -197,6 +225,22 @@ func (t *GceTemplateBuilder) BuildNodeFromTemplate(mig Mig, template *gce.Instan
return &node, nil
}
// getEphemeralStorageFromInstanceTemplateProperties returns the size in bytes
// of the boot disk attached to the instance template, which backs the node's
// ephemeral storage. It errors out when no disks are listed or when no disk
// with Boot=true (and initialize params) is present.
func getEphemeralStorageFromInstanceTemplateProperties(instanceProperties *gce.InstanceProperties) (ephemeralStorage int64, err error) {
	if instanceProperties.Disks == nil {
		return 0, fmt.Errorf("unable to get ephemeral storage because instance properties disks is nil")
	}
	for _, disk := range instanceProperties.Disks {
		if disk == nil || disk.InitializeParams == nil || !disk.Boot {
			continue
		}
		return disk.InitializeParams.DiskSizeGb * units.GiB, nil
	}
	return 0, fmt.Errorf("unable to get ephemeral storage, either no attached disks or no disk with boot=true")
}
// BuildGenericLabels builds basic labels that should be present on every GCE node,
// including hostname, zone etc.
func BuildGenericLabels(ref GceRef, machineType string, nodeName string, os OperatingSystem) (map[string]string, error) {
@ -346,6 +390,68 @@ func extractOperatingSystemFromKubeEnv(kubeEnv string) OperatingSystem {
}
}
// OperatingSystemDistribution denotes distribution of the operating system used by nodes coming from node group
type OperatingSystemDistribution string

const (
	// OperatingSystemDistributionUnknown is used if the operating system distribution is unknown
	OperatingSystemDistributionUnknown OperatingSystemDistribution = ""
	// OperatingSystemDistributionUbuntu is used if the operating system distribution is Ubuntu
	OperatingSystemDistributionUbuntu OperatingSystemDistribution = "ubuntu"
	// OperatingSystemDistributionWindowsLTSC is used if the operating system distribution is Windows LTSC
	OperatingSystemDistributionWindowsLTSC OperatingSystemDistribution = "windows_ltsc"
	// OperatingSystemDistributionWindowsSAC is used if the operating system distribution is Windows SAC
	OperatingSystemDistributionWindowsSAC OperatingSystemDistribution = "windows_sac"
	// OperatingSystemDistributionCOS is used if the operating system distribution is COS
	OperatingSystemDistributionCOS OperatingSystemDistribution = "cos"
	// OperatingSystemDistributionDefault defines which operating system will be assumed if not explicitly passed via AUTOSCALER_ENV_VARS
	OperatingSystemDistributionDefault = OperatingSystemDistributionCOS
)
// extractOperatingSystemDistributionFromKubeEnv reads the os_distribution
// autoscaler variable from kube-env. It returns the default distribution when
// the variable is absent, and Unknown on extraction errors or unrecognized
// values.
func extractOperatingSystemDistributionFromKubeEnv(kubeEnv string) OperatingSystemDistribution {
	osDistributionValue, found, err := extractAutoscalerVarFromKubeEnv(kubeEnv, "os_distribution")
	if err != nil {
		// Message corrected to mention os distribution (was copy-pasted from the os extractor).
		klog.Errorf("error while obtaining os distribution from AUTOSCALER_ENV_VARS; %v", err)
		return OperatingSystemDistributionUnknown
	}
	if !found {
		klog.Warningf("no os-distribution defined in AUTOSCALER_ENV_VARS; using default %v", OperatingSystemDistributionDefault)
		return OperatingSystemDistributionDefault
	}
	switch osDistributionValue {
	case string(OperatingSystemDistributionUbuntu):
		return OperatingSystemDistributionUbuntu
	case string(OperatingSystemDistributionWindowsLTSC):
		return OperatingSystemDistributionWindowsLTSC
	case string(OperatingSystemDistributionWindowsSAC):
		return OperatingSystemDistributionWindowsSAC
	case string(OperatingSystemDistributionCOS):
		return OperatingSystemDistributionCOS
	default:
		klog.Errorf("unexpected os-distribution=%v passed via AUTOSCALER_ENV_VARS", osDistributionValue)
		return OperatingSystemDistributionUnknown
	}
}
// extractEvictionHardFromKubeEnv reads the evictionHard autoscaler variable
// from kube-env and parses it into a tag->value map. A missing variable is not
// an error: an empty map is returned so callers fall back to defaults.
func extractEvictionHardFromKubeEnv(kubeEnvValue string) (map[string]string, error) {
	evictionHardAsString, found, err := extractAutoscalerVarFromKubeEnv(kubeEnvValue, "evictionHard")
	if err != nil {
		// Warningf (not Warning): the message contains a %v format verb.
		klog.Warningf("error while obtaining eviction-hard from AUTOSCALER_ENV_VARS; %v", err)
		return nil, err
	}
	if !found {
		klog.Warning("no evictionHard defined in AUTOSCALER_ENV_VARS;")
		return make(map[string]string), nil
	}
	return parseKeyValueListToMap(evictionHardAsString)
}
func extractAutoscalerVarFromKubeEnv(kubeEnv, name string) (value string, found bool, err error) {
const autoscalerVars = "AUTOSCALER_ENV_VARS"
autoscalerVals, err := extractFromKubeEnv(kubeEnv, autoscalerVars)

View File

@ -115,6 +115,14 @@ func TestBuildNodeFromTemplateSetsResources(t *testing.T) {
GuestAccelerators: tc.accelerators,
Metadata: &gce.Metadata{},
MachineType: "irrelevant-type",
Disks: []*gce.AttachedDisk{
{
Boot: true,
InitializeParams: &gce.AttachedDiskInitializeParams{
DiskSizeGb: 0,
},
},
},
},
}
if tc.kubeEnv != "" {
@ -129,15 +137,15 @@ func TestBuildNodeFromTemplateSetsResources(t *testing.T) {
assert.NotNil(t, node.Status)
assert.NotNil(t, node.Status.Capacity)
assert.NotNil(t, node.Status.Allocatable)
capacity, err := tb.BuildCapacity(tc.physicalCpu, tc.physicalMemory, tc.accelerators, OperatingSystemLinux, tc.pods)
capacity, err := tb.BuildCapacity(tc.physicalCpu, tc.physicalMemory, tc.accelerators, OperatingSystemLinux, OperatingSystemDistributionCOS, -1, tc.pods)
assert.NoError(t, err)
assertEqualResourceLists(t, "Capacity", capacity, node.Status.Capacity)
if !tc.kubeReserved {
assertEqualResourceLists(t, "Allocatable", capacity, node.Status.Allocatable)
} else {
reserved, err := makeResourceList(tc.reservedCpu, tc.reservedMemory, 0)
reserved, err := makeResourceList(tc.reservedCpu, tc.reservedMemory, 0, "")
assert.NoError(t, err)
allocatable := tb.CalculateAllocatable(capacity, reserved)
allocatable := tb.CalculateAllocatable(capacity, reserved, ParseEvictionHardOrGetDefault(nil))
assertEqualResourceLists(t, "Allocatable", allocatable, node.Status.Allocatable)
}
}
@ -208,44 +216,53 @@ func TestBuildGenericLabels(t *testing.T) {
func TestCalculateAllocatable(t *testing.T) {
type testCase struct {
scenario string
capacityCpu string
reservedCpu string
allocatableCpu string
capacityMemory string
reservedMemory string
allocatableMemory string
scenario string
capacityCpu string
reservedCpu string
allocatableCpu string
capacityMemory string
reservedMemory string
allocatableMemory string
capacityEphemeralStorage string
reservedEphemeralStorage string
allocatableEphemeralStorage string
}
testCases := []testCase{
{
scenario: "no reservations",
capacityCpu: "8",
reservedCpu: "0",
allocatableCpu: "8",
capacityMemory: fmt.Sprintf("%v", 200*units.MiB),
reservedMemory: "0",
allocatableMemory: fmt.Sprintf("%v", 200*units.MiB-KubeletEvictionHardMemory),
scenario: "no reservations",
capacityCpu: "8",
reservedCpu: "0",
allocatableCpu: "8",
capacityMemory: fmt.Sprintf("%v", 200*units.MiB),
reservedMemory: "0",
allocatableMemory: fmt.Sprintf("%v", 200*units.MiB-GetKubeletEvictionHardForMemory(nil)),
capacityEphemeralStorage: fmt.Sprintf("%v", 200*units.GiB),
reservedEphemeralStorage: "0",
allocatableEphemeralStorage: fmt.Sprintf("%v", 200*units.GiB-GetKubeletEvictionHardForEphemeralStorage(200*GiB, nil)),
},
{
scenario: "reserved cpu and memory",
capacityCpu: "8",
reservedCpu: "1000m",
allocatableCpu: "7000m",
capacityMemory: fmt.Sprintf("%v", 200*units.MiB),
reservedMemory: fmt.Sprintf("%v", 50*units.MiB),
allocatableMemory: fmt.Sprintf("%v", 150*units.MiB-KubeletEvictionHardMemory),
scenario: "reserved cpu, memory and ephemeral storage",
capacityCpu: "8",
reservedCpu: "1000m",
allocatableCpu: "7000m",
capacityMemory: fmt.Sprintf("%v", 200*units.MiB),
reservedMemory: fmt.Sprintf("%v", 50*units.MiB),
allocatableMemory: fmt.Sprintf("%v", 150*units.MiB-GetKubeletEvictionHardForMemory(nil)),
capacityEphemeralStorage: fmt.Sprintf("%v", 200*units.GiB),
reservedEphemeralStorage: fmt.Sprintf("%v", 40*units.GiB),
allocatableEphemeralStorage: fmt.Sprintf("%v", 160*units.GiB-GetKubeletEvictionHardForEphemeralStorage(200*GiB, nil)),
},
}
for _, tc := range testCases {
t.Run(tc.scenario, func(t *testing.T) {
tb := GceTemplateBuilder{}
capacity, err := makeResourceList(tc.capacityCpu, tc.capacityMemory, 0)
capacity, err := makeResourceList(tc.capacityCpu, tc.capacityMemory, 0, tc.capacityEphemeralStorage)
assert.NoError(t, err)
reserved, err := makeResourceList(tc.reservedCpu, tc.reservedMemory, 0)
reserved, err := makeResourceList(tc.reservedCpu, tc.reservedMemory, 0, tc.reservedEphemeralStorage)
assert.NoError(t, err)
expectedAllocatable, err := makeResourceList(tc.allocatableCpu, tc.allocatableMemory, 0)
expectedAllocatable, err := makeResourceList(tc.allocatableCpu, tc.allocatableMemory, 0, tc.allocatableEphemeralStorage)
assert.NoError(t, err)
allocatable := tb.CalculateAllocatable(capacity, reserved)
allocatable := tb.CalculateAllocatable(capacity, reserved, ParseEvictionHardOrGetDefault(nil))
assertEqualResourceLists(t, "Allocatable", expectedAllocatable, allocatable)
})
}
@ -253,26 +270,30 @@ func TestCalculateAllocatable(t *testing.T) {
func TestBuildAllocatableFromKubeEnv(t *testing.T) {
type testCase struct {
kubeEnv string
capacityCpu string
capacityMemory string
expectedCpu string
expectedMemory string
gpuCount int64
expectedErr bool
kubeEnv string
capacityCpu string
capacityMemory string
capacityEphemeralStorage string
expectedCpu string
expectedMemory string
expectedEphemeralStorage string
gpuCount int64
expectedErr bool
}
testCases := []testCase{{
kubeEnv: "ENABLE_NODE_PROBLEM_DETECTOR: 'daemonset'\n" +
"NODE_LABELS: a=b,c=d,cloud.google.com/gke-nodepool=pool-3,cloud.google.com/gke-preemptible=true\n" +
"DNS_SERVER_IP: '10.0.0.10'\n" +
"KUBELET_TEST_ARGS: --experimental-allocatable-ignore-eviction --kube-reserved=cpu=1000m,memory=300000Mi\n" +
"KUBELET_TEST_ARGS: --experimental-allocatable-ignore-eviction --kube-reserved=cpu=1000m,memory=300000Mi,ephemeral-storage=30Gi\n" +
"NODE_TAINTS: 'dedicated=ml:NoSchedule,test=dev:PreferNoSchedule,a=b:c'\n",
capacityCpu: "4000m",
capacityMemory: "700000Mi",
expectedCpu: "3000m",
expectedMemory: "399900Mi", // capacityMemory-kube_reserved-kubeletEvictionHardMemory
gpuCount: 10,
expectedErr: false,
capacityCpu: "4000m",
capacityMemory: "700000Mi",
capacityEphemeralStorage: "100Gi",
expectedCpu: "3000m",
expectedMemory: "399900Mi", // capacityMemory-kube_reserved-DefaultKubeletEvictionHardMemory
expectedEphemeralStorage: "60Gi", // capacityEphemeralStorage-kube_reserved-DefaultKubeletEvictionHardMemory
gpuCount: 10,
expectedErr: false,
}, {
kubeEnv: "ENABLE_NODE_PROBLEM_DETECTOR: 'daemonset'\n" +
"NODE_LABELS: a=b,c=d,cloud.google.com/gke-nodepool=pool-3,cloud.google.com/gke-preemptible=true\n" +
@ -283,15 +304,15 @@ func TestBuildAllocatableFromKubeEnv(t *testing.T) {
expectedErr: true,
}}
for _, tc := range testCases {
capacity, err := makeResourceList(tc.capacityCpu, tc.capacityMemory, tc.gpuCount)
capacity, err := makeResourceList(tc.capacityCpu, tc.capacityMemory, tc.gpuCount, tc.capacityEphemeralStorage)
assert.NoError(t, err)
tb := GceTemplateBuilder{}
allocatable, err := tb.BuildAllocatableFromKubeEnv(capacity, tc.kubeEnv)
allocatable, err := tb.BuildAllocatableFromKubeEnv(capacity, tc.kubeEnv, ParseEvictionHardOrGetDefault(nil))
if tc.expectedErr {
assert.Error(t, err)
} else {
assert.NoError(t, err)
expectedResources, err := makeResourceList(tc.expectedCpu, tc.expectedMemory, tc.gpuCount)
expectedResources, err := makeResourceList(tc.expectedCpu, tc.expectedMemory, tc.gpuCount, tc.expectedEphemeralStorage)
assert.NoError(t, err)
for res, expectedQty := range expectedResources {
qty, found := allocatable[res]
@ -302,6 +323,50 @@ func TestBuildAllocatableFromKubeEnv(t *testing.T) {
}
}
// TestParseEvictionHard verifies that ParseEvictionHardOrGetDefault parses
// valid memory quantities and ephemeral-storage percentages, and falls back to
// the hardcoded defaults for empty, unparsable, or out-of-range values.
func TestParseEvictionHard(t *testing.T) {
	type testCase struct {
		memory                        string
		ephemeralStorage              string
		memoryExpected                int64 // bytes
		ephemeralStorageRatioExpected float64
	}
	testCases := []testCase{{
		memory:                        "200Mi",
		ephemeralStorage:              "15%",
		memoryExpected:                200 * 1024 * 1024,
		ephemeralStorageRatioExpected: 0.15,
	}, {
		memory:                        "2Gi",
		ephemeralStorage:              "11.5%",
		memoryExpected:                2 * 1024 * 1024 * 1024,
		ephemeralStorageRatioExpected: 0.115,
	}, {
		memory:                        "",
		ephemeralStorage:              "", // empty string, fallback to default
		memoryExpected:                100 * 1024 * 1024,
		ephemeralStorageRatioExpected: 0.1,
	}, {
		memory:                        "110292",
		ephemeralStorage:              "11", // percentage missing, should fallback to default
		memoryExpected:                110292,
		ephemeralStorageRatioExpected: 0.1,
	}, {
		memory:                        "abcb12", // unparsable, fallback to default
		ephemeralStorage:              "-11%",   // negative percentage, should fallback to default
		memoryExpected:                100 * 1024 * 1024,
		ephemeralStorageRatioExpected: 0.1,
	}}
	for _, tc := range testCases {
		test := map[string]string{
			MemoryEvictionHardTag:           tc.memory,
			EphemeralStorageEvictionHardTag: tc.ephemeralStorage,
		}
		actualOutput := ParseEvictionHardOrGetDefault(test)
		assert.EqualValues(t, tc.memoryExpected, actualOutput.MemoryEvictionQuantity, "TestParseEviction Failed Memory. %v expected does not match %v actual.", tc.memoryExpected, actualOutput.MemoryEvictionQuantity)
		// Fixed: the failure message previously reported tc.memoryExpected here.
		assert.EqualValues(t, tc.ephemeralStorageRatioExpected, actualOutput.EphemeralStorageEvictionRatio, "TestParseEviction Failed Ephemeral Storage. %v expected does not match %v actual.", tc.ephemeralStorageRatioExpected, actualOutput.EphemeralStorageEvictionRatio)
	}
}
func TestGetAcceleratorCount(t *testing.T) {
testCases := []struct {
accelerators []*gce.AcceleratorConfig
@ -371,7 +436,7 @@ func TestBuildCapacityMemory(t *testing.T) {
t.Run(fmt.Sprintf("%v", idx), func(t *testing.T) {
tb := GceTemplateBuilder{}
noAccelerators := make([]*gce.AcceleratorConfig, 0)
buildCapacity, err := tb.BuildCapacity(tc.physicalCpu, tc.physicalMemory, noAccelerators, tc.os, nil)
buildCapacity, err := tb.BuildCapacity(tc.physicalCpu, tc.physicalMemory, noAccelerators, tc.os, OperatingSystemDistributionCOS, -1, nil)
assert.NoError(t, err)
expectedCapacity, err := makeResourceList2(tc.physicalCpu, tc.expectedCapacityMemory, 0, 110)
assert.NoError(t, err)
@ -715,23 +780,129 @@ func TestExtractOperatingSystemFromKubeEnv(t *testing.T) {
}
}
// TestExtractOperatingSystemDistributionFromKubeEnv checks the mapping from
// kube-env AUTOSCALER_ENV_VARS os_distribution values to
// OperatingSystemDistribution, including defaulting when the variable or the
// whole AUTOSCALER_ENV_VARS block is absent, and Unknown for empty or
// unrecognized values.
func TestExtractOperatingSystemDistributionFromKubeEnv(t *testing.T) {
	type testCase struct {
		name                                string
		kubeEnv                             string
		expectedOperatingSystemDistribution OperatingSystemDistribution
	}
	testCases := []testCase{
		{
			name: "cos",
			kubeEnv: "ENABLE_NODE_PROBLEM_DETECTOR: 'daemonset'\n" +
				"DNS_SERVER_IP: '10.0.0.10'\n" +
				"AUTOSCALER_ENV_VARS: node_labels=a=b,c=d,cloud.google.com/gke-nodepool=pool-3,cloud.google.com/gke-preemptible=true;" +
				"node_taints='dedicated=ml:NoSchedule,test=dev:PreferNoSchedule,a=b:c';" +
				"kube_reserved=cpu=1000m,memory=300000Mi;" +
				"os_distribution=cos\n" +
				"KUBELET_TEST_ARGS: --experimental-allocatable-ignore-eviction\n",
			expectedOperatingSystemDistribution: OperatingSystemDistributionCOS,
		},
		{
			name: "ubuntu",
			kubeEnv: "ENABLE_NODE_PROBLEM_DETECTOR: 'daemonset'\n" +
				"DNS_SERVER_IP: '10.0.0.10'\n" +
				"AUTOSCALER_ENV_VARS: node_labels=a=b,c=d,cloud.google.com/gke-nodepool=pool-3,cloud.google.com/gke-preemptible=true;" +
				"node_taints='dedicated=ml:NoSchedule,test=dev:PreferNoSchedule,a=b:c';" +
				"kube_reserved=cpu=1000m,memory=300000Mi;" +
				"os_distribution=ubuntu\n" +
				"KUBELET_TEST_ARGS: --experimental-allocatable-ignore-eviction\n",
			expectedOperatingSystemDistribution: OperatingSystemDistributionUbuntu,
		},
		{
			// NOTE(review): this fixture's node_labels line lacks the trailing
			// ';' separator present in the cos/ubuntu cases — confirm whether
			// that is intentional.
			name: "windows ltsc",
			kubeEnv: "ENABLE_NODE_PROBLEM_DETECTOR: 'daemonset'\n" +
				"DNS_SERVER_IP: '10.0.0.10'\n" +
				"AUTOSCALER_ENV_VARS: node_labels=a=b,c=d,cloud.google.com/gke-nodepool=pool-3,cloud.google.com/gke-preemptible=true" +
				"node_taints='dedicated=ml:NoSchedule,test=dev:PreferNoSchedule,a=b:c';" +
				"kube_reserved=cpu=1000m,memory=300000Mi;" +
				"os_distribution=windows_ltsc\n" +
				"KUBELET_TEST_ARGS: --experimental-allocatable-ignore-eviction\n",
			expectedOperatingSystemDistribution: OperatingSystemDistributionWindowsLTSC,
		},
		{
			// NOTE(review): same missing ';' after gke-preemptible=true as above.
			name: "windows sac",
			kubeEnv: "ENABLE_NODE_PROBLEM_DETECTOR: 'daemonset'\n" +
				"DNS_SERVER_IP: '10.0.0.10'\n" +
				"AUTOSCALER_ENV_VARS: node_labels=a=b,c=d,cloud.google.com/gke-nodepool=pool-3,cloud.google.com/gke-preemptible=true" +
				"node_taints='dedicated=ml:NoSchedule,test=dev:PreferNoSchedule,a=b:c';" +
				"kube_reserved=cpu=1000m,memory=300000Mi;" +
				"os_distribution=windows_sac\n" +
				"KUBELET_TEST_ARGS: --experimental-allocatable-ignore-eviction\n",
			expectedOperatingSystemDistribution: OperatingSystemDistributionWindowsSAC,
		},
		{
			// Entire AUTOSCALER_ENV_VARS block absent: default distribution is used.
			name: "no AUTOSCALER_ENV_VARS",
			kubeEnv: "ENABLE_NODE_PROBLEM_DETECTOR: 'daemonset'\n" +
				"NODE_LABELS: a=b,c=d,cloud.google.com/gke-nodepool=pool-3,cloud.google.com/gke-preemptible=true\n" +
				"DNS_SERVER_IP: '10.0.0.10'\n" +
				"KUBELET_TEST_ARGS: --experimental-allocatable-ignore-eviction --kube-reserved=cpu=1000m,memory=300000Mi\n" +
				"NODE_TAINTS: 'dedicated=ml:NoSchedule,test=dev:PreferNoSchedule,a=b:c'\n",
			expectedOperatingSystemDistribution: OperatingSystemDistributionDefault,
		},
		{
			// AUTOSCALER_ENV_VARS present but without os_distribution: default is used.
			name: "no os distribution defined",
			kubeEnv: "ENABLE_NODE_PROBLEM_DETECTOR: 'daemonset'\n" +
				"DNS_SERVER_IP: '10.0.0.10'\n" +
				"AUTOSCALER_ENV_VARS: node_labels=a=b,c=d,cloud.google.com/gke-nodepool=pool-3,cloud.google.com/gke-preemptible=true;" +
				"node_taints='dedicated=ml:NoSchedule,test=dev:PreferNoSchedule,a=b:c';" +
				"kube_reserved=cpu=1000m,memory=300000Mi\n" +
				"KUBELET_TEST_ARGS: --experimental-allocatable-ignore-eviction\n",
			expectedOperatingSystemDistribution: OperatingSystemDistributionDefault,
		},
		{
			// An explicitly empty os_distribution value maps to Unknown, not the default.
			name: "os distribution is empty",
			kubeEnv: "ENABLE_NODE_PROBLEM_DETECTOR: 'daemonset'\n" +
				"DNS_SERVER_IP: '10.0.0.10'\n" +
				"AUTOSCALER_ENV_VARS: node_labels=a=b,c=d,cloud.google.com/gke-nodepool=pool-3,cloud.google.com/gke-preemptible=true;" +
				"node_taints='dedicated=ml:NoSchedule,test=dev:PreferNoSchedule,a=b:c';" +
				"kube_reserved=cpu=1000m,memory=300000Mi;" +
				"os_distribution=\n" +
				"KUBELET_TEST_ARGS: --experimental-allocatable-ignore-eviction\n",
			expectedOperatingSystemDistribution: OperatingSystemDistributionUnknown,
		},
		{
			// An unrecognized distribution value maps to Unknown.
			name: "unknown (macos)",
			kubeEnv: "ENABLE_NODE_PROBLEM_DETECTOR: 'daemonset'\n" +
				"DNS_SERVER_IP: '10.0.0.10'\n" +
				"AUTOSCALER_ENV_VARS: node_labels=a=b,c=d,cloud.google.com/gke-nodepool=pool-3,cloud.google.com/gke-preemptible=true" +
				"node_taints='dedicated=ml:NoSchedule,test=dev:PreferNoSchedule,a=b:c';" +
				"kube_reserved=cpu=1000m,memory=300000Mi;" +
				"os_distribution=macos\n" +
				"KUBELET_TEST_ARGS: --experimental-allocatable-ignore-eviction\n",
			expectedOperatingSystemDistribution: OperatingSystemDistributionUnknown,
		},
	}
	for _, tc := range testCases {
		t.Run(tc.name, func(t *testing.T) {
			actualOperatingSystem := extractOperatingSystemDistributionFromKubeEnv(tc.kubeEnv)
			assert.Equal(t, tc.expectedOperatingSystemDistribution, actualOperatingSystem)
		})
	}
}
func TestParseKubeReserved(t *testing.T) {
type testCase struct {
reserved string
expectedCpu string
expectedMemory string
expectedErr bool
reserved string
expectedCpu string
expectedMemory string
expectedEphemeralStorage string
expectedErr bool
}
testCases := []testCase{{
reserved: "cpu=1000m,memory=300000Mi",
expectedCpu: "1000m",
expectedMemory: "300000Mi",
expectedErr: false,
reserved: "cpu=1000m,memory=300000Mi,ephemeral-storage=100Gi",
expectedCpu: "1000m",
expectedMemory: "300000Mi",
expectedEphemeralStorage: "100Gi",
expectedErr: false,
}, {
reserved: "cpu=1000m,ignored=300Mi,memory=0",
expectedCpu: "1000m",
expectedMemory: "0",
expectedErr: false,
reserved: "cpu=1000m,ignored=300Mi,memory=0,ephemeral-storage=10Gi",
expectedCpu: "1000m",
expectedMemory: "0",
expectedEphemeralStorage: "10Gi",
expectedErr: false,
}, {
reserved: "This is a wrong reserved",
expectedErr: true,
@ -743,7 +914,7 @@ func TestParseKubeReserved(t *testing.T) {
assert.Nil(t, resources)
} else {
assert.NoError(t, err)
expectedResources, err := makeResourceList(tc.expectedCpu, tc.expectedMemory, 0)
expectedResources, err := makeResourceList(tc.expectedCpu, tc.expectedMemory, 0, tc.expectedEphemeralStorage)
assert.NoError(t, err)
assertEqualResourceLists(t, "Resources", expectedResources, resources)
}
@ -758,7 +929,7 @@ func makeTaintSet(taints []apiv1.Taint) map[apiv1.Taint]bool {
return set
}
func makeResourceList(cpu string, memory string, gpu int64) (apiv1.ResourceList, error) {
func makeResourceList(cpu string, memory string, gpu int64, ephemeralStorage string) (apiv1.ResourceList, error) {
result := apiv1.ResourceList{}
resultCpu, err := resource.ParseQuantity(cpu)
if err != nil {
@ -777,6 +948,13 @@ func makeResourceList(cpu string, memory string, gpu int64) (apiv1.ResourceList,
}
result[gpuUtils.ResourceNvidiaGPU] = resultGpu
}
if len(ephemeralStorage) != 0 {
resultEphemeralStorage, err := resource.ParseQuantity(ephemeralStorage)
if err != nil {
return nil, err
}
result[apiv1.ResourceEphemeralStorage] = resultEphemeralStorage
}
return result, nil
}

View File

@ -18,6 +18,7 @@ package simulator
import (
"fmt"
schedulerframework "k8s.io/kubernetes/pkg/scheduler/framework"
)