autoscaler/cluster-autoscaler/cloudprovider/volcengine/volcengine_cloud_provider.go

237 lines
8.2 KiB
Go

/*
Copyright 2023 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package volcengine
import (
"fmt"
"io"
"os"
"strings"
"gopkg.in/gcfg.v1"
apiv1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider"
"k8s.io/autoscaler/cluster-autoscaler/config"
"k8s.io/autoscaler/cluster-autoscaler/config/dynamic"
"k8s.io/autoscaler/cluster-autoscaler/utils/errors"
"k8s.io/autoscaler/cluster-autoscaler/utils/gpu"
"k8s.io/klog/v2"
)
// volcengineCloudProvider implements CloudProvider interface.
//
// It keeps the manager used to talk to Volcengine, the resource limiter
// handed in at construction time, and the scaling groups that were added
// through addAsg.
type volcengineCloudProvider struct {
// volcengineManager resolves instances to scaling groups and is told about
// every scaling group added to this provider.
volcengineManager VolcengineManager
// resourceLimiter is returned unchanged by GetResourceLimiter.
resourceLimiter *cloudprovider.ResourceLimiter
// scalingGroups backs NodeGroups; entries are appended by addAsg.
scalingGroups []*AutoScalingGroup
}
// Name reports the identifier of this cloud provider, i.e.
// cloudprovider.VolcengineProviderName.
func (v *volcengineCloudProvider) Name() string {
	providerName := cloudprovider.VolcengineProviderName
	return providerName
}
// NodeGroups lists every scaling group currently held by this provider as a
// cloudprovider.NodeGroup. The returned slice is freshly allocated, so callers
// may modify it without affecting the provider's own list.
func (v *volcengineCloudProvider) NodeGroups() []cloudprovider.NodeGroup {
	groups := make([]cloudprovider.NodeGroup, len(v.scalingGroups))
	for i := range v.scalingGroups {
		groups[i] = v.scalingGroups[i]
	}
	return groups
}
// NodeGroupForNode returns the node group for the given node, nil if the node
// should not be processed by cluster autoscaler, or non-nil error if such
// occurred. Must be implemented.
//
// The instance ID is extracted from node.Spec.ProviderID with
// ecsInstanceFromProviderId. An error is returned when the provider ID cannot
// be parsed or when the extracted instance ID is empty. Otherwise the lookup is
// delegated to the manager's GetAsgForInstance.
func (v *volcengineCloudProvider) NodeGroupForNode(node *apiv1.Node) (cloudprovider.NodeGroup, error) {
	instanceId, err := ecsInstanceFromProviderId(node.Spec.ProviderID)
	if err != nil {
		return nil, err
	}
	if len(instanceId) == 0 {
		klog.Warningf("Node %v has no providerId", node.Name)
		return nil, fmt.Errorf("provider id missing from node: %s", node.Name)
	}

	asg, err := v.volcengineManager.GetAsgForInstance(instanceId)
	if err != nil {
		return nil, err
	}
	// NOTE(review): GetAsgForInstance presumably returns a concrete pointer
	// type (confirm against the VolcengineManager interface). Returning a nil
	// pointer directly would wrap it in a non-nil cloudprovider.NodeGroup, so
	// callers comparing the result against nil would be misled. Return an
	// untyped nil explicitly instead.
	if asg == nil {
		return nil, nil
	}
	return asg, nil
}
// HasInstance is meant to report whether the node still has a corresponding
// instance on the cloud provider side. This provider does not implement the
// check: it always returns true together with cloudprovider.ErrNotImplemented.
func (v *volcengineCloudProvider) HasInstance(node *apiv1.Node) (bool, error) {
	const assumedPresent = true
	return assumedPresent, cloudprovider.ErrNotImplemented
}
// Pricing would return the pricing model for this cloud provider.
// Implementation optional; this provider has none and always returns a nil
// model with cloudprovider.ErrNotImplemented.
func (v *volcengineCloudProvider) Pricing() (cloudprovider.PricingModel, errors.AutoscalerError) {
	var model cloudprovider.PricingModel
	return model, cloudprovider.ErrNotImplemented
}
// GetAvailableMachineTypes gets all machine types that can be requested from
// the cloud provider. Implementation optional; this provider always returns an
// empty, non-nil slice and a nil error.
func (v *volcengineCloudProvider) GetAvailableMachineTypes() ([]string, error) {
	machineTypes := make([]string, 0)
	return machineTypes, nil
}
// NewNodeGroup is the hook for building a theoretical node group from a node
// definition, without creating it on the cloud provider side.
// Implementation optional; this provider does not support it and always
// returns a nil node group with cloudprovider.ErrNotImplemented.
func (v *volcengineCloudProvider) NewNodeGroup(machineType string, labels map[string]string, systemLabels map[string]string, taints []apiv1.Taint, extraResources map[string]resource.Quantity) (cloudprovider.NodeGroup, error) {
	var group cloudprovider.NodeGroup
	return group, cloudprovider.ErrNotImplemented
}
// GetResourceLimiter returns the struct containing limits (max, min) for
// resources (cores, memory etc.). It is the limiter stored on the provider and
// the error is always nil.
func (v *volcengineCloudProvider) GetResourceLimiter() (*cloudprovider.ResourceLimiter, error) {
	limiter := v.resourceLimiter
	return limiter, nil
}
// GPULabel returns the label added to nodes with GPU resource. This provider
// defines no such label and returns the empty string.
func (v *volcengineCloudProvider) GPULabel() string {
	const noLabel = ""
	return noLabel
}
// GetAvailableGPUTypes returns all available GPU types the cloud provider
// supports. This provider reports none: the result is an empty, non-nil map.
func (v *volcengineCloudProvider) GetAvailableGPUTypes() map[string]struct{} {
	gpuTypes := make(map[string]struct{})
	return gpuTypes
}
// Cleanup cleans up open resources before the cloud provider is destroyed,
// i.e. goroutines etc. This provider has nothing to release and always
// returns nil.
func (v *volcengineCloudProvider) Cleanup() error {
	var noErr error
	return noErr
}
// Refresh is called before every main loop and can be used to dynamically
// update cloud provider state. In particular the list of node groups returned
// by NodeGroups can change as a result of CloudProvider.Refresh().
// This implementation performs no work and always returns nil.
func (v *volcengineCloudProvider) Refresh() error {
	var noErr error
	return noErr
}
// IsNodeCandidateForScaleDown is meant to report whether the node is a good
// candidate for scaling down. This provider does not implement the check: it
// always returns true together with cloudprovider.ErrNotImplemented.
func (v *volcengineCloudProvider) IsNodeCandidateForScaleDown(node *apiv1.Node) (bool, error) {
	const candidate = true
	return candidate, cloudprovider.ErrNotImplemented
}
// GetNodeGpuConfig returns the label, type and resource name for the GPU added
// to node. If node doesn't have any GPUs, it returns nil. The work is delegated
// to gpu.GetNodeGPUFromCloudProvider, with this provider passed as the first
// argument.
func (v *volcengineCloudProvider) GetNodeGpuConfig(node *apiv1.Node) *cloudprovider.GpuConfig {
	gpuConfig := gpu.GetNodeGPUFromCloudProvider(v, node)
	return gpuConfig
}
// addNodeGroup builds a scaling group from the given node group spec string
// and adds it to the provider. If the group cannot be built, the error is
// logged and returned, and the provider is left unchanged.
func (v *volcengineCloudProvider) addNodeGroup(spec string) error {
	asg, buildErr := buildScalingGroupFromSpec(v.volcengineManager, spec)
	if buildErr != nil {
		klog.Errorf("Failed to build scaling group from spec: %v", buildErr)
		return buildErr
	}

	v.addAsg(asg)
	return nil
}
// addAsg appends the scaling group to the provider's list and then registers
// it with the manager.
func (v *volcengineCloudProvider) addAsg(asg *AutoScalingGroup) {
	v.scalingGroups = append(v.scalingGroups, asg)

	manager := v.volcengineManager
	manager.RegisterAsg(asg)
}
// buildScalingGroupFromSpec parses a node group spec string and turns it into
// an AutoScalingGroup bound to the given manager.
//
// The spec's name is used as the scaling group ID and is looked up through
// manager.GetAsgById. The min/max instance numbers of the result are copied
// from the looked-up group. An error is returned when the spec cannot be
// parsed or the lookup fails.
func buildScalingGroupFromSpec(manager VolcengineManager, spec string) (*AutoScalingGroup, error) {
	parsed, parseErr := dynamic.SpecFromString(spec, true)
	if parseErr != nil {
		return nil, fmt.Errorf("failed to parse node group spec: %v", parseErr)
	}

	groupId := parsed.Name
	existing, lookupErr := manager.GetAsgById(groupId)
	if lookupErr != nil {
		klog.Errorf("scaling group %s not exists", groupId)
		return nil, lookupErr
	}

	asg := &AutoScalingGroup{
		manager:           manager,
		asgId:             groupId,
		minInstanceNumber: existing.minInstanceNumber,
		maxInstanceNumber: existing.maxInstanceNumber,
	}
	return asg, nil
}
// BuildVolcengine builds CloudProvider implementation for Volcengine.
//
// opts.CloudConfig must name the cloud provider configuration file. A failure
// to read that file is only logged as a warning and an empty configuration is
// used instead; the configuration's validate method then decides whether the
// process can continue. Any failure in validation, manager creation or
// provider construction terminates the process through klog.Fatal/Fatalf, so
// this function only returns on success.
func BuildVolcengine(opts config.AutoscalingOptions, do cloudprovider.NodeGroupDiscoveryOptions, rl *cloudprovider.ResourceLimiter) cloudprovider.CloudProvider {
	if opts.CloudConfig == "" {
		klog.Fatalf("The path to the cloud provider configuration file must be set via the --cloud-config command line parameter")
	}

	cloudConf, readErr := readConf(opts.CloudConfig)
	if readErr != nil {
		// Best effort: fall back to an empty configuration and let validate()
		// judge whether it is usable.
		klog.Warningf("Failed to read cloud provider configuration: %v", readErr)
		cloudConf = &cloudConfig{}
	}

	if !cloudConf.validate() {
		// validate() reports only a bool, so the read error is the sole extra
		// detail available. Mention it only when there actually was one;
		// otherwise the message would end in a misleading "<nil>".
		if readErr != nil {
			klog.Fatalf("Failed to validate cloud provider configuration (configuration file could not be read: %v)", readErr)
		}
		klog.Fatal("Failed to validate cloud provider configuration")
	}

	manager, err := CreateVolcengineManager(cloudConf)
	if err != nil {
		klog.Fatalf("Failed to create volcengine manager: %v", err)
	}

	provider, err := buildVolcengineProvider(manager, do, rl)
	if err != nil {
		klog.Fatalf("Failed to create volcengine cloud provider: %v", err)
	}
	return provider
}
// buildVolcengineProvider assembles a volcengineCloudProvider from the given
// manager and resource limiter and adds one node group per entry of
// do.NodeGroupSpecs.
//
// It returns an error when static discovery is not specified in do, or as soon
// as any node group spec cannot be added (the failure is also logged).
func buildVolcengineProvider(manager VolcengineManager, do cloudprovider.NodeGroupDiscoveryOptions, rl *cloudprovider.ResourceLimiter) (cloudprovider.CloudProvider, error) {
	if static := do.StaticDiscoverySpecified(); !static {
		return nil, fmt.Errorf("static discovery configuration must be provided for volcengine cloud provider")
	}

	p := &volcengineCloudProvider{volcengineManager: manager, resourceLimiter: rl}
	for i := range do.NodeGroupSpecs {
		nodeGroupSpec := do.NodeGroupSpecs[i]
		addErr := p.addNodeGroup(nodeGroupSpec)
		if addErr != nil {
			klog.Warningf("Failed to add node group from spec %s: %v", nodeGroupSpec, addErr)
			return nil, addErr
		}
	}
	return p, nil
}
// readConf opens the file at confFile and decodes it into a cloudConfig using
// gcfg.ReadInto. The file is closed before returning. Errors from opening or
// decoding the file are returned unchanged, with a nil configuration.
func readConf(confFile string) (*cloudConfig, error) {
	file, openErr := os.Open(confFile)
	if openErr != nil {
		return nil, openErr
	}
	var source io.ReadCloser = file
	defer source.Close()

	parsed := new(cloudConfig)
	if readErr := gcfg.ReadInto(parsed, source); readErr != nil {
		return nil, readErr
	}
	return parsed, nil
}
// ecsInstanceFromProviderId extracts the instance ID from a provider ID of the
// form "volcengine://<instance-id>". It returns an error when the provider ID
// does not start with that prefix. A provider ID consisting of the prefix
// alone yields an empty instance ID and no error.
func ecsInstanceFromProviderId(providerId string) (string, error) {
	const scheme = "volcengine://"
	if strings.HasPrefix(providerId, scheme) {
		return providerId[len(scheme):], nil
	}
	return "", fmt.Errorf("providerId %q doesn't match prefix %q", providerId, scheme)
}