Merge pull request #4274 from kinvolk/imran/cloud-provider-packet-fix

Cloud provider[Packet] fixes
This commit is contained in:
Kubernetes Prow Robot 2021-08-19 11:35:25 -07:00 committed by GitHub
commit fb8fdf819b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 82 additions and 6 deletions

View File

@ -79,6 +79,35 @@ affinity:
- t1.small.x86
```
## CCM and Controller node labels
### CCM
By default, the autoscaler assumes that the older, deprecated `packet-ccm` is installed in your
cluster. If you have instead migrated to the new `cloud-provider-equinix-metal` CCM, you must tell
the autoscaler by setting the following environment variable in its deployment:
```
env:
- name: INSTALLED_CCM
value: cloud-provider-equinix-metal
```
**NOTE**: As a prerequisite, ensure that all worker nodes in your cluster have the prefix `equinixmetal://` in
the Node spec field `.spec.providerID`. If any existing worker node still has the prefix `packet://`, drain
the node, remove it from the cluster, and restart the kubelet on that worker node so that it re-registers.
This ensures that the `cloud-provider-equinix-metal` CCM sets the UUID with the prefix `equinixmetal://` in
the field `.spec.providerID`.
### Controller node labels
The autoscaler assumes that control plane nodes in your cluster are identified by the label
`node-role.kubernetes.io/master`. If this assumption does not hold for your cluster, set the following
environment variable in the deployment to the label key that identifies your control plane nodes:
```
env:
- name: PACKET_CONTROLLER_NODE_IDENTIFIER_LABEL
value: <label>
```
## Notes
The autoscaler will not remove nodes which have non-default kube-system pods.

View File

@ -37,6 +37,11 @@ const (
ProviderName = "packet"
// GPULabel is the label added to nodes with GPU resource.
GPULabel = "cloud.google.com/gke-accelerator"
// DefaultControllerNodeLabelKey is the default label key used to identify
// master/controller nodes.
DefaultControllerNodeLabelKey = "node-role.kubernetes.io/master"
// ControllerNodeIdentifierEnv is the string for the environment variable.
ControllerNodeIdentifierEnv = "PACKET_CONTROLLER_NODE_IDENTIFIER_LABEL"
)
var (
@ -94,7 +99,13 @@ func (pcp *packetCloudProvider) AddNodeGroup(group packetNodeGroup) {
//
// Since only a single node group is currently supported, the first node group is always returned.
func (pcp *packetCloudProvider) NodeGroupForNode(node *apiv1.Node) (cloudprovider.NodeGroup, error) {
if _, found := node.ObjectMeta.Labels["node-role.kubernetes.io/master"]; found {
controllerNodeLabel := os.Getenv(ControllerNodeIdentifierEnv)
if controllerNodeLabel == "" {
klog.V(3).Infof("env %s not set, using default: %s", ControllerNodeIdentifierEnv, DefaultControllerNodeLabelKey)
controllerNodeLabel = DefaultControllerNodeLabelKey
}
if _, found := node.ObjectMeta.Labels[controllerNodeLabel]; found {
return nil, nil
}
nodeGroupId, err := pcp.packetManager.NodeGroupForNode(node.ObjectMeta.Labels, node.Spec.ProviderID)

View File

@ -48,6 +48,8 @@ import (
const (
userAgent = "kubernetes/cluster-autoscaler/" + version.ClusterAutoscalerVersion
expectedAPIContentTypePrefix = "application/json"
packetPrefix = "packet://"
equinixMetalPrefix = "equinixmetal://"
)
type instanceType struct {
@ -292,7 +294,12 @@ func Contains(a []string, x string) bool {
// createPacketManagerRest sets up the client and returns
// a packetManagerRest.
func createPacketManagerRest(configReader io.Reader, discoverOpts cloudprovider.NodeGroupDiscoveryOptions, opts config.AutoscalingOptions) (*packetManagerRest, error) {
var cfg ConfigFile
// Initialize ConfigFile instance
cfg := ConfigFile{
DefaultNodegroupdef: ConfigNodepool{},
Nodegroupdef: map[string]*ConfigNodepool{},
}
if configReader != nil {
if err := gcfg.ReadInto(&cfg, configReader); err != nil {
klog.Errorf("Couldn't read config: %v", err)
@ -431,7 +438,11 @@ func (mgr *packetManagerRest) NodeGroupForNode(labels map[string]string, nodeId
if nodegroup, ok := labels["pool"]; ok {
return nodegroup, nil
}
device, err := mgr.getPacketDevice(context.TODO(), strings.TrimPrefix(nodeId, "packet://"))
trimmedNodeId := strings.TrimPrefix(nodeId, packetPrefix)
trimmedNodeId = strings.TrimPrefix(trimmedNodeId, equinixMetalPrefix)
device, err := mgr.getPacketDevice(context.TODO(), trimmedNodeId)
if err != nil {
return "", fmt.Errorf("Could not find group for node: %s %s", nodeId, err)
}
@ -590,9 +601,30 @@ func (mgr *packetManagerRest) getNodes(nodegroup string) ([]string, error) {
nodes := []string{}
// This bit of code, along with the switch statement, checks whether the CCM installed on the cluster is
// `packet-ccm` or `cloud-provider-equinix-metal`. The check is important because, depending
// on the CCM installed, the prefix in the providerID of the K8s Node spec is either `packet://` or
// `equinixmetal://`. It is needed because `packet-ccm` is now deprecated and has been renamed to
// `cloud-provider-equinix-metal`.
// The code reads the INSTALLED_CCM env var. If it is set to `cloud-provider-equinix-metal`,
// the prefix is `equinixmetal://`; in every other case (including unset) the prefix is `packet://`.
// At a later point in time, the `equinixmetal://` prefix should become the default, or the
// `packet://` prefix should be dropped entirely. This should presumably happen when the packet code in
// autoscaler is renamed from packet to equinixmetal.
prefix := packetPrefix
switch installedCCM := os.Getenv("INSTALLED_CCM"); installedCCM {
case "packet-ccm":
prefix = packetPrefix
case "cloud-provider-equinix-metal":
prefix = equinixMetalPrefix
default:
klog.V(3).Info("Unrecognized value: expected INSTALLED_CCM to be either `packet-ccm` or `cloud-provider-equinix-metal`, using default: `packet-ccm`")
}
for _, d := range devices.Devices {
if Contains(d.Tags, "k8s-cluster-"+mgr.getNodePoolDefinition(nodegroup).clusterName) && Contains(d.Tags, "k8s-nodepool-"+nodegroup) {
nodes = append(nodes, fmt.Sprintf("packet://%s", d.ID))
nodes = append(nodes, fmt.Sprintf("%s%s", prefix, d.ID))
}
}
@ -660,11 +692,15 @@ func (mgr *packetManagerRest) deleteNodes(nodegroup string, nodes []NodeRef, upd
klog.Infof("Checking device %v", d)
if Contains(d.Tags, "k8s-cluster-"+mgr.getNodePoolDefinition(nodegroup).clusterName) && Contains(d.Tags, "k8s-nodepool-"+nodegroup) {
klog.Infof("nodegroup match %s %s", d.Hostname, n.Name)
trimmedName := strings.TrimPrefix(n.Name, packetPrefix)
trimmedName = strings.TrimPrefix(trimmedName, equinixMetalPrefix)
switch {
case d.Hostname == n.Name:
klog.V(1).Infof("Matching Packet Device %s - %s", d.Hostname, d.ID)
errList = append(errList, mgr.deleteDevice(ctx, nodegroup, d.ID))
case fakeNode && strings.TrimPrefix(n.Name, "packet://") == d.ID:
case fakeNode && trimmedName == d.ID:
klog.V(1).Infof("Fake Node %s", d.ID)
errList = append(errList, mgr.deleteDevice(ctx, nodegroup, d.ID))
}