Merge pull request #4274 from kinvolk/imran/cloud-provider-packet-fix
Cloud provider[Packet] fixes
This commit is contained in:
commit
fb8fdf819b
|
|
@ -79,6 +79,35 @@ affinity:
|
|||
- t1.small.x86
|
||||
```
|
||||
|
||||
## CCM and Controller node labels
|
||||
|
||||
### CCM
|
||||
By default, autoscaler assumes that you have an older deprecated version of `packet-ccm` installed in your
|
||||
cluster. If, however, that is not the case and you've migrated to the new `cloud-provider-equinix-metal` CCM,
|
||||
then the autoscaler must be told so. This can be done by setting an environment variable in the deployment:
|
||||
```
|
||||
env:
|
||||
- name: INSTALLED_CCM
|
||||
value: cloud-provider-equinix-metal
|
||||
```
|
||||
**NOTE**: As a prerequisite, ensure that all worker nodes in your cluster have the prefix `equinixmetal://` in
|
||||
the Node spec `.spec.providerID`. If there are any existing worker nodes with prefix `packet://`, then drain
|
||||
the node, remove the node and restart the kubelet on that worker node to re-register the node in the cluster;
|
||||
this would ensure that `cloud-provider-equinix-metal` CCM sets the uuid with prefix `equinixmetal://` to the
|
||||
field `.spec.providerID`.
|
||||
|
||||
### Controller node labels
|
||||
|
||||
Autoscaler assumes that control plane nodes in your cluster are identified by the label
|
||||
`node-role.kubernetes.io/master`. If for some reason, this assumption is not true in your case, then set the
|
||||
environment variable in the deployment:
|
||||
|
||||
```
|
||||
env:
|
||||
- name: PACKET_CONTROLLER_NODE_IDENTIFIER_LABEL
|
||||
value: <label>
|
||||
```
|
||||
|
||||
## Notes
|
||||
|
||||
The autoscaler will not remove nodes which have non-default kube-system pods.
|
||||
|
|
|
|||
|
|
@ -37,6 +37,11 @@ const (
|
|||
ProviderName = "packet"
|
||||
// GPULabel is the label added to nodes with GPU resource.
|
||||
GPULabel = "cloud.google.com/gke-accelerator"
|
||||
// DefaultControllerNodeLabelKey is the label added to Master/Controller to identify as
|
||||
// master/controller node.
|
||||
DefaultControllerNodeLabelKey = "node-role.kubernetes.io/master"
|
||||
// ControllerNodeIdentifierEnv is the string for the environment variable.
|
||||
ControllerNodeIdentifierEnv = "PACKET_CONTROLLER_NODE_IDENTIFIER_LABEL"
|
||||
)
|
||||
|
||||
var (
|
||||
|
|
@ -94,7 +99,13 @@ func (pcp *packetCloudProvider) AddNodeGroup(group packetNodeGroup) {
|
|||
//
|
||||
// Since only a single node group is currently supported, the first node group is always returned.
|
||||
func (pcp *packetCloudProvider) NodeGroupForNode(node *apiv1.Node) (cloudprovider.NodeGroup, error) {
|
||||
if _, found := node.ObjectMeta.Labels["node-role.kubernetes.io/master"]; found {
|
||||
controllerNodeLabel := os.Getenv(ControllerNodeIdentifierEnv)
|
||||
if controllerNodeLabel == "" {
|
||||
klog.V(3).Infof("env %s not set, using default: %s", ControllerNodeIdentifierEnv, DefaultControllerNodeLabelKey)
|
||||
controllerNodeLabel = DefaultControllerNodeLabelKey
|
||||
}
|
||||
|
||||
if _, found := node.ObjectMeta.Labels[controllerNodeLabel]; found {
|
||||
return nil, nil
|
||||
}
|
||||
nodeGroupId, err := pcp.packetManager.NodeGroupForNode(node.ObjectMeta.Labels, node.Spec.ProviderID)
|
||||
|
|
|
|||
|
|
@ -48,6 +48,8 @@ import (
|
|||
const (
|
||||
userAgent = "kubernetes/cluster-autoscaler/" + version.ClusterAutoscalerVersion
|
||||
expectedAPIContentTypePrefix = "application/json"
|
||||
packetPrefix = "packet://"
|
||||
equinixMetalPrefix = "equinixmetal://"
|
||||
)
|
||||
|
||||
type instanceType struct {
|
||||
|
|
@ -292,7 +294,12 @@ func Contains(a []string, x string) bool {
|
|||
// createPacketManagerRest sets up the client and returns
|
||||
// a packetManagerRest.
|
||||
func createPacketManagerRest(configReader io.Reader, discoverOpts cloudprovider.NodeGroupDiscoveryOptions, opts config.AutoscalingOptions) (*packetManagerRest, error) {
|
||||
var cfg ConfigFile
|
||||
// Initialize ConfigFile instance
|
||||
cfg := ConfigFile{
|
||||
DefaultNodegroupdef: ConfigNodepool{},
|
||||
Nodegroupdef: map[string]*ConfigNodepool{},
|
||||
}
|
||||
|
||||
if configReader != nil {
|
||||
if err := gcfg.ReadInto(&cfg, configReader); err != nil {
|
||||
klog.Errorf("Couldn't read config: %v", err)
|
||||
|
|
@ -431,7 +438,11 @@ func (mgr *packetManagerRest) NodeGroupForNode(labels map[string]string, nodeId
|
|||
if nodegroup, ok := labels["pool"]; ok {
|
||||
return nodegroup, nil
|
||||
}
|
||||
device, err := mgr.getPacketDevice(context.TODO(), strings.TrimPrefix(nodeId, "packet://"))
|
||||
|
||||
trimmedNodeId := strings.TrimPrefix(nodeId, packetPrefix)
|
||||
trimmedNodeId = strings.TrimPrefix(trimmedNodeId, equinixMetalPrefix)
|
||||
|
||||
device, err := mgr.getPacketDevice(context.TODO(), trimmedNodeId)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("Could not find group for node: %s %s", nodeId, err)
|
||||
}
|
||||
|
|
@ -590,9 +601,30 @@ func (mgr *packetManagerRest) getNodes(nodegroup string) ([]string, error) {
|
|||
|
||||
nodes := []string{}
|
||||
|
||||
// This bit of code along with the switch statement, checks if the CCM installed on the cluster is
|
||||
// `packet-ccm` or `cloud-provider-equinix-metal`. This check is important because, depending
|
||||
// on the CCM installed, the prefix in providerID of K8s Node spec is either `packet://` or
|
||||
// `equinixmetal://`. This is now needed as `packet-ccm` is now deprecated and renamed in favor of
|
||||
// `cloud-provider-equinix-metal`.
|
||||
// This code checks if the INSTALLED_CCM env var is set or not. If set to `cloud-provider-equinix-metal`,
|
||||
// the prefix is set to `equinixmetal://` and any other case the prefix is `packet://`.
|
||||
// At a later point in time, there would be a need to make `equinixmetal://` prefix as the default or do away
|
||||
// with `packet://` prefix entirely. This should happen presumably when the packet code in autoscaler is
|
||||
// renamed from packet to equinixmetal.
|
||||
prefix := packetPrefix
|
||||
|
||||
switch installedCCM := os.Getenv("INSTALLED_CCM"); installedCCM {
|
||||
case "packet-ccm":
|
||||
prefix = packetPrefix
|
||||
case "cloud-provider-equinix-metal":
|
||||
prefix = equinixMetalPrefix
|
||||
default:
|
||||
klog.V(3).Info("Unrecognized value: expected INSTALLED_CCM to be either `packet-ccm` or `cloud-provider-equinix-metal`, using default: `packet-ccm`")
|
||||
}
|
||||
|
||||
for _, d := range devices.Devices {
|
||||
if Contains(d.Tags, "k8s-cluster-"+mgr.getNodePoolDefinition(nodegroup).clusterName) && Contains(d.Tags, "k8s-nodepool-"+nodegroup) {
|
||||
nodes = append(nodes, fmt.Sprintf("packet://%s", d.ID))
|
||||
nodes = append(nodes, fmt.Sprintf("%s%s", prefix, d.ID))
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -660,11 +692,15 @@ func (mgr *packetManagerRest) deleteNodes(nodegroup string, nodes []NodeRef, upd
|
|||
klog.Infof("Checking device %v", d)
|
||||
if Contains(d.Tags, "k8s-cluster-"+mgr.getNodePoolDefinition(nodegroup).clusterName) && Contains(d.Tags, "k8s-nodepool-"+nodegroup) {
|
||||
klog.Infof("nodegroup match %s %s", d.Hostname, n.Name)
|
||||
|
||||
trimmedName := strings.TrimPrefix(n.Name, packetPrefix)
|
||||
trimmedName = strings.TrimPrefix(trimmedName, equinixMetalPrefix)
|
||||
|
||||
switch {
|
||||
case d.Hostname == n.Name:
|
||||
klog.V(1).Infof("Matching Packet Device %s - %s", d.Hostname, d.ID)
|
||||
errList = append(errList, mgr.deleteDevice(ctx, nodegroup, d.ID))
|
||||
case fakeNode && strings.TrimPrefix(n.Name, "packet://") == d.ID:
|
||||
case fakeNode && trimmedName == d.ID:
|
||||
klog.V(1).Infof("Fake Node %s", d.ID)
|
||||
errList = append(errList, mgr.deleteDevice(ctx, nodegroup, d.ID))
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue