Merge pull request #4274 from kinvolk/imran/cloud-provider-packet-fix
Cloud provider[Packet] fixes
This commit is contained in:
commit
fb8fdf819b
|
|
@ -79,6 +79,35 @@ affinity:
|
||||||
- t1.small.x86
|
- t1.small.x86
|
||||||
```
|
```
|
||||||
|
|
||||||
|
## CCM and Controller node labels
|
||||||
|
|
||||||
|
### CCM
|
||||||
|
By default, autoscaler assumes that you have an older deprecated version of `packet-ccm` installed in your
|
||||||
|
cluster. If however, that is not the case and you've migrated to the new `cloud-provider-equinix-metal` CCM,
|
||||||
|
then this must be told to autoscaler. This can be done via setting an environment variable in the deployment:
|
||||||
|
```
|
||||||
|
env:
|
||||||
|
- name: INSTALLED_CCM
|
||||||
|
value: cloud-provider-equinix-metal
|
||||||
|
```
|
||||||
|
**NOTE**: As a prerequisite, ensure that all worker nodes in your cluster have the prefix `equinixmetal://` in
|
||||||
|
the Node spec `.spec.providerID`. If there are any existing worker nodes with prefix `packet://`, then drain
|
||||||
|
the node, remove the node and restart the kubelet on that worker node to re-register the node in the cluster,
|
||||||
|
this would ensure that `cloud-provider-equinix-metal` CCM sets the uuid with prefix `equinixmetal://` to the
|
||||||
|
field `.spec.ProviderID`.
|
||||||
|
|
||||||
|
### Controller node labels
|
||||||
|
|
||||||
|
Autoscaler assumes that control plane nodes in your cluster are identified by the label
|
||||||
|
`node-role.kubernetes.io/master`. If for some reason, this assumption is not true in your case, then set the
|
||||||
|
envirnment variable in the deployment:
|
||||||
|
|
||||||
|
```
|
||||||
|
env:
|
||||||
|
- name: PACKET_CONTROLLER_NODE_IDENTIFIER_LABEL
|
||||||
|
value: <label>
|
||||||
|
```
|
||||||
|
|
||||||
## Notes
|
## Notes
|
||||||
|
|
||||||
The autoscaler will not remove nodes which have non-default kube-system pods.
|
The autoscaler will not remove nodes which have non-default kube-system pods.
|
||||||
|
|
|
||||||
|
|
@ -37,6 +37,11 @@ const (
|
||||||
ProviderName = "packet"
|
ProviderName = "packet"
|
||||||
// GPULabel is the label added to nodes with GPU resource.
|
// GPULabel is the label added to nodes with GPU resource.
|
||||||
GPULabel = "cloud.google.com/gke-accelerator"
|
GPULabel = "cloud.google.com/gke-accelerator"
|
||||||
|
// DefaultControllerNodeLabelKey is the label added to Master/Controller to identify as
|
||||||
|
// master/controller node.
|
||||||
|
DefaultControllerNodeLabelKey = "node-role.kubernetes.io/master"
|
||||||
|
// ControllerNodeIdentifierEnv is the string for the environment variable.
|
||||||
|
ControllerNodeIdentifierEnv = "PACKET_CONTROLLER_NODE_IDENTIFIER_LABEL"
|
||||||
)
|
)
|
||||||
|
|
||||||
var (
|
var (
|
||||||
|
|
@ -94,7 +99,13 @@ func (pcp *packetCloudProvider) AddNodeGroup(group packetNodeGroup) {
|
||||||
//
|
//
|
||||||
// Since only a single node group is currently supported, the first node group is always returned.
|
// Since only a single node group is currently supported, the first node group is always returned.
|
||||||
func (pcp *packetCloudProvider) NodeGroupForNode(node *apiv1.Node) (cloudprovider.NodeGroup, error) {
|
func (pcp *packetCloudProvider) NodeGroupForNode(node *apiv1.Node) (cloudprovider.NodeGroup, error) {
|
||||||
if _, found := node.ObjectMeta.Labels["node-role.kubernetes.io/master"]; found {
|
controllerNodeLabel := os.Getenv(ControllerNodeIdentifierEnv)
|
||||||
|
if controllerNodeLabel == "" {
|
||||||
|
klog.V(3).Infof("env %s not set, using default: %s", ControllerNodeIdentifierEnv, DefaultControllerNodeLabelKey)
|
||||||
|
controllerNodeLabel = DefaultControllerNodeLabelKey
|
||||||
|
}
|
||||||
|
|
||||||
|
if _, found := node.ObjectMeta.Labels[controllerNodeLabel]; found {
|
||||||
return nil, nil
|
return nil, nil
|
||||||
}
|
}
|
||||||
nodeGroupId, err := pcp.packetManager.NodeGroupForNode(node.ObjectMeta.Labels, node.Spec.ProviderID)
|
nodeGroupId, err := pcp.packetManager.NodeGroupForNode(node.ObjectMeta.Labels, node.Spec.ProviderID)
|
||||||
|
|
|
||||||
|
|
@ -48,6 +48,8 @@ import (
|
||||||
const (
|
const (
|
||||||
userAgent = "kubernetes/cluster-autoscaler/" + version.ClusterAutoscalerVersion
|
userAgent = "kubernetes/cluster-autoscaler/" + version.ClusterAutoscalerVersion
|
||||||
expectedAPIContentTypePrefix = "application/json"
|
expectedAPIContentTypePrefix = "application/json"
|
||||||
|
packetPrefix = "packet://"
|
||||||
|
equinixMetalPrefix = "equinixmetal://"
|
||||||
)
|
)
|
||||||
|
|
||||||
type instanceType struct {
|
type instanceType struct {
|
||||||
|
|
@ -292,7 +294,12 @@ func Contains(a []string, x string) bool {
|
||||||
// createPacketManagerRest sets up the client and returns
|
// createPacketManagerRest sets up the client and returns
|
||||||
// an packetManagerRest.
|
// an packetManagerRest.
|
||||||
func createPacketManagerRest(configReader io.Reader, discoverOpts cloudprovider.NodeGroupDiscoveryOptions, opts config.AutoscalingOptions) (*packetManagerRest, error) {
|
func createPacketManagerRest(configReader io.Reader, discoverOpts cloudprovider.NodeGroupDiscoveryOptions, opts config.AutoscalingOptions) (*packetManagerRest, error) {
|
||||||
var cfg ConfigFile
|
// Initialize ConfigFile instance
|
||||||
|
cfg := ConfigFile{
|
||||||
|
DefaultNodegroupdef: ConfigNodepool{},
|
||||||
|
Nodegroupdef: map[string]*ConfigNodepool{},
|
||||||
|
}
|
||||||
|
|
||||||
if configReader != nil {
|
if configReader != nil {
|
||||||
if err := gcfg.ReadInto(&cfg, configReader); err != nil {
|
if err := gcfg.ReadInto(&cfg, configReader); err != nil {
|
||||||
klog.Errorf("Couldn't read config: %v", err)
|
klog.Errorf("Couldn't read config: %v", err)
|
||||||
|
|
@ -431,7 +438,11 @@ func (mgr *packetManagerRest) NodeGroupForNode(labels map[string]string, nodeId
|
||||||
if nodegroup, ok := labels["pool"]; ok {
|
if nodegroup, ok := labels["pool"]; ok {
|
||||||
return nodegroup, nil
|
return nodegroup, nil
|
||||||
}
|
}
|
||||||
device, err := mgr.getPacketDevice(context.TODO(), strings.TrimPrefix(nodeId, "packet://"))
|
|
||||||
|
trimmedNodeId := strings.TrimPrefix(nodeId, packetPrefix)
|
||||||
|
trimmedNodeId = strings.TrimPrefix(trimmedNodeId, equinixMetalPrefix)
|
||||||
|
|
||||||
|
device, err := mgr.getPacketDevice(context.TODO(), trimmedNodeId)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return "", fmt.Errorf("Could not find group for node: %s %s", nodeId, err)
|
return "", fmt.Errorf("Could not find group for node: %s %s", nodeId, err)
|
||||||
}
|
}
|
||||||
|
|
@ -590,9 +601,30 @@ func (mgr *packetManagerRest) getNodes(nodegroup string) ([]string, error) {
|
||||||
|
|
||||||
nodes := []string{}
|
nodes := []string{}
|
||||||
|
|
||||||
|
// This bit of code along with the switch statement, checks if the CCM installed on the cluster is
|
||||||
|
// `packet-ccm` or `cloud-provider-equinix-metal`. The reason its important to check because depending
|
||||||
|
// on the CCM installed, the prefix in providerID of K8s Node spec differs from either `packet://` or
|
||||||
|
// `equinixmetal://`. This is now needed as `packet-ccm` is now deprecated and renamed in favor of
|
||||||
|
// `cloud-provider-equinix-metal`.
|
||||||
|
// This code checks if the INSTALLED_CCM env var is set or not. If set to `cloud-provider-equinix-metal`,
|
||||||
|
// the prefix is set to `equinixmetal://` and any other case the prefix is `packet://`.
|
||||||
|
// At a later point in time, there would be a need to make `equinixmetal://` prefix as the default or do away
|
||||||
|
// with `packet://` prefix entirely. This should happen presumably when the packet code in autoscaler is
|
||||||
|
// renamed from packet to equinixmetal.
|
||||||
|
prefix := packetPrefix
|
||||||
|
|
||||||
|
switch installedCCM := os.Getenv("INSTALLED_CCM"); installedCCM {
|
||||||
|
case "packet-ccm":
|
||||||
|
prefix = packetPrefix
|
||||||
|
case "cloud-provider-equinix-metal":
|
||||||
|
prefix = equinixMetalPrefix
|
||||||
|
default:
|
||||||
|
klog.V(3).Info("Unrecognized value: expected INSTALLED_CCM to be either `packet-ccm` or `cloud-provider-equinix-metal`, using default: `packet-ccm`")
|
||||||
|
}
|
||||||
|
|
||||||
for _, d := range devices.Devices {
|
for _, d := range devices.Devices {
|
||||||
if Contains(d.Tags, "k8s-cluster-"+mgr.getNodePoolDefinition(nodegroup).clusterName) && Contains(d.Tags, "k8s-nodepool-"+nodegroup) {
|
if Contains(d.Tags, "k8s-cluster-"+mgr.getNodePoolDefinition(nodegroup).clusterName) && Contains(d.Tags, "k8s-nodepool-"+nodegroup) {
|
||||||
nodes = append(nodes, fmt.Sprintf("packet://%s", d.ID))
|
nodes = append(nodes, fmt.Sprintf("%s%s", prefix, d.ID))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -660,11 +692,15 @@ func (mgr *packetManagerRest) deleteNodes(nodegroup string, nodes []NodeRef, upd
|
||||||
klog.Infof("Checking device %v", d)
|
klog.Infof("Checking device %v", d)
|
||||||
if Contains(d.Tags, "k8s-cluster-"+mgr.getNodePoolDefinition(nodegroup).clusterName) && Contains(d.Tags, "k8s-nodepool-"+nodegroup) {
|
if Contains(d.Tags, "k8s-cluster-"+mgr.getNodePoolDefinition(nodegroup).clusterName) && Contains(d.Tags, "k8s-nodepool-"+nodegroup) {
|
||||||
klog.Infof("nodegroup match %s %s", d.Hostname, n.Name)
|
klog.Infof("nodegroup match %s %s", d.Hostname, n.Name)
|
||||||
|
|
||||||
|
trimmedName := strings.TrimPrefix(n.Name, packetPrefix)
|
||||||
|
trimmedName = strings.TrimPrefix(trimmedName, equinixMetalPrefix)
|
||||||
|
|
||||||
switch {
|
switch {
|
||||||
case d.Hostname == n.Name:
|
case d.Hostname == n.Name:
|
||||||
klog.V(1).Infof("Matching Packet Device %s - %s", d.Hostname, d.ID)
|
klog.V(1).Infof("Matching Packet Device %s - %s", d.Hostname, d.ID)
|
||||||
errList = append(errList, mgr.deleteDevice(ctx, nodegroup, d.ID))
|
errList = append(errList, mgr.deleteDevice(ctx, nodegroup, d.ID))
|
||||||
case fakeNode && strings.TrimPrefix(n.Name, "packet://") == d.ID:
|
case fakeNode && trimmedName == d.ID:
|
||||||
klog.V(1).Infof("Fake Node %s", d.ID)
|
klog.V(1).Infof("Fake Node %s", d.ID)
|
||||||
errList = append(errList, mgr.deleteDevice(ctx, nodegroup, d.ID))
|
errList = append(errList, mgr.deleteDevice(ctx, nodegroup, d.ID))
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue