Provide fake proivder IDs for failed machines
This commit is contained in:
parent
38df5c630a
commit
b898c1f151
|
|
@ -20,6 +20,7 @@ import (
|
|||
"context"
|
||||
"fmt"
|
||||
"os"
|
||||
"strings"
|
||||
|
||||
corev1 "k8s.io/api/core/v1"
|
||||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||
|
|
@ -46,6 +47,7 @@ const (
|
|||
resourceNameMachine = "machines"
|
||||
resourceNameMachineSet = "machinesets"
|
||||
resourceNameMachineDeployment = "machinedeployments"
|
||||
failedMachinePrefix = "failed-machine-"
|
||||
)
|
||||
|
||||
// machineController watches for Nodes, Machines, MachineSets and
|
||||
|
|
@ -219,6 +221,16 @@ func (c *machineController) findMachineByProviderID(providerID normalizedProvide
|
|||
}
|
||||
}
|
||||
|
||||
if isFailedMachineProviderID(providerID) {
|
||||
machine, err := c.findMachine(machineKeyFromFailedProviderID(providerID))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if machine != nil {
|
||||
return machine.DeepCopy(), nil
|
||||
}
|
||||
}
|
||||
|
||||
// If the machine object has no providerID--maybe actuator
|
||||
// does not set this value (e.g., OpenStack)--then first
|
||||
// lookup the node using ProviderID. If that is successful
|
||||
|
|
@ -234,6 +246,15 @@ func (c *machineController) findMachineByProviderID(providerID normalizedProvide
|
|||
return c.findMachine(node.Annotations[machineAnnotationKey])
|
||||
}
|
||||
|
||||
func isFailedMachineProviderID(providerID normalizedProviderID) bool {
|
||||
return strings.HasPrefix(string(providerID), failedMachinePrefix)
|
||||
}
|
||||
|
||||
func machineKeyFromFailedProviderID(providerID normalizedProviderID) string {
|
||||
namespaceName := strings.TrimPrefix(string(providerID), failedMachinePrefix)
|
||||
return strings.Replace(namespaceName, "_", "/", 1)
|
||||
}
|
||||
|
||||
// findNodeByNodeName finds the Node object keyed by name.. Returns
|
||||
// nil if it cannot be found. A DeepCopy() of the object is returned
|
||||
// on success.
|
||||
|
|
@ -393,6 +414,17 @@ func (c *machineController) machineSetProviderIDs(machineSet *MachineSet) ([]str
|
|||
continue
|
||||
}
|
||||
|
||||
if machine.Status.FailureMessage != nil {
|
||||
klog.V(4).Infof("Status.FailureMessage of machine %q is %q", machine.Name, *machine.Status.FailureMessage)
|
||||
// Provide a fake ID to allow the autoscaler to track machines that will never
|
||||
// become nodes and mark the nodegroup unhealthy after maxNodeProvisionTime.
|
||||
// Fake ID needs to be recognised later and converted into a machine key.
|
||||
// Use an underscore as a separator between namespace and name as it is not a
|
||||
// valid character within a namespace name.
|
||||
providerIDs = append(providerIDs, fmt.Sprintf("%s%s_%s", failedMachinePrefix, machine.Namespace, machine.Name))
|
||||
continue
|
||||
}
|
||||
|
||||
if machine.Status.NodeRef == nil {
|
||||
klog.V(4).Infof("Status.NodeRef of machine %q is currently nil", machine.Name)
|
||||
continue
|
||||
|
|
|
|||
|
|
@ -450,7 +450,17 @@ func TestControllerFindMachineByProviderID(t *testing.T) {
|
|||
t.Fatalf("expected machines to be equal - expected %+v, got %+v", testConfig.machines[0], machine)
|
||||
}
|
||||
|
||||
// Test #2: Verify machine is not found if it has a
|
||||
// Test #2: Verify machine returned by fake provider ID is correct machine
|
||||
fakeProviderID := fmt.Sprintf("%s$s/%s", testConfig.machines[0].Namespace, testConfig.machines[0].Name)
|
||||
machine, err = controller.findMachineByProviderID(normalizedProviderID(fakeProviderID))
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error: %v", err)
|
||||
}
|
||||
if machine != nil {
|
||||
t.Fatal("expected find to fail")
|
||||
}
|
||||
|
||||
// Test #3: Verify machine is not found if it has a
|
||||
// non-existent or different provider ID.
|
||||
machine = testConfig.machines[0].DeepCopy()
|
||||
machine.Spec.ProviderID = pointer.StringPtr("does-not-match")
|
||||
|
|
|
|||
|
|
@ -115,6 +115,10 @@ func newMachineFromUnstructured(u *unstructured.Unstructured) *Machine {
|
|||
machine.Status.NodeRef = &nodeRef
|
||||
}
|
||||
|
||||
if failureMessage, _, _ := unstructured.NestedString(u.Object, "status", "failureMessage"); failureMessage != "" {
|
||||
machine.Status.FailureMessage = pointer.StringPtr(failureMessage)
|
||||
}
|
||||
|
||||
return &machine
|
||||
}
|
||||
|
||||
|
|
@ -184,5 +188,9 @@ func newUnstructuredFromMachine(m *Machine) *unstructured.Unstructured {
|
|||
}
|
||||
}
|
||||
|
||||
if m.Status.FailureMessage != nil {
|
||||
unstructured.SetNestedField(u.Object, *m.Status.FailureMessage, "status", "failureMessage")
|
||||
}
|
||||
|
||||
return &u
|
||||
}
|
||||
|
|
|
|||
|
|
@ -63,6 +63,25 @@ type MachineStatus struct {
|
|||
// NodeRef will point to the corresponding Node if it exists.
|
||||
// +optional
|
||||
NodeRef *corev1.ObjectReference `json:"nodeRef,omitempty"`
|
||||
|
||||
// FailureMessage will be set in the event that there is a terminal problem
|
||||
// reconciling the Machine and will contain a more verbose string suitable
|
||||
// for logging and human consumption.
|
||||
//
|
||||
// This field should not be set for transitive errors that a controller
|
||||
// faces that are expected to be fixed automatically over
|
||||
// time (like service outages), but instead indicate that something is
|
||||
// fundamentally wrong with the Machine's spec or the configuration of
|
||||
// the controller, and that manual intervention is required. Examples
|
||||
// of terminal errors would be invalid combinations of settings in the
|
||||
// spec, values that are unsupported by the controller, or the
|
||||
// responsible controller itself being critically misconfigured.
|
||||
//
|
||||
// Any transient errors that occur during the reconciliation of Machines
|
||||
// can be added as events to the Machine object and/or logged in the
|
||||
// controller's output.
|
||||
// +optional
|
||||
FailureMessage *string `json:"failureMessage,omitempty"`
|
||||
}
|
||||
|
||||
// MachineList contains a list of Machine
|
||||
|
|
|
|||
|
|
@ -318,6 +318,11 @@ func (in *MachineSpec) DeepCopy() *MachineSpec {
|
|||
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
|
||||
func (in *MachineStatus) DeepCopyInto(out *MachineStatus) {
|
||||
*out = *in
|
||||
if in.FailureMessage != nil {
|
||||
in, out := &in.FailureMessage, &out.FailureMessage
|
||||
*out = new(string)
|
||||
**out = **in
|
||||
}
|
||||
if in.NodeRef != nil {
|
||||
in, out := &in.NodeRef, &out.NodeRef
|
||||
*out = new(v1.ObjectReference)
|
||||
|
|
|
|||
Loading…
Reference in New Issue