add node claim into ReplicaRequirements

Signed-off-by: Garrybest <garrybest@foxmail.com>
This commit is contained in:
Garrybest 2021-08-24 21:02:45 +08:00
parent 2cfab7b24d
commit 4f8215e071
7 changed files with 462 additions and 47 deletions

View File

@ -57,15 +57,167 @@ spec:
- name
type: object
type: array
replicaResourceRequirements:
additionalProperties:
anyOf:
- type: integer
- type: string
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
description: ReplicaResourceRequirements represents the resources
required by each replica.
replicaRequirements:
description: ReplicaRequirements represents the requirements required
by each replica.
properties:
nodeClaim:
description: NodeClaim represents the node claim HardNodeAffinity,
NodeSelector and Tolerations required by each replica.
properties:
hardNodeAffinity:
description: A node selector represents the union of the results
of one or more label queries over a set of nodes; that is,
it represents the OR of the selectors represented by the
node selector terms. Note that only PodSpec.Affinity.NodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution
is included here because it has a hard limit on pod scheduling.
properties:
nodeSelectorTerms:
description: Required. A list of node selector terms.
The terms are ORed.
items:
description: A null or empty node selector term matches
no objects. The requirements of them are ANDed. The
TopologySelectorTerm type implements a subset of the
NodeSelectorTerm.
properties:
matchExpressions:
description: A list of node selector requirements
by node's labels.
items:
description: A node selector requirement is a
selector that contains values, a key, and an
operator that relates the key and values.
properties:
key:
description: The label key that the selector
applies to.
type: string
operator:
description: Represents a key's relationship
to a set of values. Valid operators are
In, NotIn, Exists, DoesNotExist. Gt, and
Lt.
type: string
values:
description: An array of string values. If
the operator is In or NotIn, the values
array must be non-empty. If the operator
is Exists or DoesNotExist, the values array
must be empty. If the operator is Gt or
Lt, the values array must have a single
element, which will be interpreted as an
integer. This array is replaced during a
strategic merge patch.
items:
type: string
type: array
required:
- key
- operator
type: object
type: array
matchFields:
description: A list of node selector requirements
by node's fields.
items:
description: A node selector requirement is a
selector that contains values, a key, and an
operator that relates the key and values.
properties:
key:
description: The label key that the selector
applies to.
type: string
operator:
description: Represents a key's relationship
to a set of values. Valid operators are
In, NotIn, Exists, DoesNotExist. Gt, and
Lt.
type: string
values:
description: An array of string values. If
the operator is In or NotIn, the values
array must be non-empty. If the operator
is Exists or DoesNotExist, the values array
must be empty. If the operator is Gt or
Lt, the values array must have a single
element, which will be interpreted as an
integer. This array is replaced during a
strategic merge patch.
items:
type: string
type: array
required:
- key
- operator
type: object
type: array
type: object
type: array
required:
- nodeSelectorTerms
type: object
nodeSelector:
additionalProperties:
type: string
description: NodeSelector is a selector which must be true
for the pod to fit on a node. Selector which must match
a node's labels for the pod to be scheduled on that node.
type: object
tolerations:
description: If specified, the pod's tolerations.
items:
description: The pod this Toleration is attached to tolerates
any taint that matches the triple <key,value,effect> using
the matching operator <operator>.
properties:
effect:
description: Effect indicates the taint effect to match.
Empty means match all taint effects. When specified,
allowed values are NoSchedule, PreferNoSchedule and
NoExecute.
type: string
key:
description: Key is the taint key that the toleration
applies to. Empty means match all taint keys. If the
key is empty, operator must be Exists; this combination
means to match all values and all keys.
type: string
operator:
description: Operator represents a key's relationship
to the value. Valid operators are Exists and Equal.
Defaults to Equal. Exists is equivalent to wildcard
for value, so that a pod can tolerate all taints of
a particular category.
type: string
tolerationSeconds:
description: TolerationSeconds represents the period
of time the toleration (which must be of effect NoExecute,
otherwise this field is ignored) tolerates the taint.
By default, it is not set, which means tolerate the
taint forever (do not evict). Zero and negative values
will be treated as 0 (evict immediately) by the system.
format: int64
type: integer
value:
description: Value is the taint value the toleration
matches to. If the operator is Exists, the value should
be empty, otherwise just a regular string.
type: string
type: object
type: array
type: object
resourceRequest:
additionalProperties:
anyOf:
- type: integer
- type: string
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
description: ResourceRequest represents the resources required
by each replica.
type: object
type: object
replicas:
description: Replicas represents the replica number of the referencing

View File

@ -57,15 +57,167 @@ spec:
- name
type: object
type: array
replicaResourceRequirements:
additionalProperties:
anyOf:
- type: integer
- type: string
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
description: ReplicaResourceRequirements represents the resources
required by each replica.
replicaRequirements:
description: ReplicaRequirements represents the requirements required
by each replica.
properties:
nodeClaim:
description: NodeClaim represents the node claim HardNodeAffinity,
NodeSelector and Tolerations required by each replica.
properties:
hardNodeAffinity:
description: A node selector represents the union of the results
of one or more label queries over a set of nodes; that is,
it represents the OR of the selectors represented by the
node selector terms. Note that only PodSpec.Affinity.NodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution
is included here because it has a hard limit on pod scheduling.
properties:
nodeSelectorTerms:
description: Required. A list of node selector terms.
The terms are ORed.
items:
description: A null or empty node selector term matches
no objects. The requirements of them are ANDed. The
TopologySelectorTerm type implements a subset of the
NodeSelectorTerm.
properties:
matchExpressions:
description: A list of node selector requirements
by node's labels.
items:
description: A node selector requirement is a
selector that contains values, a key, and an
operator that relates the key and values.
properties:
key:
description: The label key that the selector
applies to.
type: string
operator:
description: Represents a key's relationship
to a set of values. Valid operators are
In, NotIn, Exists, DoesNotExist. Gt, and
Lt.
type: string
values:
description: An array of string values. If
the operator is In or NotIn, the values
array must be non-empty. If the operator
is Exists or DoesNotExist, the values array
must be empty. If the operator is Gt or
Lt, the values array must have a single
element, which will be interpreted as an
integer. This array is replaced during a
strategic merge patch.
items:
type: string
type: array
required:
- key
- operator
type: object
type: array
matchFields:
description: A list of node selector requirements
by node's fields.
items:
description: A node selector requirement is a
selector that contains values, a key, and an
operator that relates the key and values.
properties:
key:
description: The label key that the selector
applies to.
type: string
operator:
description: Represents a key's relationship
to a set of values. Valid operators are
In, NotIn, Exists, DoesNotExist. Gt, and
Lt.
type: string
values:
description: An array of string values. If
the operator is In or NotIn, the values
array must be non-empty. If the operator
is Exists or DoesNotExist, the values array
must be empty. If the operator is Gt or
Lt, the values array must have a single
element, which will be interpreted as an
integer. This array is replaced during a
strategic merge patch.
items:
type: string
type: array
required:
- key
- operator
type: object
type: array
type: object
type: array
required:
- nodeSelectorTerms
type: object
nodeSelector:
additionalProperties:
type: string
description: NodeSelector is a selector which must be true
for the pod to fit on a node. Selector which must match
a node's labels for the pod to be scheduled on that node.
type: object
tolerations:
description: If specified, the pod's tolerations.
items:
description: The pod this Toleration is attached to tolerates
any taint that matches the triple <key,value,effect> using
the matching operator <operator>.
properties:
effect:
description: Effect indicates the taint effect to match.
Empty means match all taint effects. When specified,
allowed values are NoSchedule, PreferNoSchedule and
NoExecute.
type: string
key:
description: Key is the taint key that the toleration
applies to. Empty means match all taint keys. If the
key is empty, operator must be Exists; this combination
means to match all values and all keys.
type: string
operator:
description: Operator represents a key's relationship
to the value. Valid operators are Exists and Equal.
Defaults to Equal. Exists is equivalent to wildcard
for value, so that a pod can tolerate all taints of
a particular category.
type: string
tolerationSeconds:
description: TolerationSeconds represents the period
of time the toleration (which must be of effect NoExecute,
otherwise this field is ignored) tolerates the taint.
By default, it is not set, which means tolerate the
taint forever (do not evict). Zero and negative values
will be treated as 0 (evict immediately) by the system.
format: int64
type: integer
value:
description: Value is the taint value the toleration
matches to. If the operator is Exists, the value should
be empty, otherwise just a regular string.
type: string
type: object
type: array
type: object
resourceRequest:
additionalProperties:
anyOf:
- type: integer
- type: string
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
description: ResourceRequest represents the resources required
by each replica.
type: object
type: object
replicas:
description: Replicas represents the replica number of the referencing

View File

@ -29,9 +29,9 @@ type ResourceBindingSpec struct {
// Resource represents the Kubernetes resource to be propagated.
Resource ObjectReference `json:"resource"`
// ReplicaResourceRequirements represents the resources required by each replica.
// ReplicaRequirements represents the requirements required by each replica.
// +optional
ReplicaResourceRequirements corev1.ResourceList `json:"replicaResourceRequirements,omitempty"`
ReplicaRequirements *ReplicaRequirements `json:"replicaRequirements,omitempty"`
// Replicas represents the replica number of the referencing resource.
// +optional
@ -66,6 +66,34 @@ type ObjectReference struct {
ResourceVersion string `json:"resourceVersion,omitempty"`
}
// ReplicaRequirements represents the requirements required by each replica.
type ReplicaRequirements struct {
	// NodeClaim represents the node claim HardNodeAffinity, NodeSelector and Tolerations required by each replica.
	// Nil means the replica places no node-level scheduling constraints.
	// +optional
	NodeClaim *NodeClaim `json:"nodeClaim,omitempty"`
	// ResourceRequest represents the resources required by each replica.
	// +optional
	ResourceRequest corev1.ResourceList `json:"resourceRequest,omitempty"`
}
// NodeClaim represents the node claim HardNodeAffinity, NodeSelector and Tolerations required by each replica.
type NodeClaim struct {
	// A node selector represents the union of the results of one or more label queries over a set of
	// nodes; that is, it represents the OR of the selectors represented by the node selector terms.
	// Note that only PodSpec.Affinity.NodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution
	// is included here because it has a hard limit on pod scheduling.
	// +optional
	HardNodeAffinity *corev1.NodeSelector `json:"hardNodeAffinity,omitempty"`
	// NodeSelector is a selector which must be true for the pod to fit on a node.
	// Selector which must match a node's labels for the pod to be scheduled on that node.
	// +optional
	NodeSelector map[string]string `json:"nodeSelector,omitempty"`
	// If specified, the pod's tolerations.
	// +optional
	Tolerations []corev1.Toleration `json:"tolerations,omitempty"`
}
// TargetCluster represents the identifier of a member cluster.
type TargetCluster struct {
// Name of target cluster.

View File

@ -131,6 +131,41 @@ func (in *ManifestStatus) DeepCopy() *ManifestStatus {
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *NodeClaim) DeepCopyInto(out *NodeClaim) {
	// Shallow copy first; reference fields are deep-copied below.
	*out = *in
	if in.HardNodeAffinity != nil {
		// Shadow in/out with pointers to the field (standard deepcopy-gen idiom).
		in, out := &in.HardNodeAffinity, &out.HardNodeAffinity
		*out = new(v1.NodeSelector)
		(*in).DeepCopyInto(*out)
	}
	if in.NodeSelector != nil {
		in, out := &in.NodeSelector, &out.NodeSelector
		// Map values are plain strings, so element assignment suffices.
		*out = make(map[string]string, len(*in))
		for key, val := range *in {
			(*out)[key] = val
		}
	}
	if in.Tolerations != nil {
		in, out := &in.Tolerations, &out.Tolerations
		*out = make([]v1.Toleration, len(*in))
		for i := range *in {
			(*in)[i].DeepCopyInto(&(*out)[i])
		}
	}
	return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new NodeClaim.
func (in *NodeClaim) DeepCopy() *NodeClaim {
	if in == nil {
		return nil
	}
	copied := new(NodeClaim)
	in.DeepCopyInto(copied)
	return copied
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *ObjectReference) DeepCopyInto(out *ObjectReference) {
*out = *in
@ -147,6 +182,34 @@ func (in *ObjectReference) DeepCopy() *ObjectReference {
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *ReplicaRequirements) DeepCopyInto(out *ReplicaRequirements) {
	// Start from a shallow copy, then replace the reference fields
	// with freshly allocated deep copies.
	*out = *in
	if in.NodeClaim != nil {
		out.NodeClaim = new(NodeClaim)
		in.NodeClaim.DeepCopyInto(out.NodeClaim)
	}
	if in.ResourceRequest != nil {
		out.ResourceRequest = make(v1.ResourceList, len(in.ResourceRequest))
		for key, val := range in.ResourceRequest {
			// resource.Quantity holds internal state, so copy each value.
			out.ResourceRequest[key] = val.DeepCopy()
		}
	}
	return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ReplicaRequirements.
func (in *ReplicaRequirements) DeepCopy() *ReplicaRequirements {
	if in == nil {
		return nil
	}
	copied := new(ReplicaRequirements)
	in.DeepCopyInto(copied)
	return copied
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *ResourceBinding) DeepCopyInto(out *ResourceBinding) {
*out = *in
@ -212,12 +275,10 @@ func (in *ResourceBindingList) DeepCopyObject() runtime.Object {
func (in *ResourceBindingSpec) DeepCopyInto(out *ResourceBindingSpec) {
*out = *in
out.Resource = in.Resource
if in.ReplicaResourceRequirements != nil {
in, out := &in.ReplicaResourceRequirements, &out.ReplicaResourceRequirements
*out = make(v1.ResourceList, len(*in))
for key, val := range *in {
(*out)[key] = val.DeepCopy()
}
if in.ReplicaRequirements != nil {
in, out := &in.ReplicaRequirements, &out.ReplicaRequirements
*out = new(ReplicaRequirements)
(*in).DeepCopyInto(*out)
}
if in.Clusters != nil {
in, out := &in.Clusters, &out.Clusters

View File

@ -297,9 +297,11 @@ func (a TargetClustersList) Less(i, j int) bool { return a[i].Replicas > a[j].Re
func (g *genericScheduler) divideReplicasAggregatedWithResource(clusters []*clusterv1alpha1.Cluster, spec *workv1alpha1.ResourceBindingSpec,
preUsedClustersName ...string) ([]workv1alpha1.TargetCluster, error) {
// preUsedClustersName is used to prioritize the clusters
for _, value := range spec.ReplicaResourceRequirements {
if value.Value() > 0 {
return g.divideReplicasAggregatedWithResourceRequirements(clusters, spec, preUsedClustersName...)
if spec.ReplicaRequirements != nil {
for _, value := range spec.ReplicaRequirements.ResourceRequest {
if value.Value() > 0 {
return g.divideReplicasAggregatedWithResourceRequirements(clusters, spec, preUsedClustersName...)
}
}
}
return g.divideReplicasAggregatedWithoutResourceRequirements(clusters, spec, preUsedClustersName...)
@ -323,16 +325,16 @@ func (g *genericScheduler) divideReplicasAggregatedWithResourceRequirements(clus
// so that we can assign new replicas to them preferentially when scale up.
// preUsedClusters have none items during first scheduler
preUsedClusters, unUsedClusters := g.getPreUsed(clusters, preUsedClustersName...)
preUsedClustersAvailableReplicas := g.calAvailableReplicas(preUsedClusters, spec.ReplicaResourceRequirements)
unUsedClustersAvailableReplicas := g.calAvailableReplicas(unUsedClusters, spec.ReplicaResourceRequirements)
preUsedClustersAvailableReplicas := g.calAvailableReplicas(preUsedClusters, spec.ReplicaRequirements)
unUsedClustersAvailableReplicas := g.calAvailableReplicas(unUsedClusters, spec.ReplicaRequirements)
clusterAvailableReplicas := append(preUsedClustersAvailableReplicas, unUsedClustersAvailableReplicas...)
return g.divideReplicasAggregatedWithClusterReplicas(clusterAvailableReplicas, spec.Replicas)
}
func (g *genericScheduler) calAvailableReplicas(clusters []*clusterv1alpha1.Cluster, replicaResourceRequirements corev1.ResourceList) []workv1alpha1.TargetCluster {
func (g *genericScheduler) calAvailableReplicas(clusters []*clusterv1alpha1.Cluster, replicaRequirements *workv1alpha1.ReplicaRequirements) []workv1alpha1.TargetCluster {
availableTargetClusters := make([]workv1alpha1.TargetCluster, len(clusters))
for i, cluster := range clusters {
maxReplicas := g.calClusterAvailableReplicas(cluster, replicaResourceRequirements)
maxReplicas := g.calClusterAvailableReplicas(cluster, replicaRequirements)
availableTargetClusters[i] = workv1alpha1.TargetCluster{Name: cluster.Name, Replicas: maxReplicas}
}
sort.Sort(TargetClustersList(availableTargetClusters))
@ -340,11 +342,11 @@ func (g *genericScheduler) calAvailableReplicas(clusters []*clusterv1alpha1.Clus
}
// calClusterAvailableReplicas calculates how many replicas can be applied to the target cluster.
func (g *genericScheduler) calClusterAvailableReplicas(cluster *clusterv1alpha1.Cluster, resourcePerReplicas corev1.ResourceList) int32 {
func (g *genericScheduler) calClusterAvailableReplicas(cluster *clusterv1alpha1.Cluster, replicaRequirements *workv1alpha1.ReplicaRequirements) int32 {
var maximumReplicas int64 = math.MaxInt32
resourceSummary := cluster.Status.ResourceSummary
for key, value := range resourcePerReplicas {
for key, value := range replicaRequirements.ResourceRequest {
requestedQuantity := value.Value()
if requestedQuantity <= 0 {
continue

View File

@ -587,7 +587,7 @@ func (d *ResourceDetector) ClaimClusterPolicyForObject(object *unstructured.Unst
// BuildResourceBinding builds a desired ResourceBinding for object.
func (d *ResourceDetector) BuildResourceBinding(object *unstructured.Unstructured, objectKey keys.ClusterWideKey, labels map[string]string) (*workv1alpha1.ResourceBinding, error) {
bindingName := names.GenerateBindingName(object.GetKind(), object.GetName())
replicaResourceRequirements, replicas, err := d.GetReplicaDeclaration(object)
replicaRequirements, replicas, err := d.GetReplicaDeclaration(object)
if err != nil {
return nil, err
}
@ -608,8 +608,8 @@ func (d *ResourceDetector) BuildResourceBinding(object *unstructured.Unstructure
Name: object.GetName(),
ResourceVersion: object.GetResourceVersion(),
},
ReplicaResourceRequirements: replicaResourceRequirements,
Replicas: replicas,
ReplicaRequirements: replicaRequirements,
Replicas: replicas,
},
}
@ -619,7 +619,7 @@ func (d *ResourceDetector) BuildResourceBinding(object *unstructured.Unstructure
// BuildClusterResourceBinding builds a desired ClusterResourceBinding for object.
func (d *ResourceDetector) BuildClusterResourceBinding(object *unstructured.Unstructured, objectKey keys.ClusterWideKey, labels map[string]string) (*workv1alpha1.ClusterResourceBinding, error) {
bindingName := names.GenerateBindingName(object.GetKind(), object.GetName())
replicaResourceRequirements, replicas, err := d.GetReplicaDeclaration(object)
replicaRequirements, replicas, err := d.GetReplicaDeclaration(object)
if err != nil {
return nil, err
}
@ -638,8 +638,8 @@ func (d *ResourceDetector) BuildClusterResourceBinding(object *unstructured.Unst
Name: object.GetName(),
ResourceVersion: object.GetResourceVersion(),
},
ReplicaResourceRequirements: replicaResourceRequirements,
Replicas: replicas,
ReplicaRequirements: replicaRequirements,
Replicas: replicas,
},
}
@ -647,7 +647,7 @@ func (d *ResourceDetector) BuildClusterResourceBinding(object *unstructured.Unst
}
// GetReplicaDeclaration get the replicas and resource requirements of a Deployment object
func (d *ResourceDetector) GetReplicaDeclaration(object *unstructured.Unstructured) (corev1.ResourceList, int32, error) {
func (d *ResourceDetector) GetReplicaDeclaration(object *unstructured.Unstructured) (*workv1alpha1.ReplicaRequirements, int32, error) {
if object.GetKind() == util.DeploymentKind {
replicas, ok, err := unstructured.NestedInt64(object.Object, util.SpecField, util.ReplicasField)
if !ok || err != nil {
@ -657,24 +657,27 @@ func (d *ResourceDetector) GetReplicaDeclaration(object *unstructured.Unstructur
if !ok || err != nil {
return nil, 0, err
}
replicaResourceRequirements, err := d.getReplicaResourceRequirements(podTemplate)
replicaRequirements, err := d.getReplicaRequirements(podTemplate)
if err != nil {
return nil, 0, err
}
return replicaResourceRequirements, int32(replicas), nil
return replicaRequirements, int32(replicas), nil
}
return nil, 0, nil
}
func (d *ResourceDetector) getReplicaResourceRequirements(object map[string]interface{}) (corev1.ResourceList, error) {
func (d *ResourceDetector) getReplicaRequirements(object map[string]interface{}) (*workv1alpha1.ReplicaRequirements, error) {
var podTemplateSpec *corev1.PodTemplateSpec
err := runtime.DefaultUnstructuredConverter.FromUnstructured(object, &podTemplateSpec)
if err != nil {
return nil, err
}
res := util.EmptyResource().AddPodRequest(&podTemplateSpec.Spec)
replicaResourceRequirements := res.ResourceList()
return replicaResourceRequirements, nil
replicaRequirements := &workv1alpha1.ReplicaRequirements{
NodeClaim: helper.GenerateNodeClaimByPodSpec(&podTemplateSpec.Spec),
ResourceRequest: res.ResourceList(),
}
return replicaRequirements, nil
}
// AddWaiting adds object's key to waiting list.

View File

@ -4,6 +4,7 @@ import (
"context"
"sort"
corev1 "k8s.io/api/core/v1"
apiextensionsv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1"
"k8s.io/apimachinery/pkg/api/meta"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
@ -448,3 +449,19 @@ func DeleteWorks(c client.Client, selector labels.Set) (controllerruntime.Result
return controllerruntime.Result{}, nil
}
// GenerateNodeClaimByPodSpec will return a NodeClaim from PodSpec.
// It returns nil when the pod spec carries no node-level scheduling
// constraints, so callers can omit the claim entirely.
func GenerateNodeClaimByPodSpec(podSpec *corev1.PodSpec) *workv1alpha1.NodeClaim {
	nodeClaim := &workv1alpha1.NodeClaim{
		NodeSelector: podSpec.NodeSelector,
		Tolerations:  podSpec.Tolerations,
	}
	// Only the required (hard) node affinity is propagated; preferred
	// affinity has no hard limit on pod scheduling and is ignored here.
	hasAffinity := podSpec.Affinity != nil && podSpec.Affinity.NodeAffinity != nil && podSpec.Affinity.NodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution != nil
	if hasAffinity {
		nodeClaim.HardNodeAffinity = podSpec.Affinity.NodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution
	}
	// Use len() for emptiness so an empty (but non-nil) node selector is
	// treated the same as a missing one: neither constrains scheduling.
	if len(nodeClaim.NodeSelector) == 0 && nodeClaim.HardNodeAffinity == nil && len(nodeClaim.Tolerations) == 0 {
		return nil
	}
	return nodeClaim
}