Resource quota enforcement webhook (#544)
* Cert configuration and reloading
* Add support for strict webhook error handling
* Improve webhook error handling
* Don't deregister the webhook when failure policy is strict
* Standardize error message capitalization
* Have the webhook parse its own configuration from flags
* Clean up cert provider code
* Add explanation for skipping deregistration
* Resource Quota enforcement webhook
* Fix bad merge
* Cleanup, fixes
* Cleanup
* Document the quota enforcer
parent dc27045520 · commit edcf4cdc32
@@ -1229,6 +1229,7 @@
    "k8s.io/client-go/discovery",
    "k8s.io/client-go/discovery/fake",
    "k8s.io/client-go/informers",
    "k8s.io/client-go/informers/core/v1",
    "k8s.io/client-go/kubernetes",
    "k8s.io/client-go/kubernetes/fake",
    "k8s.io/client-go/kubernetes/scheme",
@@ -38,6 +38,7 @@ The Kubernetes Operator for Apache Spark ships with a command-line tool called `
* [Configuring Automatic Application Re-submission on Submission Failures](#configuring-automatic-application-re-submission-on-submission-failures)
* [Running Spark Applications on a Schedule using a ScheduledSparkApplication](#running-spark-applications-on-a-schedule-using-a-scheduledsparkapplication)
* [Enabling Leader Election for High Availability](#enabling-leader-election-for-high-availability)
* [Enabling Resource Quota Enforcement](#enabling-resource-quota-enforcement)
* [Customizing the Operator](#customizing-the-operator)

## Using a SparkApplication

@@ -564,6 +565,12 @@ The operator supports a high-availability (HA) mode, in which there can be more
| `leader-election-renew-deadline` | 14 seconds | Leader election renew deadline. |
| `leader-election-retry-period` | 4 seconds | Leader election retry period. |

## Enabling Resource Quota Enforcement

The Spark Operator provides limited support for resource quota enforcement using a validating webhook. It counts the resources of non-terminal-phase SparkApplications and Pods, and determines whether a requested SparkApplication will fit given the remaining resources. ResourceQuota scope selectors are not supported; any ResourceQuota object that does not match the entire namespace is ignored. As with the native Pod quota enforcement, current usage is updated asynchronously, so some overscheduling is possible.

If you are running Spark applications in namespaces that are subject to resource quota constraints, consider enabling this feature to avoid driver resource starvation. Quota enforcement can be enabled with the command-line flag `-enable-resource-quota-enforcement=true`. It is recommended to also set `-webhook-fail-on-error=true`.
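To make the enforcement rule concrete, here is a rough, self-contained sketch of the kind of arithmetic the validating webhook performs (illustrative only; the quantities and names below are made up and are not part of the operator's API): the namespace's current non-terminal usage is subtracted from the quota's hard limit, and the application is denied if its request exceeds what remains.

```go
package main

import (
	"fmt"

	"k8s.io/apimachinery/pkg/api/resource"
)

func main() {
	// Hypothetical numbers: a 10-CPU hard limit, 7 CPUs already used by
	// non-terminal SparkApplications and Pods, and a new application asking for 4.
	hardLimit := resource.MustParse("10")
	currentUsage := resource.MustParse("7")
	requested := resource.MustParse("4")

	available := hardLimit.DeepCopy()
	available.Sub(currentUsage)

	if requested.Cmp(available) > 0 {
		fmt.Printf("denied: %s CPUs requested, %s available\n", requested.String(), available.String())
	} else {
		fmt.Println("admitted")
	}
}
```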
## Customizing the Operator

To customize the operator, you can follow the steps below:
main.go
@@ -54,25 +54,26 @@ import (
)

var (
	master = flag.String("master", "", "The address of the Kubernetes API server. Overrides any value in kubeconfig. Only required if out-of-cluster.")
	kubeConfig = flag.String("kubeConfig", "", "Path to a kube config. Only required if out-of-cluster.")
	installCRDs = flag.Bool("install-crds", true, "Whether to install CRDs")
	controllerThreads = flag.Int("controller-threads", 10, "Number of worker threads used by the SparkApplication controller.")
	resyncInterval = flag.Int("resync-interval", 30, "Informer resync interval in seconds.")
	namespace = flag.String("namespace", apiv1.NamespaceAll, "The Kubernetes namespace to manage. Will manage custom resource objects of the managed CRD types for the whole cluster if unset.")
	enableWebhook = flag.Bool("enable-webhook", false, "Whether to enable the mutating admission webhook for admitting and patching Spark pods.")
	enableMetrics = flag.Bool("enable-metrics", false, "Whether to enable the metrics endpoint.")
	metricsPort = flag.String("metrics-port", "10254", "Port for the metrics endpoint.")
	metricsEndpoint = flag.String("metrics-endpoint", "/metrics", "Metrics endpoint.")
	metricsPrefix = flag.String("metrics-prefix", "", "Prefix for the metrics.")
	ingressURLFormat = flag.String("ingress-url-format", "", "Ingress URL format.")
	enableLeaderElection = flag.Bool("leader-election", false, "Enable Spark operator leader election.")
	leaderElectionLockNamespace = flag.String("leader-election-lock-namespace", "spark-operator", "Namespace in which to create the ConfigMap for leader election.")
	leaderElectionLockName = flag.String("leader-election-lock-name", "spark-operator-lock", "Name of the ConfigMap for leader election.")
	leaderElectionLeaseDuration = flag.Duration("leader-election-lease-duration", 15*time.Second, "Leader election lease duration.")
	leaderElectionRenewDeadline = flag.Duration("leader-election-renew-deadline", 14*time.Second, "Leader election renew deadline.")
	leaderElectionRetryPeriod = flag.Duration("leader-election-retry-period", 4*time.Second, "Leader election retry period.")
	batchSchedulerName = flag.String("batch-scheduler-name", "", "Use specified scheduler for pods' batch scheduling.")
	master = flag.String("master", "", "The address of the Kubernetes API server. Overrides any value in kubeconfig. Only required if out-of-cluster.")
	kubeConfig = flag.String("kubeConfig", "", "Path to a kube config. Only required if out-of-cluster.")
	installCRDs = flag.Bool("install-crds", true, "Whether to install CRDs")
	controllerThreads = flag.Int("controller-threads", 10, "Number of worker threads used by the SparkApplication controller.")
	resyncInterval = flag.Int("resync-interval", 30, "Informer resync interval in seconds.")
	namespace = flag.String("namespace", apiv1.NamespaceAll, "The Kubernetes namespace to manage. Will manage custom resource objects of the managed CRD types for the whole cluster if unset.")
	enableWebhook = flag.Bool("enable-webhook", false, "Whether to enable the mutating admission webhook for admitting and patching Spark pods.")
	enableResourceQuotaEnforcement = flag.Bool("enable-resource-quota-enforcement", false, "Whether to enable ResourceQuota enforcement for SparkApplication resources. Requires the webhook to be enabled.")
	enableMetrics = flag.Bool("enable-metrics", false, "Whether to enable the metrics endpoint.")
	metricsPort = flag.String("metrics-port", "10254", "Port for the metrics endpoint.")
	metricsEndpoint = flag.String("metrics-endpoint", "/metrics", "Metrics endpoint.")
	metricsPrefix = flag.String("metrics-prefix", "", "Prefix for the metrics.")
	ingressURLFormat = flag.String("ingress-url-format", "", "Ingress URL format.")
	enableLeaderElection = flag.Bool("leader-election", false, "Enable Spark operator leader election.")
	leaderElectionLockNamespace = flag.String("leader-election-lock-namespace", "spark-operator", "Namespace in which to create the ConfigMap for leader election.")
	leaderElectionLockName = flag.String("leader-election-lock-name", "spark-operator-lock", "Name of the ConfigMap for leader election.")
	leaderElectionLeaseDuration = flag.Duration("leader-election-lease-duration", 15*time.Second, "Leader election lease duration.")
	leaderElectionRenewDeadline = flag.Duration("leader-election-renew-deadline", 14*time.Second, "Leader election renew deadline.")
	leaderElectionRetryPeriod = flag.Duration("leader-election-retry-period", 4*time.Second, "Leader election retry period.")
	batchSchedulerName = flag.String("batch-scheduler-name", "", "Use specified scheduler for pods' batch scheduling.")
)

func main() {
@@ -172,15 +173,26 @@ func main() {

	var hook *webhook.WebHook
	if *enableWebhook {
		var coreV1InformerFactory informers.SharedInformerFactory
		if *enableResourceQuotaEnforcement {
			coreV1InformerFactory = buildCoreV1InformerFactory(kubeClient)
		}
		var err error
		// Don't deregister webhook on exit if leader election enabled (i.e. multiple webhooks running)
		hook, err = webhook.New(kubeClient, crInformerFactory, *namespace, !*enableLeaderElection)
		hook, err = webhook.New(kubeClient, crInformerFactory, *namespace, !*enableLeaderElection, *enableResourceQuotaEnforcement, coreV1InformerFactory)
		if err != nil {
			glog.Fatal(err)
		}
		if err = hook.Start(); err != nil {

		if *enableResourceQuotaEnforcement {
			go coreV1InformerFactory.Start(stopCh)
		}

		if err = hook.Start(stopCh); err != nil {
			glog.Fatal(err)
		}
	} else if *enableResourceQuotaEnforcement {
		glog.Fatal("Webhook must be enabled to use resource quota enforcement.")
	}

	if *enableLeaderElection {
@@ -261,3 +273,11 @@ func buildPodInformerFactory(kubeClient clientset.Interface) informers.SharedInf
	podFactoryOpts = append(podFactoryOpts, informers.WithTweakListOptions(tweakListOptionsFunc))
	return informers.NewSharedInformerFactoryWithOptions(kubeClient, time.Duration(*resyncInterval)*time.Second, podFactoryOpts...)
}

func buildCoreV1InformerFactory(kubeClient clientset.Interface) informers.SharedInformerFactory {
	var coreV1FactoryOpts []informers.SharedInformerOption
	if *namespace != apiv1.NamespaceAll {
		coreV1FactoryOpts = append(coreV1FactoryOpts, informers.WithNamespace(*namespace))
	}
	return informers.NewSharedInformerFactoryWithOptions(kubeClient, time.Duration(*resyncInterval)*time.Second, coreV1FactoryOpts...)
}
@@ -45,6 +45,9 @@ rules:
- apiGroups: [""]
  resources: ["nodes"]
  verbs: ["get"]
- apiGroups: [""]
  resources: ["resourcequotas"]
  verbs: ["get", "list", "watch"]
- apiGroups: [""]
  resources: ["events"]
  verbs: ["create", "update", "patch"]
@@ -52,7 +55,7 @@ rules:
  resources: ["customresourcedefinitions"]
  verbs: ["create", "get", "update", "delete"]
- apiGroups: ["admissionregistration.k8s.io"]
  resources: ["mutatingwebhookconfigurations"]
  resources: ["mutatingwebhookconfigurations", "validatingwebhookconfigurations"]
  verbs: ["create", "get", "update", "delete"]
- apiGroups: ["sparkoperator.k8s.io"]
  resources: ["sparkapplications", "scheduledsparkapplications"]
@@ -0,0 +1,97 @@
package resourceusage

import (
	"fmt"
	so "github.com/GoogleCloudPlatform/spark-on-k8s-operator/pkg/apis/sparkoperator.k8s.io/v1beta1"
	crdinformers "github.com/GoogleCloudPlatform/spark-on-k8s-operator/pkg/client/informers/externalversions"
	"github.com/golang/glog"
	corev1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/labels"
	"k8s.io/client-go/informers"
	corev1informers "k8s.io/client-go/informers/core/v1"
	"k8s.io/client-go/tools/cache"
)

type ResourceQuotaEnforcer struct {
	watcher ResourceUsageWatcher
	resourceQuotaInformer corev1informers.ResourceQuotaInformer
}

func NewResourceQuotaEnforcer(crdInformerFactory crdinformers.SharedInformerFactory, coreV1InformerFactory informers.SharedInformerFactory) ResourceQuotaEnforcer {
	resourceUsageWatcher := newResourceUsageWatcher(crdInformerFactory, coreV1InformerFactory)
	informer := coreV1InformerFactory.Core().V1().ResourceQuotas()
	informer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{})
	return ResourceQuotaEnforcer{
		watcher: resourceUsageWatcher,
		resourceQuotaInformer: informer,
	}
}

// TODO: There appears to be a deadlock in cache.WaitForCacheSync. Possibly related? https://github.com/kubernetes/kubernetes/issues/71450
// For now, return immediately. There will be a short window after startup where quota calculation is incorrect.
func (r ResourceQuotaEnforcer) WaitForCacheSync(stopCh <-chan struct{}) error {
	/*if !cache.WaitForCacheSync(stopCh, func() bool {
		return r.resourceQuotaInformer.Informer().HasSynced()
	}) {
		return fmt.Errorf("cache sync canceled")
	}*/
	return nil
}
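For context on the TODO above: a common cause of a `WaitForCacheSync` hang is waiting on an informer that was never actually started — `SharedInformerFactory.Start` only starts informers that were requested before the call, so an informer obtained afterwards needs another `Start`. Below is a minimal, hypothetical sketch of the conventional ordering; the helper name is illustrative and not part of this commit.

```go
package resourceusage

import (
	"fmt"

	"k8s.io/client-go/informers"
	"k8s.io/client-go/tools/cache"
)

// waitForQuotaCache is an illustrative helper, not part of this commit.
func waitForQuotaCache(factory informers.SharedInformerFactory, stopCh <-chan struct{}) error {
	// Request the informer first, then start the factory; otherwise HasSynced never flips.
	quotaSynced := factory.Core().V1().ResourceQuotas().Informer().HasSynced
	factory.Start(stopCh)
	if !cache.WaitForCacheSync(stopCh, quotaSynced) {
		return fmt.Errorf("timed out waiting for the ResourceQuota cache to sync")
	}
	return nil
}
```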

func (r *ResourceQuotaEnforcer) admitResource(kind, namespace, name string, requestedResources ResourceList) (string, error) {
	glog.V(2).Infof("Processing admission request for %s %s/%s, requesting: %s", kind, namespace, name, requestedResources)
	resourceQuotas, err := r.resourceQuotaInformer.Lister().ResourceQuotas(namespace).List(labels.Everything())
	if err != nil {
		return "", err
	}
	if (requestedResources.cpu.IsZero() && requestedResources.memory.IsZero()) || len(resourceQuotas) == 0 {
		return "", nil
	}

	currentNamespaceUsage, currentApplicationUsage := r.watcher.GetCurrentResourceUsageWithApplication(namespace, kind, name)

	for _, quota := range resourceQuotas {
		// Scope selectors not currently supported; ignore any ResourceQuota that does not match everything.
		if quota.Spec.ScopeSelector != nil || len(quota.Spec.Scopes) > 0 {
			continue
		}

		// If an existing application has increased its usage, check it against the quota again. If its usage hasn't increased, always allow it.
		if requestedResources.cpu.Cmp(currentApplicationUsage.cpu) == 1 {
			if cpuLimit, present := quota.Spec.Hard[corev1.ResourceCPU]; present {
				availableCpu := cpuLimit
				availableCpu.Sub(currentNamespaceUsage.cpu)
				if requestedResources.cpu.Cmp(availableCpu) == 1 {
					return fmt.Sprintf("%s %s/%s requests too many cores (%.3f cores requested, %.3f available).", kind, namespace, name, float64(requestedResources.cpu.MilliValue())/1000.0, float64(availableCpu.MilliValue())/1000.0), nil
				}
			}
		}

		if requestedResources.memory.Cmp(currentApplicationUsage.memory) == 1 {
			if memoryLimit, present := quota.Spec.Hard[corev1.ResourceMemory]; present {
				availableMemory := memoryLimit
				availableMemory.Sub(currentNamespaceUsage.memory)
				if requestedResources.memory.Cmp(availableMemory) == 1 {
					return fmt.Sprintf("%s %s/%s requests too much memory (%dMi requested, %dMi available).", kind, namespace, name, requestedResources.memory.Value()/(1<<20), availableMemory.Value()/(1<<20)), nil
				}
			}
		}
	}
	return "", nil
}

func (r *ResourceQuotaEnforcer) AdmitSparkApplication(app so.SparkApplication) (string, error) {
	resourceUsage, err := sparkApplicationResourceUsage(app)
	if err != nil {
		return "", err
	}
	return r.admitResource(KindSparkApplication, app.ObjectMeta.Namespace, app.ObjectMeta.Name, resourceUsage)
}

func (r *ResourceQuotaEnforcer) AdmitScheduledSparkApplication(app so.ScheduledSparkApplication) (string, error) {
	resourceUsage, err := scheduledSparkApplicationResourceUsage(app)
	if err != nil {
		return "", err
	}
	return r.admitResource(KindScheduledSparkApplication, app.ObjectMeta.Namespace, app.ObjectMeta.Name, resourceUsage)
}
@@ -0,0 +1,119 @@
package resourceusage

import (
	so "github.com/GoogleCloudPlatform/spark-on-k8s-operator/pkg/apis/sparkoperator.k8s.io/v1beta1"

	"github.com/golang/glog"
	corev1 "k8s.io/api/core/v1"
	"k8s.io/client-go/tools/cache"
)

func (r *ResourceUsageWatcher) onPodAdded(obj interface{}) {
	pod := obj.(*corev1.Pod)
	// A pod launched by the Spark operator will already be accounted for by the CRD informer callback
	if !launchedBySparkOperator(pod.ObjectMeta) {
		r.setResources("Pod", namespaceOrDefault(pod.ObjectMeta), pod.ObjectMeta.Name, podResourceUsage(pod), r.usageByNamespacePod)
	}
}

func (r *ResourceUsageWatcher) onPodUpdated(oldObj, newObj interface{}) {
	newPod := newObj.(*corev1.Pod)
	if !launchedBySparkOperator(newPod.ObjectMeta) {
		if newPod.Status.Phase == corev1.PodFailed || newPod.Status.Phase == corev1.PodSucceeded {
			r.deleteResources("Pod", namespaceOrDefault(newPod.ObjectMeta), newPod.ObjectMeta.Name, r.usageByNamespacePod)
		} else {
			r.setResources("Pod", namespaceOrDefault(newPod.ObjectMeta), newPod.ObjectMeta.Name, podResourceUsage(newPod), r.usageByNamespacePod)
		}
	}
}

func (r *ResourceUsageWatcher) onPodDeleted(obj interface{}) {
	var pod *corev1.Pod
	switch o := obj.(type) {
	case *corev1.Pod:
		pod = o
	case cache.DeletedFinalStateUnknown:
		pod = o.Obj.(*corev1.Pod)
	default:
		return
	}
	if !launchedBySparkOperator(pod.ObjectMeta) {
		r.deleteResources("Pod", namespaceOrDefault(pod.ObjectMeta), pod.ObjectMeta.Name, r.usageByNamespacePod)
	}
}

func (r *ResourceUsageWatcher) onSparkApplicationAdded(obj interface{}) {
	app := obj.(*so.SparkApplication)
	namespace := namespaceOrDefault(app.ObjectMeta)
	resources, err := sparkApplicationResourceUsage(*app)
	if err != nil {
		glog.Errorf("failed to determine resource usage of SparkApplication %s/%s: %v", namespace, app.ObjectMeta.Name, err)
	} else {
		r.setResources(KindSparkApplication, namespace, app.ObjectMeta.Name, resources, r.usageByNamespaceApplication)
	}
}

func (r *ResourceUsageWatcher) onSparkApplicationUpdated(oldObj, newObj interface{}) {
	oldApp := oldObj.(*so.SparkApplication)
	newApp := newObj.(*so.SparkApplication)
	if oldApp.ResourceVersion == newApp.ResourceVersion {
		return
	}
	namespace := namespaceOrDefault(newApp.ObjectMeta)
	newResources, err := sparkApplicationResourceUsage(*newApp)
	if err != nil {
		glog.Errorf("failed to determine resource usage of SparkApplication %s/%s: %v", namespace, newApp.ObjectMeta.Name, err)
	} else {
		r.setResources(KindSparkApplication, namespace, newApp.ObjectMeta.Name, newResources, r.usageByNamespaceApplication)
	}
}

func (r *ResourceUsageWatcher) onSparkApplicationDeleted(obj interface{}) {
	var app *so.SparkApplication
	switch o := obj.(type) {
	case *so.SparkApplication:
		app = o
	case cache.DeletedFinalStateUnknown:
		app = o.Obj.(*so.SparkApplication)
	default:
		return
	}
	namespace := namespaceOrDefault(app.ObjectMeta)
	r.deleteResources(KindSparkApplication, namespace, app.ObjectMeta.Name, r.usageByNamespaceApplication)
}

func (r *ResourceUsageWatcher) onScheduledSparkApplicationAdded(obj interface{}) {
	app := obj.(*so.ScheduledSparkApplication)
	namespace := namespaceOrDefault(app.ObjectMeta)
	resources, err := scheduledSparkApplicationResourceUsage(*app)
	if err != nil {
		glog.Errorf("failed to determine resource usage of ScheduledSparkApplication %s/%s: %v", namespace, app.ObjectMeta.Name, err)
	} else {
		r.setResources(KindScheduledSparkApplication, namespace, app.ObjectMeta.Name, resources, r.usageByNamespaceScheduledApplication)
	}
}

func (r *ResourceUsageWatcher) onScheduledSparkApplicationUpdated(oldObj, newObj interface{}) {
	newApp := newObj.(*so.ScheduledSparkApplication)
	namespace := namespaceOrDefault(newApp.ObjectMeta)
	newResources, err := scheduledSparkApplicationResourceUsage(*newApp)
	if err != nil {
		glog.Errorf("failed to determine resource usage of ScheduledSparkApplication %s/%s: %v", namespace, newApp.ObjectMeta.Name, err)
	} else {
		r.setResources(KindScheduledSparkApplication, namespace, newApp.ObjectMeta.Name, newResources, r.usageByNamespaceScheduledApplication)
	}
}

func (r *ResourceUsageWatcher) onScheduledSparkApplicationDeleted(obj interface{}) {
	var app *so.ScheduledSparkApplication
	switch o := obj.(type) {
	case *so.ScheduledSparkApplication:
		app = o
	case cache.DeletedFinalStateUnknown:
		app = o.Obj.(*so.ScheduledSparkApplication)
	default:
		return
	}
	namespace := namespaceOrDefault(app.ObjectMeta)
	r.deleteResources(KindScheduledSparkApplication, namespace, app.ObjectMeta.Name, r.usageByNamespaceScheduledApplication)
}
@@ -0,0 +1,241 @@
package resourceusage

import (
	"fmt"
	so "github.com/GoogleCloudPlatform/spark-on-k8s-operator/pkg/apis/sparkoperator.k8s.io/v1beta1"
	"github.com/GoogleCloudPlatform/spark-on-k8s-operator/pkg/config"
	corev1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/api/resource"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"math"
	"regexp"
	"strconv"
	"strings"
)

// ...are you serious, Go?
func max(x, y int64) int64 {
	if x > y {
		return x
	}
	return y
}

const (
	// https://spark.apache.org/docs/latest/configuration.html
	defaultCpuMillicores = 1000
	defaultMemoryBytes = 1 << 30 // 1Gi
	defaultMemoryOverhead = 0.1

	// https://github.com/apache/spark/blob/c4bbfd177b4e7cb46f47b39df9fd71d2d9a12c6d/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/Constants.scala#L85
	minMemoryOverhead = 384 * (1 << 20) // 384Mi
	nonJvmDefaultMemoryOverhead = 0.4
)

func namespaceOrDefault(meta metav1.ObjectMeta) string {
	namespace := meta.Namespace
	if namespace == "" {
		namespace = "default"
	}
	return namespace
}

func launchedBySparkOperator(meta metav1.ObjectMeta) bool {
	val, present := meta.Labels[config.LaunchedBySparkOperatorLabel]
	return present && val == "true"
}

func resourcesRequiredToSchedule(resourceRequirements corev1.ResourceRequirements) (cpu int64, memoryBytes int64) {
	if coresRequest, present := resourceRequirements.Requests[corev1.ResourceCPU]; present {
		cpu = coresRequest.MilliValue()
	} else if coresLimit, present := resourceRequirements.Limits[corev1.ResourceCPU]; present {
		cpu = coresLimit.MilliValue()
	}
	if memoryRequest, present := resourceRequirements.Requests[corev1.ResourceMemory]; present {
		memoryBytes = memoryRequest.Value()
	} else if memoryLimit, present := resourceRequirements.Limits[corev1.ResourceMemory]; present {
		memoryBytes = memoryLimit.Value()
	}
	return cpu, memoryBytes
}

func coresRequiredForSparkPod(spec so.SparkPodSpec, instances int64) (int64, error) {
	var cpu int64
	if spec.Cores != nil {
		cpu = int64(*spec.Cores * 1000)
	} else {
		cpu = defaultCpuMillicores
	}
	return cpu * instances, nil
}

var javaStringSuffixes = map[string]int64{
	"b": 1,
	"kb": 1 << 10,
	"k": 1 << 10,
	"mb": 1 << 20,
	"m": 1 << 20,
	"gb": 1 << 30,
	"g": 1 << 30,
	"tb": 1 << 40,
	"t": 1 << 40,
	"pb": 1 << 50,
	"p": 1 << 50,
}

var javaStringPattern = regexp.MustCompile(`^([0-9]+)([a-z]+)?$`)
var javaFractionStringPattern = regexp.MustCompile(`^([0-9]+\.[0-9]+)([a-z]+)?$`)

// Logic copied from https://github.com/apache/spark/blob/5264164a67df498b73facae207eda12ee133be7d/common/network-common/src/main/java/org/apache/spark/network/util/JavaUtils.java#L276
func parseJavaMemoryString(str string) (int64, error) {
	lower := strings.ToLower(str)
	if matches := javaStringPattern.FindStringSubmatch(lower); matches != nil {
		value, err := strconv.ParseInt(matches[1], 10, 64)
		if err != nil {
			return 0, err
		}
		suffix := matches[2]
		if multiplier, present := javaStringSuffixes[suffix]; present {
			return multiplier * value, nil
		}
	} else if matches = javaFractionStringPattern.FindStringSubmatch(lower); matches != nil {
		value, err := strconv.ParseFloat(matches[1], 64)
		if err != nil {
			return 0, err
		}
		suffix := matches[2]
		if multiplier, present := javaStringSuffixes[suffix]; present {
			return int64(float64(multiplier) * value), nil
		}
	}
	return 0, fmt.Errorf("could not parse string '%s' as a Java-style memory value. Examples: 100kb, 1.5mb, 1g", str)
}

// Logic copied from https://github.com/apache/spark/blob/c4bbfd177b4e7cb46f47b39df9fd71d2d9a12c6d/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/BasicDriverFeatureStep.scala
func memoryRequiredForSparkPod(spec so.SparkPodSpec, memoryOverheadFactor *string, appType so.SparkApplicationType, replicas int64) (int64, error) {
	var memoryBytes int64
	if spec.Memory != nil {
		memory, err := parseJavaMemoryString(*spec.Memory)
		if err != nil {
			return 0, err
		}
		memoryBytes = memory
	} else {
		memoryBytes = defaultMemoryBytes
	}
	var memoryOverheadBytes int64
	if spec.MemoryOverhead != nil {
		overhead, err := parseJavaMemoryString(*spec.MemoryOverhead)
		if err != nil {
			return 0, err
		}
		memoryOverheadBytes = overhead
	} else {
		var overheadFactor float64
		if memoryOverheadFactor != nil {
			overheadFactorScope, err := strconv.ParseFloat(*memoryOverheadFactor, 64)
			if err != nil {
				return 0, err
			}
			overheadFactor = overheadFactorScope
		} else {
			if appType == so.JavaApplicationType {
				overheadFactor = defaultMemoryOverhead
			} else {
				overheadFactor = nonJvmDefaultMemoryOverhead
			}
		}
		memoryOverheadBytes = int64(math.Max(overheadFactor*float64(memoryBytes), minMemoryOverhead))
	}
	return (memoryBytes + memoryOverheadBytes) * replicas, nil
}
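A worked example of the overhead rule above, as a standalone sketch with assumed inputs (not code from this commit): a JVM driver that requests `2g` and sets no explicit overhead gets max(0.1 × 2 GiB, 384 MiB) = 384 MiB of overhead, for a total of 2432 MiB.

```go
package main

import "fmt"

func main() {
	// Assumed inputs: spec.Memory = "2g", no MemoryOverhead, Java application, one replica.
	memoryBytes := int64(2) << 30   // "2g" parsed as 2 GiB
	overheadFactor := 0.1           // defaultMemoryOverhead for JVM apps
	minOverhead := int64(384) << 20 // 384Mi floor

	overhead := int64(overheadFactor * float64(memoryBytes)) // ≈ 205 MiB
	if overhead < minOverhead {
		overhead = minOverhead // the 384Mi floor wins here
	}
	fmt.Printf("total = %d MiB\n", (memoryBytes+overhead)>>20) // total = 2432 MiB
}
```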

func resourceUsage(spec so.SparkApplicationSpec) (ResourceList, error) {
	driverMemoryOverheadFactor := spec.MemoryOverheadFactor
	executorMemoryOverheadFactor := spec.MemoryOverheadFactor
	driverMemory, err := memoryRequiredForSparkPod(spec.Driver.SparkPodSpec, driverMemoryOverheadFactor, spec.Type, 1)
	if err != nil {
		return ResourceList{}, err
	}

	var instances int64 = 1
	if spec.Executor.Instances != nil {
		instances = int64(*spec.Executor.Instances)
	}
	executorMemory, err := memoryRequiredForSparkPod(spec.Executor.SparkPodSpec, executorMemoryOverheadFactor, spec.Type, instances)
	if err != nil {
		return ResourceList{}, err
	}

	driverCores, err := coresRequiredForSparkPod(spec.Driver.SparkPodSpec, 1)
	if err != nil {
		return ResourceList{}, err
	}

	executorCores, err := coresRequiredForSparkPod(spec.Executor.SparkPodSpec, instances)
	if err != nil {
		return ResourceList{}, err
	}

	return ResourceList{
		cpu: *resource.NewMilliQuantity(driverCores+executorCores, resource.DecimalSI),
		memory: *resource.NewQuantity(driverMemory+executorMemory, resource.DecimalSI),
	}, nil
}

func sparkApplicationResourceUsage(sparkApp so.SparkApplication) (ResourceList, error) {
	// A completed/failed SparkApplication consumes no resources
	if !sparkApp.Status.TerminationTime.IsZero() || sparkApp.Status.AppState.State == so.FailedState || sparkApp.Status.AppState.State == so.CompletedState {
		return ResourceList{}, nil
	}
	return resourceUsage(sparkApp.Spec)
}

func scheduledSparkApplicationResourceUsage(sparkApp so.ScheduledSparkApplication) (ResourceList, error) {
	// Failed validation, will consume no resources
	if sparkApp.Status.ScheduleState == so.FailedValidationState {
		return ResourceList{}, nil
	}
	return resourceUsage(sparkApp.Spec.Template)
}

func podResourceUsage(pod *corev1.Pod) ResourceList {
	spec := pod.Spec
	var initCores int64
	var initMemoryBytes int64
	completed := make(map[string]struct{})

	for _, containerStatus := range pod.Status.InitContainerStatuses {
		if containerStatus.State.Terminated != nil {
			completed[containerStatus.Name] = struct{}{}
		}
	}
	for _, containerStatus := range pod.Status.ContainerStatuses {
		if containerStatus.State.Terminated != nil {
			completed[containerStatus.Name] = struct{}{}
		}
	}

	for _, container := range spec.InitContainers {
		if _, present := completed[container.Name]; !present {
			c, m := resourcesRequiredToSchedule(container.Resources)
			initCores = max(c, initCores)
			initMemoryBytes = max(m, initMemoryBytes)
		}
	}
	var cores int64
	var memoryBytes int64
	for _, container := range spec.Containers {
		if _, present := completed[container.Name]; !present {
			c, m := resourcesRequiredToSchedule(container.Resources)
			cores += c
			memoryBytes += m
		}
	}
	cores = max(initCores, cores)
	memoryBytes = max(initMemoryBytes, memoryBytes)
	return ResourceList{
		cpu: *resource.NewMilliQuantity(cores, resource.DecimalSI),
		memory: *resource.NewQuantity(memoryBytes, resource.DecimalSI),
	}
}
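To illustrate the pod accounting above with hypothetical numbers (not taken from this commit): a pod with one init container requesting 1 CPU and two long-running containers requesting 250m and 500m is charged max(1000, 250+500) = 1000 millicores while the init container has not terminated, and 750 millicores afterwards, since terminated containers are skipped.

```go
package main

import "fmt"

// max64 mirrors the max() helper above; renamed to keep this sketch standalone.
func max64(x, y int64) int64 {
	if x > y {
		return x
	}
	return y
}

func main() {
	// Hypothetical pod: one init container (1000m) and two app containers (250m + 500m).
	initMillicores := int64(1000)
	appMillicores := int64(250 + 500)

	fmt.Println(max64(initMillicores, appMillicores)) // 1000 while the init container runs
	fmt.Println(appMillicores)                        // 750 once the init container has terminated
}
```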
@@ -0,0 +1,25 @@
package resourceusage

import (
	"testing"
)

func assertMemory(memoryString string, expectedBytes int64, t *testing.T) {
	m, err := parseJavaMemoryString(memoryString)
	if err != nil {
		t.Error(err)
		return
	}
	if m != expectedBytes {
		t.Errorf("%s: expected %v bytes, got %v bytes", memoryString, expectedBytes, m)
		return
	}
}

func TestJavaMemoryString(t *testing.T) {
	assertMemory("1b", 1, t)
	assertMemory("100k", 100*1024, t)
	assertMemory("1gb", 1024*1024*1024, t)
	assertMemory("10TB", 10*1024*1024*1024*1024, t)
	assertMemory("10PB", 10*1024*1024*1024*1024*1024, t)
}
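A few additional cases one might add to the same test file are sketched below (a hypothetical extension, not part of this commit), covering more of the single- and two-letter suffixes that the suffix table accepts:

```go
func TestJavaMemoryStringMoreSuffixes(t *testing.T) {
	// Hypothetical extra coverage for remaining suffix variants.
	assertMemory("32k", 32*1024, t)
	assertMemory("512mb", 512*1024*1024, t)
	assertMemory("2g", 2*1024*1024*1024, t)
	assertMemory("3t", 3*1024*1024*1024*1024, t)
}
```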
@@ -0,0 +1,157 @@
package resourceusage

import (
	"fmt"
	"sync"

	crdinformers "github.com/GoogleCloudPlatform/spark-on-k8s-operator/pkg/client/informers/externalversions"

	"github.com/golang/glog"
	"k8s.io/apimachinery/pkg/api/resource"
	"k8s.io/client-go/informers"
	corev1informers "k8s.io/client-go/informers/core/v1"
	"k8s.io/client-go/tools/cache"
)

type ResourceUsageWatcher struct {
	currentUsageLock *sync.RWMutex
	currentUsageByNamespace map[string]*ResourceList
	usageByNamespacePod map[string]map[string]*ResourceList
	usageByNamespaceScheduledApplication map[string]map[string]*ResourceList
	usageByNamespaceApplication map[string]map[string]*ResourceList
	crdInformerFactory crdinformers.SharedInformerFactory
	coreV1InformerFactory informers.SharedInformerFactory
	podInformer corev1informers.PodInformer
}

// more convenient replacement for corev1.ResourceList
type ResourceList struct {
	cpu resource.Quantity
	memory resource.Quantity
}

const (
	KindSparkApplication = "SparkApplication"
	KindScheduledSparkApplication = "ScheduledSparkApplication"
)

func (r ResourceList) String() string {
	return fmt.Sprintf("cpu: %v mcpu, memory %v bytes", r.cpu.MilliValue(), r.memory.Value())
}

func newResourceUsageWatcher(crdInformerFactory crdinformers.SharedInformerFactory, coreV1InformerFactory informers.SharedInformerFactory) ResourceUsageWatcher {
	glog.V(2).Infof("Creating new resource usage watcher")
	r := ResourceUsageWatcher{
		crdInformerFactory: crdInformerFactory,
		currentUsageLock: &sync.RWMutex{},
		coreV1InformerFactory: coreV1InformerFactory,
		currentUsageByNamespace: make(map[string]*ResourceList),
		usageByNamespacePod: make(map[string]map[string]*ResourceList),
		usageByNamespaceScheduledApplication: make(map[string]map[string]*ResourceList),
		usageByNamespaceApplication: make(map[string]map[string]*ResourceList),
	}
	// Note: Events for each handler are processed serially, so no coordination is needed between
	// the different callbacks. Coordination is still needed around updating the shared state.
	sparkApplicationInformer := r.crdInformerFactory.Sparkoperator().V1beta1().SparkApplications()
	sparkApplicationInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{
		AddFunc: r.onSparkApplicationAdded,
		UpdateFunc: r.onSparkApplicationUpdated,
		DeleteFunc: r.onSparkApplicationDeleted,
	})
	scheduledSparkApplicationInformer := r.crdInformerFactory.Sparkoperator().V1beta1().ScheduledSparkApplications()
	scheduledSparkApplicationInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{
		AddFunc: r.onScheduledSparkApplicationAdded,
		UpdateFunc: r.onScheduledSparkApplicationUpdated,
		DeleteFunc: r.onScheduledSparkApplicationDeleted,
	})
	r.podInformer = r.coreV1InformerFactory.Core().V1().Pods()
	r.podInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{
		AddFunc: r.onPodAdded,
		UpdateFunc: r.onPodUpdated,
		DeleteFunc: r.onPodDeleted,
	})
	return r
}

func (r *ResourceUsageWatcher) GetCurrentResourceUsage(namespace string) ResourceList {
	r.currentUsageLock.RLock()
	defer r.currentUsageLock.RUnlock()
	if resourceUsageInternal, present := r.currentUsageByNamespace[namespace]; present {
		return ResourceList{
			cpu: resourceUsageInternal.cpu,
			memory: resourceUsageInternal.memory,
		}
	}
	return ResourceList{}
}

func (r *ResourceUsageWatcher) GetCurrentResourceUsageWithApplication(namespace, kind, name string) (namespaceResources, applicationResources ResourceList) {
	r.currentUsageLock.RLock()
	defer r.currentUsageLock.RUnlock()
	if resourceUsageInternal, present := r.currentUsageByNamespace[namespace]; present {
		var applicationResources ResourceList
		var namespaceMap map[string]map[string]*ResourceList
		switch kind {
		case KindSparkApplication:
			namespaceMap = r.usageByNamespaceApplication
		case KindScheduledSparkApplication:
			namespaceMap = r.usageByNamespaceScheduledApplication
		}
		if applicationMap, present := namespaceMap[namespace]; present {
			if ar, present := applicationMap[name]; present {
				applicationResources = *ar
			}
		}
		currentUsage := *resourceUsageInternal // Creates a copy
		currentUsage.cpu.Sub(applicationResources.cpu)
		currentUsage.memory.Sub(applicationResources.memory)
		return currentUsage, applicationResources
	}
	return ResourceList{}, ResourceList{}
}

func (r *ResourceUsageWatcher) unsafeSetResources(namespace, name string, resources ResourceList, resourceMap map[string]map[string]*ResourceList) {
	if _, present := resourceMap[namespace]; !present {
		resourceMap[namespace] = make(map[string]*ResourceList)
	}
	// Clear any resource usage currently stored for this object
	r.unsafeDeleteResources(namespace, name, resourceMap)
	resourceMap[namespace][name] = &resources
	if current, present := r.currentUsageByNamespace[namespace]; present {
		current.cpu.Add(resources.cpu)
		current.memory.Add(resources.memory)
	} else {
		r.currentUsageByNamespace[namespace] = &ResourceList{
			cpu: resources.cpu,
			memory: resources.memory,
		}
	}
}

func (r *ResourceUsageWatcher) unsafeDeleteResources(namespace, name string, resourceMap map[string]map[string]*ResourceList) {
	if namespaceMap, present := resourceMap[namespace]; present {
		if resources, present := namespaceMap[name]; present {
			delete(resourceMap[namespace], name)
			if current, present := r.currentUsageByNamespace[namespace]; present {
				current.cpu.Sub(resources.cpu)
				current.memory.Sub(resources.memory)
			}
		}
	}
}

func (r *ResourceUsageWatcher) setResources(typeName, namespace, name string, resources ResourceList, resourceMap map[string]map[string]*ResourceList) {
	glog.V(3).Infof("Updating object %s %s/%s with resources %v", typeName, namespace, name, resources)
	r.currentUsageLock.Lock()
	r.unsafeSetResources(namespace, name, resources, resourceMap)
	r.currentUsageLock.Unlock()
	glog.V(3).Infof("Current resources for namespace %s: %v", namespace, r.currentUsageByNamespace[namespace])
}

func (r *ResourceUsageWatcher) deleteResources(typeName, namespace, name string, resourceMap map[string]map[string]*ResourceList) {
	glog.V(3).Infof("Deleting resources from object %s/%s", namespace, name)
	r.currentUsageLock.Lock()
	r.unsafeDeleteResources(namespace, name, resourceMap)
	r.currentUsageLock.Unlock()
	glog.V(3).Infof("Current resources for namespace %s: %v", namespace, r.currentUsageByNamespace[namespace])
}
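A quick way to see the bookkeeping above in action is to drive `setResources` and `GetCurrentResourceUsage` directly, without any informers. The following is a hypothetical in-package test, not part of this commit; the pod names and quantities are made up.

```go
package resourceusage

import (
	"sync"
	"testing"

	"k8s.io/apimachinery/pkg/api/resource"
)

func TestUsageBookkeepingSketch(t *testing.T) {
	w := ResourceUsageWatcher{
		currentUsageLock:        &sync.RWMutex{},
		currentUsageByNamespace: make(map[string]*ResourceList),
		usageByNamespacePod:     make(map[string]map[string]*ResourceList),
	}
	// Two hypothetical pods in the same namespace.
	w.setResources("Pod", "default", "worker-1",
		ResourceList{cpu: resource.MustParse("500m"), memory: resource.MustParse("1Gi")}, w.usageByNamespacePod)
	w.setResources("Pod", "default", "worker-2",
		ResourceList{cpu: resource.MustParse("1"), memory: resource.MustParse("2Gi")}, w.usageByNamespacePod)

	usage := w.GetCurrentResourceUsage("default")
	if usage.cpu.MilliValue() != 1500 {
		t.Errorf("expected 1500 mcpu, got %d", usage.cpu.MilliValue())
	}
	if usage.memory.Value() != 3*(1<<30) {
		t.Errorf("expected 3Gi, got %d bytes", usage.memory.Value())
	}
}
```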
@@ -36,16 +36,21 @@ import (
	"k8s.io/apimachinery/pkg/api/equality"
	"k8s.io/apimachinery/pkg/api/errors"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/client-go/informers"
	"k8s.io/client-go/kubernetes"

	crdapi "github.com/GoogleCloudPlatform/spark-on-k8s-operator/pkg/apis/sparkoperator.k8s.io"
	crdv1beta1 "github.com/GoogleCloudPlatform/spark-on-k8s-operator/pkg/apis/sparkoperator.k8s.io/v1beta1"
	crinformers "github.com/GoogleCloudPlatform/spark-on-k8s-operator/pkg/client/informers/externalversions"
	crdlisters "github.com/GoogleCloudPlatform/spark-on-k8s-operator/pkg/client/listers/sparkoperator.k8s.io/v1beta1"
	"github.com/GoogleCloudPlatform/spark-on-k8s-operator/pkg/config"
	"github.com/GoogleCloudPlatform/spark-on-k8s-operator/pkg/util"
	"github.com/GoogleCloudPlatform/spark-on-k8s-operator/pkg/webhook/resourceusage"
)

const (
	webhookName = "webhook.sparkoperator.k8s.io"
	webhookName = "webhook.sparkoperator.k8s.io"
	quotaWebhookName = "quotaenforcer.sparkoperator.k8s.io"
)

var podResource = metav1.GroupVersionResource{
@@ -54,17 +59,33 @@
	Resource: "pods",
}

var sparkApplicationResource = metav1.GroupVersionResource{
	Group: crdapi.GroupName,
	Version: crdv1beta1.Version,
	Resource: "sparkapplications",
}

var scheduledSparkApplicationResource = metav1.GroupVersionResource{
	Group: crdapi.GroupName,
	Version: crdv1beta1.Version,
	Resource: "scheduledsparkapplications",
}

// WebHook encapsulates things needed to run the webhook.
type WebHook struct {
	clientset kubernetes.Interface
	lister crdlisters.SparkApplicationLister
	server *http.Server
	certProvider *certProvider
	serviceRef *v1beta1.ServiceReference
	failurePolicy v1beta1.FailurePolicyType
	selector *metav1.LabelSelector
	sparkJobNamespace string
	deregisterOnExit bool
	clientset kubernetes.Interface
	informerFactory crinformers.SharedInformerFactory
	lister crdlisters.SparkApplicationLister
	server *http.Server
	certProvider *certProvider
	serviceRef *v1beta1.ServiceReference
	failurePolicy v1beta1.FailurePolicyType
	selector *metav1.LabelSelector
	sparkJobNamespace string
	deregisterOnExit bool
	enableResourceQuotaEnforcement bool
	resourceQuotaEnforcer resourceusage.ResourceQuotaEnforcer
	coreV1InformerFactory informers.SharedInformerFactory
}

// Configuration parsed from command-line flags
@@ -101,7 +122,10 @@ func New(
	clientset kubernetes.Interface,
	informerFactory crinformers.SharedInformerFactory,
	jobNamespace string,
	deregisterOnExit bool) (*WebHook, error) {
	deregisterOnExit bool,
	enableResourceQuotaEnforcement bool,
	coreV1InformerFactory informers.SharedInformerFactory) (*WebHook, error) {

	cert, err := NewCertProvider(
		userConfig.serverCert,
		userConfig.serverCertKey,
@@ -119,14 +143,18 @@ func New(
		Path: &path,
	}
	hook := &WebHook{
		clientset: clientset,
		lister: informerFactory.Sparkoperator().V1beta1().SparkApplications().Lister(),
		certProvider: cert,
		serviceRef: serviceRef,
		sparkJobNamespace: jobNamespace,
		deregisterOnExit: deregisterOnExit,
		failurePolicy: arv1beta1.Ignore,
		clientset: clientset,
		informerFactory: informerFactory,
		lister: informerFactory.Sparkoperator().V1beta1().SparkApplications().Lister(),
		certProvider: cert,
		serviceRef: serviceRef,
		sparkJobNamespace: jobNamespace,
		deregisterOnExit: deregisterOnExit,
		failurePolicy: arv1beta1.Ignore,
		coreV1InformerFactory: coreV1InformerFactory,
		enableResourceQuotaEnforcement: enableResourceQuotaEnforcement,
	}

	if userConfig.webhookFailOnError {
		if userConfig.webhookNamespaceSelector == "" {
			return nil, fmt.Errorf("webhook-namespace-selector must be set when webhook-fail-on-error is true")
@@ -168,14 +196,23 @@ func parseNamespaceSelector(selectorArg string) (*metav1.LabelSelector, error) {
}

// Start starts the admission webhook server and registers itself to the API server.
func (wh *WebHook) Start() error {
func (wh *WebHook) Start(stopCh <-chan struct{}) error {
	wh.certProvider.Start()
	wh.server.TLSConfig = wh.certProvider.tlsConfig()

	if wh.enableResourceQuotaEnforcement {
		resourceQuotaEnforcer := resourceusage.NewResourceQuotaEnforcer(wh.informerFactory, wh.coreV1InformerFactory)
		err := resourceQuotaEnforcer.WaitForCacheSync(stopCh)
		if err != nil {
			return err
		}
		wh.resourceQuotaEnforcer = resourceQuotaEnforcer
	}

	go func() {
		glog.Info("Starting the Spark pod admission webhook server")
		glog.Info("Starting the Spark admission webhook server")
		if err := wh.server.ListenAndServeTLS("", ""); err != nil && err != http.ErrServerClosed {
			glog.Errorf("error while serving the Spark pod admission webhook: %v\n", err)
			glog.Errorf("error while serving the Spark admission webhook: %v\n", err)
		}
	}()

@@ -229,15 +266,29 @@ func (wh *WebHook) serve(w http.ResponseWriter, r *http.Request) {
		internalError(w, err)
		return
	}

	if review.Request.Resource != podResource {
		denyRequest(w, fmt.Sprintf("unexpected resource type: %v", review.Request.Resource.String()), http.StatusUnsupportedMediaType)
	var whErr error
	var reviewResponse *admissionv1beta1.AdmissionResponse
	switch review.Request.Resource {
	case podResource:
		reviewResponse, whErr = mutatePods(review, wh.lister, wh.sparkJobNamespace)
	case sparkApplicationResource:
		if !wh.enableResourceQuotaEnforcement {
			unexpectedResourceType(w, review.Request.Resource.String())
			return
		}
		reviewResponse, whErr = admitSparkApplications(review, wh.resourceQuotaEnforcer)
	case scheduledSparkApplicationResource:
		if !wh.enableResourceQuotaEnforcement {
			unexpectedResourceType(w, review.Request.Resource.String())
			return
		}
		reviewResponse, whErr = admitScheduledSparkApplications(review, wh.resourceQuotaEnforcer)
	default:
		unexpectedResourceType(w, review.Request.Resource.String())
		return
	}

	reviewResponse, err := mutatePods(review, wh.lister, wh.sparkJobNamespace)
	if err != nil {
		internalError(w, err)
	if whErr != nil {
		internalError(w, whErr)
		return
	}

@@ -259,6 +310,10 @@ func (wh *WebHook) serve(w http.ResponseWriter, r *http.Request) {
	}
}

func unexpectedResourceType(w http.ResponseWriter, kind string) {
	denyRequest(w, fmt.Sprintf("unexpected resource type: %v", kind), http.StatusUnsupportedMediaType)
}

func internalError(w http.ResponseWriter, err error) {
	glog.Errorf("internal error: %v", err)
	denyRequest(w, err.Error(), 500)
@@ -288,28 +343,39 @@ func denyRequest(w http.ResponseWriter, reason string, code int) {
}

func (wh *WebHook) selfRegistration(webhookConfigName string) error {
	client := wh.clientset.AdmissionregistrationV1beta1().MutatingWebhookConfigurations()
	existing, getErr := client.Get(webhookConfigName, metav1.GetOptions{})
	if getErr != nil && !errors.IsNotFound(getErr) {
		return getErr
	}
	mwcClient := wh.clientset.AdmissionregistrationV1beta1().MutatingWebhookConfigurations()
	vwcClient := wh.clientset.AdmissionregistrationV1beta1().ValidatingWebhookConfigurations()

	caCert, err := readCertFile(wh.certProvider.caCertFile)
	if err != nil {
		return err
	}
	webhook := v1beta1.Webhook{
		Name: webhookName,
		Rules: []v1beta1.RuleWithOperations{
			{
				Operations: []v1beta1.OperationType{v1beta1.Create},
				Rule: v1beta1.Rule{
					APIGroups: []string{""},
					APIVersions: []string{"v1"},
					Resources: []string{"pods"},
				},

	mutatingRules := []v1beta1.RuleWithOperations{
		{
			Operations: []v1beta1.OperationType{v1beta1.Create},
			Rule: v1beta1.Rule{
				APIGroups: []string{""},
				APIVersions: []string{"v1"},
				Resources: []string{"pods"},
			},
		},
	}

	validatingRules := []v1beta1.RuleWithOperations{
		{
			Operations: []v1beta1.OperationType{v1beta1.Create, v1beta1.Update},
			Rule: v1beta1.Rule{
				APIGroups: []string{crdapi.GroupName},
				APIVersions: []string{crdv1beta1.Version},
				Resources: []string{sparkApplicationResource.Resource, scheduledSparkApplicationResource.Resource},
			},
		},
	}

	mutatingWebhook := v1beta1.Webhook{
		Name: webhookName,
		Rules: mutatingRules,
		ClientConfig: v1beta1.WebhookClientConfig{
			Service: wh.serviceRef,
			CABundle: caCert,
@@ -317,36 +383,140 @@ func (wh *WebHook) selfRegistration(webhookConfigName string) error {
		FailurePolicy: &wh.failurePolicy,
		NamespaceSelector: wh.selector,
	}
	webhooks := []v1beta1.Webhook{webhook}

	if getErr == nil && existing != nil {
		// Update case.
		glog.Info("Updating existing MutatingWebhookConfiguration for the Spark pod admission webhook")
		if !equality.Semantic.DeepEqual(webhooks, existing.Webhooks) {
			existing.Webhooks = webhooks
			if _, err := client.Update(existing); err != nil {
				return err
			}
	validatingWebhook := v1beta1.Webhook{
		Name: quotaWebhookName,
		Rules: validatingRules,
		ClientConfig: v1beta1.WebhookClientConfig{
			Service: wh.serviceRef,
			CABundle: caCert,
		},
		FailurePolicy: &wh.failurePolicy,
		NamespaceSelector: wh.selector,
	}

	mutatingWebhooks := []v1beta1.Webhook{mutatingWebhook}
	validatingWebhooks := []v1beta1.Webhook{validatingWebhook}

	mutatingExisting, mutatingGetErr := mwcClient.Get(webhookConfigName, metav1.GetOptions{})
	if mutatingGetErr != nil {
		if !errors.IsNotFound(mutatingGetErr) {
			return mutatingGetErr
		}
	} else {
		// Create case.
		glog.Info("Creating a MutatingWebhookConfiguration for the Spark pod admission webhook")
		webhookConfig := &v1beta1.MutatingWebhookConfiguration{
			ObjectMeta: metav1.ObjectMeta{
				Name: webhookConfigName,
			},
			Webhooks: webhooks,
			Webhooks: mutatingWebhooks,
		}
		if _, err := client.Create(webhookConfig); err != nil {
		if _, err := mwcClient.Create(webhookConfig); err != nil {
			return err
		}
	} else {
		// Update case.
		glog.Info("Updating existing MutatingWebhookConfiguration for the Spark pod admission webhook")
		if !equality.Semantic.DeepEqual(mutatingWebhooks, mutatingExisting.Webhooks) {
			mutatingExisting.Webhooks = mutatingWebhooks
			if _, err := mwcClient.Update(mutatingExisting); err != nil {
				return err
			}
		}
	}

	if wh.enableResourceQuotaEnforcement {
		validatingExisting, validatingGetErr := vwcClient.Get(webhookConfigName, metav1.GetOptions{})
		if validatingGetErr != nil {
			if !errors.IsNotFound(validatingGetErr) {
				return validatingGetErr
			}
			// Create case.
			glog.Info("Creating a ValidatingWebhookConfiguration for the SparkApplication resource quota enforcement webhook")
			webhookConfig := &v1beta1.ValidatingWebhookConfiguration{
				ObjectMeta: metav1.ObjectMeta{
					Name: webhookConfigName,
				},
				Webhooks: validatingWebhooks,
			}
			if _, err := vwcClient.Create(webhookConfig); err != nil {
				return err
			}

		} else {
			// Update case.
			glog.Info("Updating existing ValidatingWebhookConfiguration for the SparkApplication resource quota enforcement webhook")
			if !equality.Semantic.DeepEqual(validatingWebhooks, validatingExisting.Webhooks) {
				validatingExisting.Webhooks = validatingWebhooks
				if _, err := vwcClient.Update(validatingExisting); err != nil {
					return err
				}
			}
		}
	}
	return nil
}

func (wh *WebHook) selfDeregistration(webhookConfigName string) error {
	client := wh.clientset.AdmissionregistrationV1beta1().MutatingWebhookConfigurations()
	return client.Delete(webhookConfigName, metav1.NewDeleteOptions(0))
	mutatingConfigs := wh.clientset.AdmissionregistrationV1beta1().MutatingWebhookConfigurations()
	validatingConfigs := wh.clientset.AdmissionregistrationV1beta1().ValidatingWebhookConfigurations()
	if wh.enableResourceQuotaEnforcement {
		err := validatingConfigs.Delete(webhookConfigName, metav1.NewDeleteOptions(0))
		if err != nil {
			return err
		}
	}
	return mutatingConfigs.Delete(webhookConfigName, metav1.NewDeleteOptions(0))
}

func admitSparkApplications(review *admissionv1beta1.AdmissionReview, enforcer resourceusage.ResourceQuotaEnforcer) (*admissionv1beta1.AdmissionResponse, error) {
	if review.Request.Resource != sparkApplicationResource {
		return nil, fmt.Errorf("expected resource to be %s, got %s", sparkApplicationResource, review.Request.Resource)
	}

	raw := review.Request.Object.Raw
	app := &crdv1beta1.SparkApplication{}
	if err := json.Unmarshal(raw, app); err != nil {
		return nil, fmt.Errorf("failed to unmarshal a SparkApplication from the raw data in the admission request: %v", err)
	}

	reason, err := enforcer.AdmitSparkApplication(*app)
	if err != nil {
		return nil, fmt.Errorf("resource quota enforcement failed for SparkApplication: %v", err)
	}
	response := &admissionv1beta1.AdmissionResponse{Allowed: reason == ""}
	if reason != "" {
		response.Result = &metav1.Status{
			Message: reason,
			Code: 400,
		}
	}
	return response, nil
}

func admitScheduledSparkApplications(review *admissionv1beta1.AdmissionReview, enforcer resourceusage.ResourceQuotaEnforcer) (*admissionv1beta1.AdmissionResponse, error) {
	if review.Request.Resource != scheduledSparkApplicationResource {
		return nil, fmt.Errorf("expected resource to be %s, got %s", scheduledSparkApplicationResource, review.Request.Resource)
	}

	raw := review.Request.Object.Raw
	app := &crdv1beta1.ScheduledSparkApplication{}
	if err := json.Unmarshal(raw, app); err != nil {
		return nil, fmt.Errorf("failed to unmarshal a ScheduledSparkApplication from the raw data in the admission request: %v", err)
	}

	response := &admissionv1beta1.AdmissionResponse{Allowed: true}
	reason, err := enforcer.AdmitScheduledSparkApplication(*app)
	if err != nil {
		return nil, fmt.Errorf("resource quota enforcement failed for ScheduledSparkApplication: %v", err)
	} else if reason != "" {
		response.Allowed = false
		response.Result = &metav1.Status{
			Message: reason,
			Code: 400,
		}
	}
	return response, nil
}

func mutatePods(