// spark-operator/cmd/operator/controller/start.go

/*
Copyright 2024 The Kubeflow authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
https://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package controller

import (
	"crypto/tls"
	"flag"
	"os"
	"slices"
	"time"

	// Import all Kubernetes client auth plugins (e.g. Azure, GCP, OIDC, etc.)
	// to ensure that exec-entrypoint and run can make use of them.
	_ "k8s.io/client-go/plugin/pkg/client/auth"

	"github.com/spf13/cobra"
	"github.com/spf13/viper"
	"go.uber.org/zap"
	"go.uber.org/zap/zapcore"
	corev1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/labels"
	"k8s.io/apimachinery/pkg/runtime"
	utilruntime "k8s.io/apimachinery/pkg/util/runtime"
	clientgoscheme "k8s.io/client-go/kubernetes/scheme"
	"k8s.io/utils/clock"
	ctrl "sigs.k8s.io/controller-runtime"
	"sigs.k8s.io/controller-runtime/pkg/cache"
	"sigs.k8s.io/controller-runtime/pkg/client"
	"sigs.k8s.io/controller-runtime/pkg/controller"
	"sigs.k8s.io/controller-runtime/pkg/healthz"
	logzap "sigs.k8s.io/controller-runtime/pkg/log/zap"
	metricsserver "sigs.k8s.io/controller-runtime/pkg/metrics/server"
	ctrlwebhook "sigs.k8s.io/controller-runtime/pkg/webhook"

	sparkoperator "github.com/kubeflow/spark-operator"
	"github.com/kubeflow/spark-operator/api/v1beta1"
	"github.com/kubeflow/spark-operator/api/v1beta2"
	"github.com/kubeflow/spark-operator/internal/controller/scheduledsparkapplication"
	"github.com/kubeflow/spark-operator/internal/controller/sparkapplication"
	"github.com/kubeflow/spark-operator/internal/metrics"
	"github.com/kubeflow/spark-operator/internal/scheduler"
	"github.com/kubeflow/spark-operator/internal/scheduler/volcano"
	"github.com/kubeflow/spark-operator/internal/scheduler/yunikorn"
	"github.com/kubeflow/spark-operator/pkg/common"
	"github.com/kubeflow/spark-operator/pkg/util"
	// +kubebuilder:scaffold:imports
)

var (
	scheme = runtime.NewScheme()
	logger = ctrl.Log.WithName("")
)

var (
	namespaces []string

	// Controller
	controllerThreads int
	cacheSyncTimeout  time.Duration

	// Batch scheduler
	enableBatchScheduler  bool
	defaultBatchScheduler string

	// Spark web UI service and ingress
	enableUIService  bool
	ingressClassName string
	ingressURLFormat string

	// Leader election
	enableLeaderElection        bool
	leaderElectionLockName      string
	leaderElectionLockNamespace string
	leaderElectionLeaseDuration time.Duration
	leaderElectionRenewDeadline time.Duration
	leaderElectionRetryPeriod   time.Duration

	// Metrics
	enableMetrics                 bool
	metricsBindAddress            string
	metricsEndpoint               string
	metricsPrefix                 string
	metricsLabels                 []string
	metricsJobStartLatencyBuckets []float64

	healthProbeBindAddress string
	secureMetrics          bool
	enableHTTP2            bool
	development            bool

	zapOptions = logzap.Options{}
)

func init() {
	utilruntime.Must(clientgoscheme.AddToScheme(scheme))
	utilruntime.Must(v1beta1.AddToScheme(scheme))
	utilruntime.Must(v1beta2.AddToScheme(scheme))
	// +kubebuilder:scaffold:scheme
}
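
// NewStartCommand creates the `start` subcommand that runs the Spark operator
// controller manager and registers all of its command-line flags.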
func NewStartCommand() *cobra.Command {
	var command = &cobra.Command{
		Use:   "start",
		Short: "Start controller and webhook",
		PreRun: func(_ *cobra.Command, args []string) {
			development = viper.GetBool("development")
		},
		Run: func(_ *cobra.Command, args []string) {
			sparkoperator.PrintVersion(false)
			start()
		},
	}
	command.Flags().IntVar(&controllerThreads, "controller-threads", 10, "Number of worker threads used by the SparkApplication controller.")
	command.Flags().StringSliceVar(&namespaces, "namespaces", []string{""}, "The list of Kubernetes namespaces to manage. If unset, the operator manages custom resource objects of the managed CRD types for the whole cluster.")
	command.Flags().DurationVar(&cacheSyncTimeout, "cache-sync-timeout", 30*time.Second, "Informer cache sync timeout.")

	command.Flags().BoolVar(&enableBatchScheduler, "enable-batch-scheduler", false, "Enable batch schedulers.")
	command.Flags().StringVar(&defaultBatchScheduler, "default-batch-scheduler", "", "Default batch scheduler.")

	command.Flags().BoolVar(&enableUIService, "enable-ui-service", true, "Enable Spark Web UI service.")
	command.Flags().StringVar(&ingressClassName, "ingress-class-name", "", "Set ingressClassName for ingress resources created.")
	command.Flags().StringVar(&ingressURLFormat, "ingress-url-format", "", "Ingress URL format.")

	command.Flags().BoolVar(&enableLeaderElection, "leader-election", false, "Enable leader election for controller manager. "+
		"Enabling this will ensure there is only one active controller manager.")
	command.Flags().StringVar(&leaderElectionLockName, "leader-election-lock-name", "spark-operator-lock", "Name of the ConfigMap for leader election.")
	command.Flags().StringVar(&leaderElectionLockNamespace, "leader-election-lock-namespace", "spark-operator", "Namespace in which to create the ConfigMap for leader election.")
	command.Flags().DurationVar(&leaderElectionLeaseDuration, "leader-election-lease-duration", 15*time.Second, "Leader election lease duration.")
	command.Flags().DurationVar(&leaderElectionRenewDeadline, "leader-election-renew-deadline", 14*time.Second, "Leader election renew deadline.")
	command.Flags().DurationVar(&leaderElectionRetryPeriod, "leader-election-retry-period", 4*time.Second, "Leader election retry period.")

	command.Flags().BoolVar(&enableMetrics, "enable-metrics", false, "Enable metrics.")
	command.Flags().StringVar(&metricsBindAddress, "metrics-bind-address", "0", "The address the metrics endpoint binds to, e.g. \":8080\". "+
		"Defaults to \"0\", which disables the metrics server.")
	command.Flags().StringVar(&metricsEndpoint, "metrics-endpoint", "/metrics", "Metrics endpoint.")
	command.Flags().StringVar(&metricsPrefix, "metrics-prefix", "", "Prefix for the metrics.")
	command.Flags().StringSliceVar(&metricsLabels, "metrics-labels", []string{}, "Labels to be added to the metrics.")
	command.Flags().Float64SliceVar(&metricsJobStartLatencyBuckets, "metrics-job-start-latency-buckets", []float64{30, 60, 90, 120, 150, 180, 210, 240, 270, 300}, "Buckets for the job start latency histogram.")

	command.Flags().StringVar(&healthProbeBindAddress, "health-probe-bind-address", ":8081", "The address the probe endpoint binds to.")
	command.Flags().BoolVar(&secureMetrics, "secure-metrics", false, "If set, the metrics endpoint is served securely.")
	command.Flags().BoolVar(&enableHTTP2, "enable-http2", false, "If set, HTTP/2 will be enabled for the metrics and webhook servers.")

	flagSet := flag.NewFlagSet("controller", flag.ExitOnError)
	ctrl.RegisterFlags(flagSet)
	zapOptions.BindFlags(flagSet)
	command.Flags().AddGoFlagSet(flagSet)

	return command
}
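
// start builds the controller-runtime manager, registers the SparkApplication
// and ScheduledSparkApplication controllers along with health and readiness
// checks, and runs the manager until it receives a termination signal.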
func start() {
	setupLog()

	// Create the client rest config. Use kubeConfig if given, otherwise assume in-cluster.
	cfg, err := ctrl.GetConfig()
	if err != nil {
		logger.Error(err, "failed to get kube config")
		os.Exit(1)
	}

	// Create the manager.
	tlsOptions := newTLSOptions()
	mgr, err := ctrl.NewManager(cfg, ctrl.Options{
		Scheme: scheme,
		Cache:  newCacheOptions(),
		Metrics: metricsserver.Options{
			BindAddress:   metricsBindAddress,
			SecureServing: secureMetrics,
			TLSOpts:       tlsOptions,
		},
		WebhookServer: ctrlwebhook.NewServer(ctrlwebhook.Options{
			TLSOpts: tlsOptions,
		}),
		HealthProbeBindAddress:  healthProbeBindAddress,
		LeaderElection:          enableLeaderElection,
		LeaderElectionID:        leaderElectionLockName,
		LeaderElectionNamespace: leaderElectionLockNamespace,
		// LeaderElectionReleaseOnCancel defines if the leader should step down voluntarily
		// when the Manager ends. This requires the binary to end immediately when the
		// Manager is stopped; otherwise this setting is unsafe. Setting this significantly
		// speeds up voluntary leader transitions as the new leader doesn't have to wait
		// the LeaseDuration time first.
		//
		// In the default scaffold provided, the program ends immediately after
		// the manager stops, so it would be fine to enable this option. However,
		// if you are doing, or intend to do, any operation such as performing cleanups
		// after the manager stops, then its usage might be unsafe.
		// LeaderElectionReleaseOnCancel: true,
	})
	if err != nil {
		logger.Error(err, "failed to create manager")
		os.Exit(1)
	}

	var registry *scheduler.Registry
	if enableBatchScheduler {
		registry = scheduler.GetRegistry()
		_ = registry.Register(common.VolcanoSchedulerName, volcano.Factory)
		_ = registry.Register(yunikorn.SchedulerName, yunikorn.Factory)

		// Validate that the default batch scheduler, if set, is one of the registered schedulers.
		schedulerNames := registry.GetRegisteredSchedulerNames()
		if defaultBatchScheduler != "" && !slices.Contains(schedulerNames, defaultBatchScheduler) {
			logger.Error(nil, "Failed to find default batch scheduler in registered schedulers", "scheduler", defaultBatchScheduler)
			os.Exit(1)
		}
	}
	// Setup controller for SparkApplication.
	if err = sparkapplication.NewReconciler(
		mgr,
		mgr.GetScheme(),
		mgr.GetClient(),
		mgr.GetEventRecorderFor("spark-application-controller"),
		registry,
		newSparkApplicationReconcilerOptions(),
	).SetupWithManager(mgr, newControllerOptions()); err != nil {
		logger.Error(err, "Failed to create controller", "controller", "SparkApplication")
		os.Exit(1)
	}

	// Setup controller for ScheduledSparkApplication.
	if err = scheduledsparkapplication.NewReconciler(
		mgr.GetScheme(),
		mgr.GetClient(),
		mgr.GetEventRecorderFor("scheduled-spark-application-controller"),
		clock.RealClock{},
		newScheduledSparkApplicationReconcilerOptions(),
	).SetupWithManager(mgr, newControllerOptions()); err != nil {
		logger.Error(err, "Failed to create controller", "controller", "ScheduledSparkApplication")
		os.Exit(1)
	}
	// +kubebuilder:scaffold:builder

	if err := mgr.AddHealthzCheck("healthz", healthz.Ping); err != nil {
		logger.Error(err, "Failed to set up health check")
		os.Exit(1)
	}
	if err := mgr.AddReadyzCheck("readyz", healthz.Ping); err != nil {
		logger.Error(err, "Failed to set up ready check")
		os.Exit(1)
	}

	logger.Info("Starting manager")
	if err := mgr.Start(ctrl.SetupSignalHandler()); err != nil {
		logger.Error(err, "Failed to start manager")
		os.Exit(1)
	}
}

// setupLog configures the logging system.
func setupLog() {
	ctrl.SetLogger(logzap.New(
		logzap.UseFlagOptions(&zapOptions),
		func(o *logzap.Options) {
			o.Development = development
		}, func(o *logzap.Options) {
			o.ZapOpts = append(o.ZapOpts, zap.AddCaller())
		}, func(o *logzap.Options) {
			var config zapcore.EncoderConfig
			if !development {
				config = zap.NewProductionEncoderConfig()
			} else {
				config = zap.NewDevelopmentEncoderConfig()
			}
			config.EncodeLevel = zapcore.CapitalColorLevelEncoder
			config.EncodeTime = zapcore.ISO8601TimeEncoder
			config.EncodeCaller = zapcore.ShortCallerEncoder
			o.Encoder = zapcore.NewConsoleEncoder(config)
		}),
	)
}
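
// newTLSOptions returns the TLS option functions shared by the metrics and
// webhook servers.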
func newTLSOptions() []func(c *tls.Config) {
	// If the enable-http2 flag is false (the default), http/2 should be disabled
	// due to its vulnerabilities. More specifically, disabling http/2 will
	// prevent the servers from being vulnerable to the HTTP/2 Stream Cancellation and
	// Rapid Reset CVEs. For more information see:
	// - https://github.com/advisories/GHSA-qppj-fm5r-hxr3
	// - https://github.com/advisories/GHSA-4374-p667-p6c8
	disableHTTP2 := func(c *tls.Config) {
		logger.Info("disabling http/2")
		c.NextProtos = []string{"http/1.1"}
	}

	tlsOpts := []func(*tls.Config){}
	if !enableHTTP2 {
		tlsOpts = append(tlsOpts, disableHTTP2)
	}
	return tlsOpts
}

// newCacheOptions creates and returns a cache.Options instance configured with default namespaces and object caching settings.
func newCacheOptions() cache.Options {
	defaultNamespaces := make(map[string]cache.Config)
	if !util.ContainsString(namespaces, cache.AllNamespaces) {
		for _, ns := range namespaces {
			defaultNamespaces[ns] = cache.Config{}
		}
	}

	options := cache.Options{
		Scheme:            scheme,
		DefaultNamespaces: defaultNamespaces,
		ByObject: map[client.Object]cache.ByObject{
			&corev1.Pod{}: {
				Label: labels.SelectorFromSet(labels.Set{
					common.LabelLaunchedBySparkOperator: "true",
				}),
			},
			&corev1.ConfigMap{}:             {},
			&corev1.PersistentVolumeClaim{}: {},
			&corev1.Service{}:               {},
			&v1beta2.SparkApplication{}:     {},
		},
	}

	return options
}

// newControllerOptions creates and returns a controller.Options instance configured with the given options.
func newControllerOptions() controller.Options {
	options := controller.Options{
		MaxConcurrentReconciles: controllerThreads,
		CacheSyncTimeout:        cacheSyncTimeout,
	}
	return options
}
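
// newSparkApplicationReconcilerOptions builds the options for the
// SparkApplication reconciler, registering application and executor metrics
// when metrics are enabled.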
func newSparkApplicationReconcilerOptions() sparkapplication.Options {
	var sparkApplicationMetrics *metrics.SparkApplicationMetrics
	var sparkExecutorMetrics *metrics.SparkExecutorMetrics
	if enableMetrics {
		sparkApplicationMetrics = metrics.NewSparkApplicationMetrics(metricsPrefix, metricsLabels, metricsJobStartLatencyBuckets)
		sparkApplicationMetrics.Register()
		sparkExecutorMetrics = metrics.NewSparkExecutorMetrics(metricsPrefix, metricsLabels)
		sparkExecutorMetrics.Register()
	}
	options := sparkapplication.Options{
		Namespaces:              namespaces,
		EnableUIService:         enableUIService,
		IngressClassName:        ingressClassName,
		IngressURLFormat:        ingressURLFormat,
		DefaultBatchScheduler:   defaultBatchScheduler,
		SparkApplicationMetrics: sparkApplicationMetrics,
		SparkExecutorMetrics:    sparkExecutorMetrics,
	}
	return options
}
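
// newScheduledSparkApplicationReconcilerOptions builds the options for the
// ScheduledSparkApplication reconciler.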
func newScheduledSparkApplicationReconcilerOptions() scheduledsparkapplication.Options {
	options := scheduledsparkapplication.Options{
		Namespaces: namespaces,
	}
	return options
}
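
// A minimal usage sketch (not part of this file): the operator's main package
// is assumed to mount this subcommand on a root command, roughly as follows.
// The root command name "spark-operator" is an assumption for illustration.
//
//	rootCmd := &cobra.Command{Use: "spark-operator"}
//	rootCmd.AddCommand(controller.NewStartCommand())
//	if err := rootCmd.Execute(); err != nil {
//		os.Exit(1)
//	}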