notebooks/components/pvcviewer-controller/controllers/pvcviewer_controller.go

446 lines
14 KiB
Go

/*
Copyright 2023.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package controllers
import (
"context"
"fmt"
"os"
"github.com/go-logr/logr"
appsv1 "k8s.io/api/apps/v1"
corev1 "k8s.io/api/core/v1"
apierrs "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/types"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/log"
"sigs.k8s.io/controller-runtime/pkg/reconcile"
kubefloworgv1alpha1 "github.com/kubeflow/kubeflow/components/pvc-viewer/api/v1alpha1"
)
// PVCViewerReconciler reconciles a PVCViewer object.
// For every PVCViewer it creates or updates a Deployment and, when networking
// is configured, a Service and an Istio VirtualService; it then writes the
// resulting state back to the PVCViewer's status.
type PVCViewerReconciler struct {
	// Client is the embedded Kubernetes client used for every Get, List,
	// Create, Update and status update issued by this reconciler.
	client.Client
	// Scheme is handed to ctrl.SetControllerReference when the PVCViewer is
	// set as the controller owner of the generated resources.
	Scheme *runtime.Scheme
}
const (
	// resourcePrefix is prepended to the PVCViewer's name to derive the name
	// of every generated resource (Deployment, Service, VirtualService), so
	// that those names are unique.
	resourcePrefix = "pvcviewer-"

	// Keys and values of the labels attached to generated resources.
	nameLabelKey     = "app.kubernetes.io/name"
	instanceLabelKey = "app.kubernetes.io/instance"
	partOfLabelKey   = "app.kubernetes.io/part-of"
	partOfLabelValue = "pvc-viewer"

	// servicePort is the port exposed by the generated Service and targeted
	// by the VirtualService route.
	servicePort int32 = 80

	// istioGatewayEnvKey names the environment variable that selects the
	// Istio gateway; defaultIstioGateway is used when it is unset or empty.
	istioGatewayEnvKey  = "ISTIO_GATEWAY"
	defaultIstioGateway = "kubeflow/kubeflow-gateway"
)
// virtualServiceTemplate is an otherwise empty Istio VirtualService that only
// carries its apiVersion and kind. It is passed to Owns() in SetupWithManager
// so the controller can refer to the VirtualService kind without a typed
// Istio API dependency.
var virtualServiceTemplate = &unstructured.Unstructured{
	Object: map[string]interface{}{
		"kind":       "VirtualService",
		"apiVersion": "networking.istio.io/v1alpha3",
	},
}
// Default permissions for the PVCViewer
// +kubebuilder:rbac:groups=kubeflow.org,resources=pvcviewers,verbs=get;list;watch;create;update;patch;delete
// +kubebuilder:rbac:groups=kubeflow.org,resources=pvcviewers/status,verbs=get;update;patch
// +kubebuilder:rbac:groups=kubeflow.org,resources=pvcviewers/finalizers,verbs=update
// Add permissions to create child resources
// +kubebuilder:rbac:groups=apps,resources=deployments,verbs=get;list;watch;create;update
// +kubebuilder:rbac:groups=core,resources=services,verbs=get;list;watch;create;update
// +kubebuilder:rbac:groups=networking.istio.io,resources=virtualservices,verbs=get;list;watch;create;update
// Add permissions to read external resources
// +kubebuilder:rbac:groups=core,resources=pods,verbs=get;list;watch
// +kubebuilder:rbac:groups=core,resources=persistentvolumeclaims,verbs=get;list;watch
// SetupWithManager registers this reconciler with the given manager.
// The controller is declared for PVCViewer objects and as the owner of the
// kinds it creates for them: Deployments, Services and Istio VirtualServices.
func (r *PVCViewerReconciler) SetupWithManager(mgr ctrl.Manager) error {
	// Primary resource handled by this controller.
	controller := ctrl.NewControllerManagedBy(mgr).For(&kubefloworgv1alpha1.PVCViewer{})

	// Kinds this controller manages, i.e. creates, for a PVCViewer.
	controller = controller.Owns(&appsv1.Deployment{})
	controller = controller.Owns(&corev1.Service{})
	controller = controller.Owns(virtualServiceTemplate)

	return controller.Complete(r)
}
// Reconcile is invoked by the controller for a single PVCViewer request and
// moves the cluster state towards the state described by that PVCViewer.
//
// It reconciles the Deployment, the Service and the VirtualService in that
// order and finally the PVCViewer's status. The first failing step is logged
// and its error returned. A PVCViewer that no longer exists is not an error.
func (r *PVCViewerReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) {
	logger := log.FromContext(ctx)

	viewer := &kubefloworgv1alpha1.PVCViewer{}
	if err := r.Get(ctx, req.NamespacedName, viewer); err != nil {
		// Nothing to clean up on NotFound: created objects are garbage
		// collected automatically once their parent is deleted.
		return reconcile.Result{}, client.IgnoreNotFound(err)
	}

	// The status step is shared by the deletion path and the regular path.
	syncStatus := func() (ctrl.Result, error) {
		if err := r.reconcileStatus(ctx, logger, viewer.Name, viewer.Namespace); err != nil {
			logger.Error(err, "Error while reconciling status")
			return ctrl.Result{}, err
		}
		return ctrl.Result{}, nil
	}

	if !viewer.DeletionTimestamp.IsZero() {
		// The object is being deleted. Child resources are garbage collected,
		// so only the status keeps being reconciled until the finalizer is
		// removed.
		logger.Info("PVCViewer is being deleted")
		return syncStatus()
	}

	labels := map[string]string{
		nameLabelKey:     viewer.Name,
		instanceLabelKey: resourcePrefix + viewer.Name,
		partOfLabelKey:   partOfLabelValue,
	}

	// Child resources, reconciled in this order.
	children := []struct {
		name string
		sync func(context.Context, logr.Logger, *kubefloworgv1alpha1.PVCViewer, map[string]string) error
	}{
		{name: "deployment", sync: r.reconcileDeployment},
		{name: "service", sync: r.reconcileService},
		{name: "virtual service", sync: r.reconcileVirtualService},
	}
	for _, child := range children {
		if err := child.sync(ctx, logger, viewer, labels); err != nil {
			logger.Error(err, "Error while reconciling "+child.name)
			return ctrl.Result{}, err
		}
	}

	return syncStatus()
}
// reconcileDeployment creates the viewer's Deployment if it does not exist
// yet and updates it otherwise. The pod template is taken from the viewer's
// podSpec and labelled with commonLabels, which also form the selector.
//
// The affinity of an existing Deployment is carried over unchanged. A new
// affinity is only computed when RWO scheduling is enabled and the Deployment
// is being created.
func (r *PVCViewerReconciler) reconcileDeployment(ctx context.Context, log logr.Logger, viewer *kubefloworgv1alpha1.PVCViewer, commonLabels map[string]string) error {
	key := types.NamespacedName{Name: resourcePrefix + viewer.Name, Namespace: viewer.Namespace}
	deployment := &appsv1.Deployment{
		ObjectMeta: metav1.ObjectMeta{
			Name:      key.Name,
			Namespace: key.Namespace,
			Labels:    commonLabels,
		},
	}

	getErr := r.Get(ctx, key, deployment)
	isNew := apierrs.IsNotFound(getErr)
	if getErr != nil && !isNew {
		return getErr
	}

	// By default keep whatever affinity the (possibly existing) Deployment has.
	podAffinity := deployment.Spec.Template.Spec.Affinity
	if viewer.Spec.RWOScheduling && isNew {
		generated, err := r.generateAffinity(ctx, log, viewer)
		if err != nil {
			return err
		}
		// Only take over a non-nil affinity - we wouldn't win anything by
		// restarting without affinity.
		if generated != nil {
			podAffinity = generated
		}
	}

	podTemplate := corev1.PodTemplateSpec{
		ObjectMeta: metav1.ObjectMeta{Labels: commonLabels},
		Spec:       viewer.Spec.PodSpec,
	}
	podTemplate.Spec.Affinity = podAffinity

	deployment.Spec.Selector = &metav1.LabelSelector{MatchLabels: commonLabels}
	deployment.Spec.Template = podTemplate
	// Recreate ensures the pod is restarted when the affinity changes.
	// Otherwise the same PVC could be mounted by multiple pods, preventing
	// the new pod from starting.
	deployment.Spec.Strategy = appsv1.DeploymentStrategy{Type: appsv1.RecreateDeploymentStrategyType}

	if err := ctrl.SetControllerReference(viewer, deployment, r.Scheme); err != nil {
		return err
	}

	if !isNew {
		log.Info("Updating Deployment")
		return r.Update(ctx, deployment)
	}
	log.Info("Creating Deployment")
	return r.Create(ctx, deployment)
}
// reconcileService creates the viewer's ClusterIP Service if it does not
// exist yet and updates it otherwise. The Service selects pods by
// commonLabels and forwards servicePort to the viewer's configured target
// port. Nothing is done when the viewer has no networking configuration.
func (r *PVCViewerReconciler) reconcileService(ctx context.Context, log logr.Logger, viewer *kubefloworgv1alpha1.PVCViewer, commonLabels map[string]string) error {
	networking := viewer.Spec.Networking
	if networking == (kubefloworgv1alpha1.Networking{}) {
		return nil
	}

	key := types.NamespacedName{Name: resourcePrefix + viewer.Name, Namespace: viewer.Namespace}
	svc := &corev1.Service{
		ObjectMeta: metav1.ObjectMeta{
			Name:      key.Name,
			Namespace: key.Namespace,
			Labels:    commonLabels,
		},
	}

	getErr := r.Get(ctx, key, svc)
	isNew := apierrs.IsNotFound(getErr)
	if getErr != nil && !isNew {
		return getErr
	}

	httpPort := corev1.ServicePort{
		Name:       "http",
		Port:       servicePort,
		TargetPort: networking.TargetPort,
	}
	svc.Spec.Type = corev1.ServiceTypeClusterIP
	svc.Spec.Selector = commonLabels
	svc.Spec.Ports = []corev1.ServicePort{httpPort}

	if err := ctrl.SetControllerReference(viewer, svc, r.Scheme); err != nil {
		return err
	}

	if !isNew {
		log.Info("Updating Service")
		return r.Update(ctx, svc)
	}
	log.Info("Creating Service")
	return r.Create(ctx, svc)
}
// reconcileVirtualService creates the viewer's Istio VirtualService if it
// does not exist yet and updates it otherwise. Nothing is done when the
// viewer has no networking configuration.
//
// The VirtualService matches the URI prefix "<basePrefix>/<namespace>/<name>/"
// on the gateway named by the ISTIO_GATEWAY environment variable (or the
// default gateway) and routes it to the viewer's Service on servicePort. The
// URI is rewritten to the configured rewrite, or to the prefix itself when no
// rewrite is configured. A timeout is only set when one is configured.
//
// The object content is built exclusively from JSON-compatible types
// (map[string]interface{}, []interface{}, string, int64), as required for
// unstructured objects: helpers such as DeepCopy and the Nested* accessors
// do not support []string, map[string]string or pointer values.
func (r *PVCViewerReconciler) reconcileVirtualService(ctx context.Context, log logr.Logger, viewer *kubefloworgv1alpha1.PVCViewer, commonLabels map[string]string) error {
	if viewer.Spec.Networking == (kubefloworgv1alpha1.Networking{}) {
		return nil
	}

	// Unstructured content must not hold a map[string]string.
	labels := make(map[string]interface{}, len(commonLabels))
	for key, value := range commonLabels {
		labels[key] = value
	}

	virtualService := &unstructured.Unstructured{
		Object: map[string]interface{}{
			"apiVersion": "networking.istio.io/v1alpha3",
			"kind":       "VirtualService",
			"metadata": map[string]interface{}{
				"name":      resourcePrefix + viewer.Name,
				"namespace": viewer.Namespace,
				"labels":    labels,
			},
		},
	}

	createVirtualService := false
	if err := r.Get(ctx, types.NamespacedName{Name: virtualService.GetName(), Namespace: virtualService.GetNamespace()}, virtualService); err != nil {
		if !apierrs.IsNotFound(err) {
			return err
		}
		createVirtualService = true
	}

	prefix := fmt.Sprintf("%s/%s/%s/", viewer.Spec.Networking.BasePrefix, viewer.Namespace, viewer.Name)
	rewrite := prefix
	if viewer.Spec.Networking.Rewrite != "" {
		rewrite = viewer.Spec.Networking.Rewrite
	}
	service := fmt.Sprintf("%s%s.%s.svc.cluster.local", resourcePrefix, viewer.Name, viewer.Namespace)

	// Get the istio gateway from the environment variable or use the default
	istioGateway := os.Getenv(istioGatewayEnvKey)
	if istioGateway == "" {
		istioGateway = defaultIstioGateway
	}

	httpRoute := map[string]interface{}{
		"match": []interface{}{
			map[string]interface{}{
				"uri": map[string]interface{}{
					"prefix": prefix,
				},
			},
		},
		"rewrite": map[string]interface{}{
			"uri": rewrite,
		},
		"route": []interface{}{
			map[string]interface{}{
				"destination": map[string]interface{}{
					"host": service,
					"port": map[string]interface{}{
						"number": int64(servicePort),
					},
				},
			},
		},
	}
	// Leave the field out entirely when no timeout is configured instead of
	// storing a nil pointer in the unstructured content.
	if viewer.Spec.Networking.Timeout != "" {
		httpRoute["timeout"] = viewer.Spec.Networking.Timeout
	}

	// The spec is replaced as a whole, so values from a previous
	// reconciliation (e.g. a removed timeout) do not linger.
	virtualService.Object["spec"] = map[string]interface{}{
		"hosts":    []interface{}{"*"},
		"gateways": []interface{}{istioGateway},
		"http":     []interface{}{httpRoute},
	}

	if err := ctrl.SetControllerReference(viewer, virtualService, r.Scheme); err != nil {
		return err
	}

	if createVirtualService {
		log.Info("Creating Virtual Service")
		return r.Create(ctx, virtualService)
	}
	log.Info("Updating Virtual Service")
	return r.Update(ctx, virtualService)
}
// reconcileStatus computes and persists the status of the PVCViewer
// identified by viewerName and viewerNamespace.
//
// The viewer is fetched again before its status is modified. Status.URL is
// set to "<basePrefix>/<namespace>/<name>/" when networking is configured and
// cleared otherwise. Status.Ready reports whether the Deployment's ready
// replicas match its desired replicas; it is false when the Deployment does
// not exist. The first condition reported by the Deployment is appended to
// the viewer's conditions unless it equals the most recently recorded one.
//
// Any error other than a missing Deployment is returned, so that a transient
// API failure triggers a retry instead of being recorded as "not ready".
func (r *PVCViewerReconciler) reconcileStatus(ctx context.Context, log logr.Logger, viewerName string, viewerNamespace string) error {
	viewer := &kubefloworgv1alpha1.PVCViewer{}
	if err := r.Get(ctx, types.NamespacedName{Name: viewerName, Namespace: viewerNamespace}, viewer); err != nil {
		return err
	}

	if viewer.Spec.Networking != (kubefloworgv1alpha1.Networking{}) {
		url := fmt.Sprintf("%s/%s/%s/", viewer.Spec.Networking.BasePrefix, viewer.Namespace, viewer.Name)
		viewer.Status.URL = &url
	} else {
		viewer.Status.URL = nil
	}

	deployment := &appsv1.Deployment{}
	err := r.Get(ctx, types.NamespacedName{Name: resourcePrefix + viewer.Name, Namespace: viewer.Namespace}, deployment)
	switch {
	case apierrs.IsNotFound(err):
		log.Info("Could not find Deployment for status update")
		viewer.Status.Ready = false
	case err != nil:
		// Do not mistake e.g. a timeout for a missing Deployment.
		return err
	default:
		// Spec.Replicas is a pointer; an unset value means the default of 1.
		desiredReplicas := int32(1)
		if deployment.Spec.Replicas != nil {
			desiredReplicas = *deployment.Spec.Replicas
		}
		viewer.Status.Ready = desiredReplicas == deployment.Status.ReadyReplicas

		// Append the Deployment's first condition, if it is not already the
		// last entry in the list
		if len(deployment.Status.Conditions) > 0 {
			clen := len(viewer.Status.Conditions)
			if clen == 0 || viewer.Status.Conditions[clen-1] != deployment.Status.Conditions[0] {
				viewer.Status.Conditions = append(viewer.Status.Conditions, deployment.Status.Conditions[0])
			}
		}
	}

	log.Info("Updating status")
	return r.Client.Status().Update(ctx, viewer)
}
// generateAffinity computes the affinity to be used for the viewer's
// Deployment.
//
// It returns a preferred node affinity for the node that runs the pods
// already using the viewer's PVC. It returns (nil, nil), i.e. "no affinity",
// when the PVC does not exist, when the PVC's access modes are not exactly
// ReadWriteOnce, when no other pod uses the PVC, when a pod using the PVC
// has no node assigned, or when the pods using it run on different nodes.
// Pods created by this controller are ignored. Errors from reading the PVC
// (other than NotFound) or from listing pods are returned.
func (r *PVCViewerReconciler) generateAffinity(ctx context.Context, log logr.Logger, viewer *kubefloworgv1alpha1.PVCViewer) (*corev1.Affinity, error) {
	// Check if the viewer's PVC is RWO access mode
	pvc := &corev1.PersistentVolumeClaim{}
	if err := r.Get(ctx, types.NamespacedName{Name: viewer.Spec.PVC, Namespace: viewer.Namespace}, pvc); err != nil {
		if apierrs.IsNotFound(err) {
			log.Info("Omitting Affinity: PVC not found")
			// The error is suppressed on purpose: letting the Deployment fail
			// is more visible to the user.
			return nil, nil
		}
		return nil, err
	}
	if len(pvc.Spec.AccessModes) != 1 || pvc.Spec.AccessModes[0] != corev1.ReadWriteOnce {
		log.Info("Omitting Affinity: PVC is not RWO")
		return nil, nil
	}

	// Get all pods in namespace and filter by RWO PVCs
	podList := &corev1.PodList{}
	if err := r.List(ctx, podList, client.InNamespace(viewer.Namespace)); err != nil {
		return nil, err
	}

	// The node name is kept by value. Holding a pointer into the range
	// variable would alias a variable that is reused across iterations in
	// Go versions before 1.22, silently yielding the last pod's node.
	var nodeName string
	for i := range podList.Items {
		pod := &podList.Items[i]
		// Skip pods this controller created
		if pod.Labels[partOfLabelKey] == partOfLabelValue {
			continue
		}
		for _, volume := range pod.Spec.Volumes {
			claim := volume.PersistentVolumeClaim
			if claim == nil || claim.ClaimName == "" || claim.ClaimName != pvc.Name {
				continue
			}
			if pod.Spec.NodeName == "" {
				log.Info("Omitting Affinity: Viewer references RWO volume on pod without nodeName")
				return nil, nil
			}
			// Several pods on the same node may share an RWO volume; only
			// give up when they really are spread over different nodes.
			if nodeName != "" && nodeName != pod.Spec.NodeName {
				// Rather than throwing an error, we just omit the affinity,
				// leaving the current deployment's affinity unchanged
				log.Info("Omitting Affinity: Viewer references RWO volumes on multiple nodes",
					"nodes", []string{nodeName, pod.Spec.NodeName})
				return nil, nil
			}
			nodeName = pod.Spec.NodeName
		}
	}
	if nodeName == "" {
		log.Info("Omitting Affinity: PVC not used by other Pods")
		return nil, nil
	}

	// Generate Affinity using the node name
	affinity := &corev1.Affinity{
		NodeAffinity: &corev1.NodeAffinity{
			PreferredDuringSchedulingIgnoredDuringExecution: []corev1.PreferredSchedulingTerm{
				{
					Weight: 100,
					Preference: corev1.NodeSelectorTerm{
						MatchExpressions: []corev1.NodeSelectorRequirement{
							{
								Key:      "kubernetes.io/hostname",
								Operator: corev1.NodeSelectorOpIn,
								Values:   []string{nodeName},
							},
						},
					},
				},
			},
		},
	}
	return affinity, nil
}