/*
Copyright 2022.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package controllers

import (
	"context"
	"fmt"
	"os"
	"reflect"
	"strings"

	"github.com/go-logr/logr"
	"github.com/gogo/protobuf/proto"
	appsv1 "k8s.io/api/apps/v1"
	corev1 "k8s.io/api/core/v1"
	apierrs "k8s.io/apimachinery/pkg/api/errors"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
	"k8s.io/apimachinery/pkg/types"
	"k8s.io/apimachinery/pkg/util/intstr"
	ctrl "sigs.k8s.io/controller-runtime"
	"sigs.k8s.io/controller-runtime/pkg/client"
	"sigs.k8s.io/controller-runtime/pkg/reconcile"

	reconcilehelper "github.com/kubeflow/kubeflow/components/common/reconcilehelper"
	tensorboardv1alpha1 "github.com/kubeflow/kubeflow/components/tensorboard-controller/api/v1alpha1"
)

// TensorboardReconciler reconciles a Tensorboard object.
type TensorboardReconciler struct {
	client.Client
	Log logr.Logger
}

//+kubebuilder:rbac:groups=tensorboard.kubeflow.org,resources=tensorboards,verbs=get;list;watch;create;update;patch;delete
//+kubebuilder:rbac:groups=tensorboard.kubeflow.org,resources=tensorboards/status,verbs=get;update;patch
//+kubebuilder:rbac:groups=tensorboard.kubeflow.org,resources=tensorboards/finalizers,verbs=update
//+kubebuilder:rbac:groups=apps,resources=deployments,verbs=get;list;watch;create;update
//+kubebuilder:rbac:groups=core,resources=services,verbs=get;list;watch;create;update
//+kubebuilder:rbac:groups=networking.istio.io,resources=virtualservices,verbs=get;list;watch;create;update
//+kubebuilder:rbac:groups=core,resources=persistentvolumeclaims,verbs=get;list;watch
//+kubebuilder:rbac:groups=core,resources=pods,verbs=get;list;watch

// Reconcile is part of the main kubernetes reconciliation loop which aims to
// move the current state of the cluster closer to the desired state. It
// compares the state specified by the Tensorboard object against the actual
// cluster state, and then performs operations to make the cluster state
// reflect the state specified by the user.
//
// For more details, check Reconcile and its Result here:
// - https://pkg.go.dev/sigs.k8s.io/controller-runtime@v0.11.0/pkg/reconcile
func (r *TensorboardReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) {
	logger := r.Log.WithValues("tensorboard", req.NamespacedName)

	instance := &tensorboardv1alpha1.Tensorboard{}
	err := r.Get(ctx, req.NamespacedName, instance)
	if err != nil {
		if apierrs.IsNotFound(err) {
			// Object not found, return. Created objects are automatically garbage collected.
			// For additional cleanup logic use finalizers.
			return reconcile.Result{}, nil
		}
		// Error reading the object - requeue the request.
		return reconcile.Result{}, err
	}

	// The tensorboards-web-app deletes objects using the foreground deletion policy,
	// so the Tensorboard CR stays until all of its owned objects are deleted. While
	// the CR is terminating, the reconcile loop might otherwise keep recreating the
	// very resources that the API server is trying to delete, so in that case the
	// loop should do nothing.
	if !instance.DeletionTimestamp.IsZero() {
		return ctrl.Result{}, nil
	}

	// Reconcile k8s deployment.
	deployment, err := generateDeployment(instance, logger, r)
	if err != nil {
		return ctrl.Result{}, err
	}
	if err := ctrl.SetControllerReference(instance, deployment, r.Scheme()); err != nil {
		return ctrl.Result{}, err
	}
	if err := Deployment(ctx, r, deployment, logger); err != nil {
		return ctrl.Result{}, err
	}

	// Reconcile k8s service.
	service := generateService(instance)
	if err := ctrl.SetControllerReference(instance, service, r.Scheme()); err != nil {
		return ctrl.Result{}, err
	}
	if err := reconcilehelper.Service(ctx, r, service, logger); err != nil {
		return ctrl.Result{}, err
	}

	// Reconcile istio virtual service.
	virtualService, err := generateVirtualService(instance)
	if err != nil {
		return ctrl.Result{}, err
	}
	if err := ctrl.SetControllerReference(instance, virtualService, r.Scheme()); err != nil {
		return ctrl.Result{}, err
	}
	if err := reconcilehelper.VirtualService(ctx, r, virtualService.GetName(), virtualService.GetNamespace(), virtualService, logger); err != nil {
		return ctrl.Result{}, err
	}

	// Update instance.Status.ReadyReplicas if foundDeployment.Status.ReadyReplicas
	// has changed.
	foundDeployment := &appsv1.Deployment{}
	err = r.Get(ctx, types.NamespacedName{Name: deployment.Name, Namespace: deployment.Namespace}, foundDeployment)
	if err != nil {
		if apierrs.IsNotFound(err) {
			logger.Info("Deployment not found...", "deployment", deployment.Name)
		} else {
			return ctrl.Result{}, err
		}
	} else {
		if len(foundDeployment.Status.Conditions) > 0 {
			condition := tensorboardv1alpha1.TensorboardCondition{
				DeploymentState: foundDeployment.Status.Conditions[0].Type,
				LastProbeTime:   foundDeployment.Status.Conditions[0].LastUpdateTime,
			}
			clen := len(instance.Status.Conditions)
			if clen == 0 || instance.Status.Conditions[clen-1].DeploymentState != condition.DeploymentState {
				instance.Status.Conditions = append(instance.Status.Conditions, condition)
			}
			logger.Info("instance condition", "condition", condition)
		}

		if foundDeployment.Status.ReadyReplicas != instance.Status.ReadyReplicas {
			logger.Info("Updating Status", "namespace", instance.Namespace, "name", instance.Name)
			instance.Status.ReadyReplicas = foundDeployment.Status.ReadyReplicas
		}

		if err := r.Status().Update(ctx, instance); err != nil {
			return ctrl.Result{}, err
		}
	}

	return ctrl.Result{}, nil
}

// SetupWithManager sets up the controller with the Manager.
func (r *TensorboardReconciler) SetupWithManager(mgr ctrl.Manager) error {
	return ctrl.NewControllerManagedBy(mgr).
		For(&tensorboardv1alpha1.Tensorboard{}).
		Owns(&appsv1.Deployment{}).
		Complete(r)
}
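
// generateDeployment builds the desired Deployment for a Tensorboard instance:
// it resolves the server image from the TENSORBOARD_IMAGE env var, mounts
// either a PVC or GCP credentials depending on the logs path, and optionally
// adds node affinity for ReadWriteOnce PVCs (see rwoPVCScheduling).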
func generateDeployment(tb *tensorboardv1alpha1.Tensorboard, log logr.Logger, r *TensorboardReconciler) (*appsv1.Deployment, error) {
	var volumeMounts []corev1.VolumeMount
	var volumes []corev1.Volume
	var mountpath, subpath string = tb.Spec.LogsPath, ""
	var affinity = &corev1.Affinity{}
	tensorboardImage, err := getEnvVariable("TENSORBOARD_IMAGE")
	if err != nil {
		return nil, err
	}

	// In this case, a PVC is used as the log storage for the Tensorboard server.
	if !isCloudPath(tb.Spec.LogsPath) {
		var pvcname string

		// General case, in which tb.Spec.LogsPath follows the format: "pvc://<pvc-name>/<local-path>".
		if isPVCPath(tb.Spec.LogsPath) {
			pvcname = extractPVCName(tb.Spec.LogsPath)
			mountpath = "/tensorboard_logs/"
			subpath = extractPVCSubPath(tb.Spec.LogsPath)
		} else {
			// Maintaining backwards compatibility with previous versions of the controller.
			pvcname = "tb-volume"
			subpath = ""
		}

		volumeMounts = append(volumeMounts, corev1.VolumeMount{
			Name:      "tbpd",
			ReadOnly:  true,
			MountPath: mountpath,
			SubPath:   subpath,
		})
		volumes = append(volumes, corev1.Volume{
			Name: "tbpd",
			VolumeSource: corev1.VolumeSource{
				PersistentVolumeClaim: &corev1.PersistentVolumeClaimVolumeSource{
					ClaimName: pvcname,
				},
			},
		})

		if sch, err := rwoPVCScheduling(); err != nil {
			return nil, err
		} else if sch {
			// If the 'RWO_PVC_SCHEDULING' env var is set to "true", extra scheduling
			// functionality is added for the case that the Tensorboard server uses a
			// RWO PVC (as log storage) that is already mounted by another pod.

			// Get the PVC that will be accessed by the Tensorboard server.
			var pvc = &corev1.PersistentVolumeClaim{}
			if err := r.Get(context.Background(), client.ObjectKey{
				Namespace: tb.Namespace,
				Name:      pvcname,
			}, pvc); err != nil {
				return nil, fmt.Errorf("Get PersistentVolumeClaim error: %v", err)
			}

			// If the PVC is mounted as a ReadWriteOnce volume by a pod that is running
			// on a node X, then we find the NodeName of X so that the Tensorboard server
			// (which must access the volume) will be deployed on X using nodeAffinity.
			if len(pvc.Status.AccessModes) > 0 && pvc.Status.AccessModes[0] == corev1.ReadWriteOnce {
				if err := generateNodeAffinity(affinity, pvcname, r, tb); err != nil {
					return nil, err
				}
			}
		}
	} else if isGoogleCloudPath(tb.Spec.LogsPath) {
		// In this case, a Google Cloud Storage bucket is used as the log storage for the Tensorboard server.
		volumeMounts = append(volumeMounts, corev1.VolumeMount{
			Name:      "gcp-creds",
			ReadOnly:  true,
			MountPath: "/secret/gcp",
		})
		volumes = append(volumes, corev1.Volume{
			Name: "gcp-creds",
			VolumeSource: corev1.VolumeSource{
				Secret: &corev1.SecretVolumeSource{
					SecretName: "user-gcp-sa",
				},
			},
		})
	}

	// Copy all of the CR's labels to the pod, which includes poddefault-related labels.
	podLabels := map[string]string{}
	for k, v := range tb.ObjectMeta.Labels {
		podLabels[k] = v
	}
	podLabels["app"] = tb.Name

	return &appsv1.Deployment{
		ObjectMeta: metav1.ObjectMeta{
			Name:      tb.Name,
			Namespace: tb.Namespace,
		},
		Spec: appsv1.DeploymentSpec{
			Replicas: proto.Int32(1),
			Selector: &metav1.LabelSelector{
				MatchLabels: map[string]string{
					"app": tb.Name,
				},
			},
			Template: corev1.PodTemplateSpec{
				ObjectMeta: metav1.ObjectMeta{
					Labels: podLabels,
				},
				Spec: corev1.PodSpec{
					Affinity:      affinity,
					RestartPolicy: corev1.RestartPolicyAlways,
					Containers: []corev1.Container{
						{
							Name:            "tensorboard",
							Image:           tensorboardImage,
							ImagePullPolicy: "IfNotPresent",
							Command:         []string{"/usr/local/bin/tensorboard"},
							WorkingDir:      "/",
							Args: []string{
								"--logdir=" + mountpath,
								"--bind_all",
							},
							Ports: []corev1.ContainerPort{
								{
									ContainerPort: 6006,
								},
							},
							VolumeMounts: volumeMounts,
						},
					},
					Volumes: volumes,
				},
			},
		},
	}, nil
}
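
// generateService builds a ClusterIP Service that exposes the Tensorboard
// pod's port 6006 on port 80, selecting pods by the "app" label.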
func generateService(tb *tensorboardv1alpha1.Tensorboard) *corev1.Service {
	return &corev1.Service{
		ObjectMeta: metav1.ObjectMeta{
			Name:      tb.Name,
			Namespace: tb.Namespace,
		},
		Spec: corev1.ServiceSpec{
			Type:     "ClusterIP",
			Selector: map[string]string{"app": tb.Name},
			Ports: []corev1.ServicePort{
				{
					Name:       "http-" + tb.Name,
					Port:       80,
					TargetPort: intstr.FromInt(6006),
				},
			},
		},
	}
}
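
// generateVirtualService builds an unstructured Istio VirtualService that
// routes "/tensorboard/<namespace>/<name>/" on the gateway named by the
// ISTIO_GATEWAY env var to the Tensorboard Service, rewriting the URI to "/".
// For example, a Tensorboard "tb" in namespace "user" is served under
// "/tensorboard/user/tb/".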
func generateVirtualService(tb *tensorboardv1alpha1.Tensorboard) (*unstructured.Unstructured, error) {
	prefix := fmt.Sprintf("/tensorboard/%s/%s/", tb.Namespace, tb.Name)
	rewrite := "/"
	service := fmt.Sprintf("%s.%s.svc.cluster.local", tb.Name, tb.Namespace)
	istioGateway, err := getEnvVariable("ISTIO_GATEWAY")
	if err != nil {
		return nil, err
	}

	vsvc := &unstructured.Unstructured{}
	vsvc.SetAPIVersion("networking.istio.io/v1alpha3")
	vsvc.SetKind("VirtualService")
	vsvc.SetName(tb.Name)
	vsvc.SetNamespace(tb.Namespace)
	if err := unstructured.SetNestedStringSlice(vsvc.Object, []string{"*"}, "spec", "hosts"); err != nil {
		return nil, fmt.Errorf("Set .spec.hosts error: %v", err)
	}
	if err := unstructured.SetNestedStringSlice(vsvc.Object, []string{istioGateway},
		"spec", "gateways"); err != nil {
		return nil, fmt.Errorf("Set .spec.gateways error: %v", err)
	}

	http := []interface{}{
		map[string]interface{}{
			"match": []interface{}{
				map[string]interface{}{
					"uri": map[string]interface{}{
						"prefix": prefix,
					},
				},
			},
			"rewrite": map[string]interface{}{
				"uri": rewrite,
			},
			"route": []interface{}{
				map[string]interface{}{
					"destination": map[string]interface{}{
						"host": service,
						"port": map[string]interface{}{
							"number": int64(80),
						},
					},
				},
			},
			"timeout": "300s",
		},
	}
	if err := unstructured.SetNestedSlice(vsvc.Object, http, "spec", "http"); err != nil {
		return nil, fmt.Errorf("Set .spec.http error: %v", err)
	}

	return vsvc, nil
}
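
// isCloudPath reports whether the logs path points at remote object storage
// (GCS, S3, or CNS) rather than a PVC.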
func isCloudPath(path string) bool {
	return isGoogleCloudPath(path) || strings.HasPrefix(path, "s3://") || strings.HasPrefix(path, "/cns/")
}
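
// isGoogleCloudPath reports whether the logs path is a Google Cloud Storage
// bucket path.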
func isGoogleCloudPath(path string) bool {
	return strings.HasPrefix(path, "gs://")
}
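
// isPVCPath reports whether the logs path follows the
// "pvc://<pvc-name>/<local-path>" format.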
func isPVCPath(path string) bool {
	return strings.HasPrefix(path, "pvc://")
}
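
// extractPVCName returns the <pvc-name> component of a "pvc://" logs path.
// For example, extractPVCName("pvc://my-claim/logs/run1") returns "my-claim".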
func extractPVCName(path string) string {
	trimmed := strings.TrimPrefix(path, "pvc://") // removing the "pvc://" prefix
	ending := strings.Index(trimmed, "/")         // finding the ending index of the pvc-name string
	if ending == -1 {
		return trimmed
	}
	return trimmed[0:ending]
}
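
// extractPVCSubPath returns the <local-path> component of a "pvc://" logs
// path, or "" if none is given. For example,
// extractPVCSubPath("pvc://my-claim/logs/run1") returns "logs/run1".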
func extractPVCSubPath(path string) string {
	trimmed := strings.TrimPrefix(path, "pvc://") // removing the "pvc://" prefix
	start := strings.Index(trimmed, "/")          // finding the starting index of the local path inside the PVC
	if start == -1 || len(trimmed) == start+1 {
		return ""
	}
	return trimmed[start+1:]
}

// findRunningPod searches a corev1.PodList for a running pod and returns
// one if it exists, or the zero-value corev1.Pod otherwise.
func findRunningPod(pods *corev1.PodList) corev1.Pod {
	for _, pod := range pods.Items {
		if pod.Status.Phase == corev1.PodRunning {
			return pod
		}
	}

	return corev1.Pod{}
}
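
// extractNodeName returns the name of the node the given pod is scheduled on,
// or "" if the pod is not scheduled.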
func extractNodeName(pod corev1.Pod) string {
	return pod.Spec.NodeName
}
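
// generateNodeAffinity populates affinity with a preferred nodeAffinity term
// targeting the node on which a running pod already mounts the PVC named
// pvcname, so that the Tensorboard server can share the RWO volume.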
func generateNodeAffinity(affinity *corev1.Affinity, pvcname string, r *TensorboardReconciler, tb *tensorboardv1alpha1.Tensorboard) error {
	var nodename string
	var pods = &corev1.PodList{}
	var pod corev1.Pod

	// List all pods that access the PVC that has ClaimName: pvcname.
	// NOTE: We use only one custom field selector to filter out pods that don't use this PVC.
	if err := r.List(context.Background(), pods, client.InNamespace(tb.Namespace), client.MatchingFields{"spec.volumes.persistentvolumeclaim.claimname": pvcname}); err != nil {
		return fmt.Errorf("List pods error: %v", err)
	}

	// Find a running pod that uses the PVC.
	pod = findRunningPod(pods)

	// If there is no running pod that uses the PVC, then nodename == "".
	// Else, nodename contains the name of the node that the pod is running on.
	nodename = extractNodeName(pod)
	if nodename == "" {
		return nil
	}

	// There is a running pod that uses the PVC, so we create a nodeAffinity
	// field so that the Tensorboard server will be scheduled (if possible) on
	// the same node as the running pod.
	*affinity = corev1.Affinity{
		NodeAffinity: &corev1.NodeAffinity{
			PreferredDuringSchedulingIgnoredDuringExecution: []corev1.PreferredSchedulingTerm{
				{
					Weight: 100,
					Preference: corev1.NodeSelectorTerm{
						MatchExpressions: []corev1.NodeSelectorRequirement{
							{
								Key:      "kubernetes.io/hostname",
								Operator: "In",
								Values:   []string{nodename},
							},
						},
					},
				},
			},
		},
	}
	return nil
}

// rwoPVCScheduling checks the value of the 'RWO_PVC_SCHEDULING' env var (if present
// in the environment) and returns 'true' or 'false' accordingly. If 'RWO_PVC_SCHEDULING'
// is NOT present, the returned boolean is 'false', so that the scheduling
// functionality is off by default.
func rwoPVCScheduling() (bool, error) {
	if value, exists := os.LookupEnv("RWO_PVC_SCHEDULING"); !exists || value == "false" || value == "False" || value == "FALSE" {
		return false, nil
	} else if value == "true" || value == "True" || value == "TRUE" {
		return true, nil
	}

	// If 'RWO_PVC_SCHEDULING' is present in the environment but has an invalid value,
	// then an error is returned.
	return false, fmt.Errorf("invalid value for 'RWO_PVC_SCHEDULING' env var")
}
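
// Deployment reconciles the given Deployment: it creates the object if it
// does not exist, and otherwise updates the existing one in place whenever
// CopyDeploymentSetFields detects drift in the fields this controller owns.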
func Deployment(ctx context.Context, r client.Client, deployment *appsv1.Deployment, log logr.Logger) error {
	foundDeployment := &appsv1.Deployment{}
	justCreated := false
	if err := r.Get(ctx, types.NamespacedName{Name: deployment.Name, Namespace: deployment.Namespace}, foundDeployment); err != nil {
		if apierrs.IsNotFound(err) {
			log.Info("Creating Deployment", "namespace", deployment.Namespace, "name", deployment.Name)
			if err := r.Create(ctx, deployment); err != nil {
				log.Error(err, "unable to create deployment")
				return err
			}
			justCreated = true
		} else {
			log.Error(err, "error getting deployment")
			return err
		}
	}
	if !justCreated && CopyDeploymentSetFields(deployment, foundDeployment) {
		log.Info("Updating Deployment", "namespace", deployment.Namespace, "name", deployment.Name)
		if err := r.Update(ctx, foundDeployment); err != nil {
			log.Error(err, "unable to update deployment")
			return err
		}
	}

	return nil
}
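
// CopyDeploymentSetFields copies the labels, replica count, and pod affinity
// from the desired Deployment onto the found one, and reports whether any of
// them differed, i.e. whether an Update call is required.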
func CopyDeploymentSetFields(from, to *appsv1.Deployment) bool {
	requireUpdate := false
	for k, v := range to.Labels {
		if from.Labels[k] != v {
			requireUpdate = true
		}
	}
	to.Labels = from.Labels

	if *from.Spec.Replicas != *to.Spec.Replicas {
		to.Spec.Replicas = from.Spec.Replicas
		requireUpdate = true
	}

	if !reflect.DeepEqual(to.Spec.Template.Spec.Affinity, from.Spec.Template.Spec.Affinity) {
		requireUpdate = true
	}
	to.Spec.Template.Spec.Affinity = from.Spec.Template.Spec.Affinity

	return requireUpdate
}
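
// getEnvVariable returns the value of the given environment variable, or an
// error if it is not set.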
func getEnvVariable(envVar string) (string, error) {
	if lookupEnv, exists := os.LookupEnv(envVar); exists {
		return lookupEnv, nil
	}
	return "", fmt.Errorf("environment variable %v is not set", envVar)
}