diff --git a/workspaces/controller/api/v1beta1/workspacekind_types.go b/workspaces/controller/api/v1beta1/workspacekind_types.go index ee06d55..e1bf0d2 100644 --- a/workspaces/controller/api/v1beta1/workspacekind_types.go +++ b/workspaces/controller/api/v1beta1/workspacekind_types.go @@ -312,7 +312,6 @@ type ImageConfigValue struct { Redirect *OptionRedirect `json:"redirect,omitempty"` // the spec of the image config - //+kubebuilder:validation:XValidation:rule="self == oldSelf",message="ImageConfig 'spec' is immutable" Spec ImageConfigSpec `json:"spec"` } @@ -396,7 +395,6 @@ type PodConfigValue struct { Redirect *OptionRedirect `json:"redirect,omitempty"` // the spec of the pod config - //+kubebuilder:validation:XValidation:rule="self == oldSelf",message="PodConfig 'spec' is immutable" Spec PodConfigSpec `json:"spec"` } diff --git a/workspaces/controller/config/crd/bases/kubeflow.org_workspacekinds.yaml b/workspaces/controller/config/crd/bases/kubeflow.org_workspacekinds.yaml index 41b7123..c1275a1 100644 --- a/workspaces/controller/config/crd/bases/kubeflow.org_workspacekinds.yaml +++ b/workspaces/controller/config/crd/bases/kubeflow.org_workspacekinds.yaml @@ -2368,9 +2368,6 @@ spec: - image - ports type: object - x-kubernetes-validations: - - message: ImageConfig 'spec' is immutable - rule: self == oldSelf required: - id - spawner @@ -3532,9 +3529,6 @@ spec: type: object type: array type: object - x-kubernetes-validations: - - message: PodConfig 'spec' is immutable - rule: self == oldSelf required: - id - spawner diff --git a/workspaces/controller/config/manager/kustomization.yaml b/workspaces/controller/config/manager/kustomization.yaml index 5c5f0b8..2c5fb1f 100644 --- a/workspaces/controller/config/manager/kustomization.yaml +++ b/workspaces/controller/config/manager/kustomization.yaml @@ -1,2 +1,8 @@ +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization resources: - manager.yaml +images: +- name: controller + newName: 
ghcr.io/kubeflow/notebooks/workspace-controller + newTag: latest diff --git a/workspaces/controller/config/manager/manager.yaml b/workspaces/controller/config/manager/manager.yaml index dbc8075..1e6d660 100644 --- a/workspaces/controller/config/manager/manager.yaml +++ b/workspaces/controller/config/manager/manager.yaml @@ -65,6 +65,7 @@ spec: - --health-probe-bind-address=:8081 - --metrics-bind-address=0 image: controller:latest + imagePullPolicy: IfNotPresent name: manager securityContext: allowPrivilegeEscalation: false diff --git a/workspaces/controller/config/rbac/role.yaml b/workspaces/controller/config/rbac/role.yaml index 2b30df5..4a14adb 100644 --- a/workspaces/controller/config/rbac/role.yaml +++ b/workspaces/controller/config/rbac/role.yaml @@ -24,6 +24,14 @@ rules: - get - list - watch +- apiGroups: + - "" + resources: + - events + verbs: + - get + - list + - watch - apiGroups: - "" resources: diff --git a/workspaces/controller/config/samples/common/kustomization.yaml b/workspaces/controller/config/samples/common/kustomization.yaml new file mode 100644 index 0000000..bd1d677 --- /dev/null +++ b/workspaces/controller/config/samples/common/kustomization.yaml @@ -0,0 +1,4 @@ +resources: +- workspace_data_pvc.yaml +- workspace_home_pvc.yaml +- workspace_service_account.yaml \ No newline at end of file diff --git a/workspaces/controller/config/samples/common/workspace_data_pvc.yaml b/workspaces/controller/config/samples/common/workspace_data_pvc.yaml new file mode 100644 index 0000000..4e92525 --- /dev/null +++ b/workspaces/controller/config/samples/common/workspace_data_pvc.yaml @@ -0,0 +1,10 @@ +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: workspace-data-pvc +spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 1Gi \ No newline at end of file diff --git a/workspaces/controller/config/samples/common/workspace_home_pvc.yaml b/workspaces/controller/config/samples/common/workspace_home_pvc.yaml new file mode 100644 index 
0000000..ff27ab2 --- /dev/null +++ b/workspaces/controller/config/samples/common/workspace_home_pvc.yaml @@ -0,0 +1,10 @@ +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: workspace-home-pvc +spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 1Gi \ No newline at end of file diff --git a/workspaces/controller/config/samples/common/workspace_service_account.yaml b/workspaces/controller/config/samples/common/workspace_service_account.yaml new file mode 100644 index 0000000..86aa458 --- /dev/null +++ b/workspaces/controller/config/samples/common/workspace_service_account.yaml @@ -0,0 +1,4 @@ +apiVersion: v1 +kind: ServiceAccount +metadata: + name: default-editor \ No newline at end of file diff --git a/workspaces/controller/config/samples/v1beta1_workspace.yaml b/workspaces/controller/config/samples/jupyterlab_v1beta1_workspace.yaml similarity index 92% rename from workspaces/controller/config/samples/v1beta1_workspace.yaml rename to workspaces/controller/config/samples/jupyterlab_v1beta1_workspace.yaml index 13386e7..1c8e076 100644 --- a/workspaces/controller/config/samples/v1beta1_workspace.yaml +++ b/workspaces/controller/config/samples/jupyterlab_v1beta1_workspace.yaml @@ -1,8 +1,7 @@ apiVersion: kubeflow.org/v1beta1 kind: Workspace metadata: - name: my-workspace - namespace: default + name: jupyterlab-workspace spec: ## if the workspace is paused (no pods running) paused: false @@ -42,7 +41,7 @@ spec: ## - the mount path is defined in the WorkspaceKind under ## `spec.podTemplate.volumeMounts.home` ## - home: "my-home-pvc" + home: "workspace-home-pvc" ## additional PVCs to mount ## - these PVC must already exist in the Namespace @@ -51,7 +50,7 @@ spec: ## - if `readOnly` is true, the PVC must be ReadOnlyMany ## data: - - pvcName: "my-data-pvc" + - pvcName: "workspace-data-pvc" mountPath: "/data/my-data" readOnly: false @@ -65,7 +64,7 @@ spec: ## - options are defined in WorkspaceKind under ## 
`spec.podTemplate.options.imageConfig.values[]` ## - imageConfig: "jupyterlab_scipy_180" + imageConfig: "jupyterlab_scipy_190" ## the id of a podConfig option ## - options are defined in WorkspaceKind under diff --git a/workspaces/controller/config/samples/v1beta1_workspacekind.yaml b/workspaces/controller/config/samples/jupyterlab_v1beta1_workspacekind.yaml similarity index 97% rename from workspaces/controller/config/samples/v1beta1_workspacekind.yaml rename to workspaces/controller/config/samples/jupyterlab_v1beta1_workspacekind.yaml index 44cacc6..71ed533 100644 --- a/workspaces/controller/config/samples/v1beta1_workspacekind.yaml +++ b/workspaces/controller/config/samples/jupyterlab_v1beta1_workspacekind.yaml @@ -111,9 +111,21 @@ spec: ## https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.27/#probe-v1-core ## probes: - startupProbe: {} - livenessProbe: {} - readinessProbe: {} + + ## startup probe for the "main" container + ## + #startupProbe: + # ... + + ## liveness probe for the "main" container + ## + #livenessProbe: + # ... + + ## readiness probe for the "main" container + ## + #readinessProbe: + # ... 
## volume mount paths ## @@ -158,7 +170,7 @@ spec: ## https://github.com/kubeflow/kubeflow/blob/v1.8.0/components/example-notebook-servers/jupyter/s6/services.d/jupyterlab/run#L12 - name: "NB_PREFIX" value: |- - {{ httpPathPrefix "juptyerlab" }} + {{ httpPathPrefix "jupyterlab" }} ## extra volume mounts for Workspace Pods (MUTABLE) ## - spec for VolumeMount: diff --git a/workspaces/controller/config/samples/kustomization.yaml b/workspaces/controller/config/samples/kustomization.yaml index b276193..016ab77 100644 --- a/workspaces/controller/config/samples/kustomization.yaml +++ b/workspaces/controller/config/samples/kustomization.yaml @@ -1,5 +1,6 @@ -## Append samples of your project ## +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization resources: -- v1beta1_workspace.yaml -- v1beta1_workspacekind.yaml -#+kubebuilder:scaffold:manifestskustomizesamples +- ./common +- jupyterlab_v1beta1_workspace.yaml +- jupyterlab_v1beta1_workspacekind.yaml \ No newline at end of file diff --git a/workspaces/controller/internal/controller/workspace_controller.go b/workspaces/controller/internal/controller/workspace_controller.go index 6f9b8fa..bbda3f3 100644 --- a/workspaces/controller/internal/controller/workspace_controller.go +++ b/workspaces/controller/internal/controller/workspace_controller.go @@ -55,8 +55,9 @@ const ( workspaceSelectorLabel = "statefulset" // KubeBuilder cache fields - kbCacheWorkspaceOwnerKey = ".metadata.controller" - kbCacheWorkspaceKindField = ".spec.kind" + kfCacheEventInvolvedObjectUidKey = ".involvedObject.uid" + kbCacheWorkspaceOwnerKey = ".metadata.controller" + kbCacheWorkspaceKindField = ".spec.kind" // lengths for resource names generateNameSuffixLength = 6 @@ -64,21 +65,24 @@ const ( maxStatefulSetNameLength = 52 // https://github.com/kubernetes/kubernetes/issues/64023 // state message formats for Workspace status - stateMsgError = "Workspace has error" - stateMsgErrorUnknownWorkspaceKind = "Workspace references unknown 
WorkspaceKind: %s" - stateMsgErrorInvalidImageConfig = "Workspace has invalid imageConfig: %s" - stateMsgErrorInvalidPodConfig = "Workspace has invalid podConfig: %s" - stateMsgErrorGenFailureStatefulSet = "Workspace failed to generate StatefulSet with error: %s" - stateMsgErrorGenFailureService = "Workspace failed to generate Service with error: %s" - stateMsgErrorMultipleStatefulSets = "Workspace owns multiple StatefulSets: %s" - stateMsgErrorMultipleServices = "Workspace owns multiple Services: %s" - stateMsgErrorPodCrashLoopBackOff = "Workspace Pod is not running (CrashLoopBackOff)" - stateMsgErrorPodImagePullBackOff = "Workspace Pod is not running (ImagePullBackOff)" - stateMsgPaused = "Workspace is paused" - stateMsgPending = "Workspace is pending" - stateMsgRunning = "Workspace is running" - stateMsgTerminating = "Workspace is terminating" - stateMsgUnknown = "Workspace is in an unknown state" + stateMsgErrorUnknownWorkspaceKind = "Workspace references unknown WorkspaceKind: %s" + stateMsgErrorInvalidImageConfig = "Workspace has invalid imageConfig: %s" + stateMsgErrorInvalidPodConfig = "Workspace has invalid podConfig: %s" + stateMsgErrorGenFailureStatefulSet = "Workspace failed to generate StatefulSet with error: %s" + stateMsgErrorGenFailureService = "Workspace failed to generate Service with error: %s" + stateMsgErrorMultipleStatefulSets = "Workspace owns multiple StatefulSets: %s" + stateMsgErrorMultipleServices = "Workspace owns multiple Services: %s" + stateMsgErrorStatefulSetWarningEvent = "Workspace StatefulSet has warning event: %s" + stateMsgErrorPodUnschedulable = "Workspace Pod is unschedulable: %s" + stateMsgErrorPodSchedulingGate = "Workspace Pod is waiting for scheduling gate: %s" + stateMsgErrorPodSchedulerError = "Workspace Pod has scheduler error: %s" + stateMsgErrorContainerCrashLoopBackOff = "Workspace Container is not running (CrashLoopBackOff)" + stateMsgErrorContainerImagePullBackOff = "Workspace Container is not running 
(ImagePullBackOff)" + stateMsgPaused = "Workspace is paused" + stateMsgPending = "Workspace is pending" + stateMsgRunning = "Workspace is running" + stateMsgTerminating = "Workspace is terminating" + stateMsgUnknown = "Workspace is in an unknown state" ) var ( @@ -97,6 +101,7 @@ type WorkspaceReconciler struct { // +kubebuilder:rbac:groups=kubeflow.org,resources=workspacekinds,verbs=get;list;watch // +kubebuilder:rbac:groups=kubeflow.org,resources=workspacekinds/finalizers,verbs=update // +kubebuilder:rbac:groups=apps,resources=statefulsets,verbs=create;delete;get;list;patch;update;watch +// +kubebuilder:rbac:groups=core,resources=events,verbs=get;list;watch // +kubebuilder:rbac:groups=core,resources=configmaps,verbs=get;list;watch // +kubebuilder:rbac:groups=core,resources=pods,verbs=get;list;watch // +kubebuilder:rbac:groups=core,resources=services,verbs=create;delete;get;list;patch;update;watch @@ -282,6 +287,7 @@ func (r *WorkspaceReconciler) Reconcile(ctx context.Context, req ctrl.Request) ( } log.V(2).Info("StatefulSet updated", "statefulSet", statefulSetName) } + statefulSet = foundStatefulSet } // generate Service @@ -370,7 +376,10 @@ func (r *WorkspaceReconciler) Reconcile(ctx context.Context, req ctrl.Request) ( // // update Workspace status - workspaceStatus := generateWorkspaceStatus(workspace, pod) + workspaceStatus, err := r.generateWorkspaceStatus(ctx, log, workspace, pod, statefulSet) + if err != nil { + return ctrl.Result{}, err + } if !reflect.DeepEqual(workspace.Status, workspaceStatus) { workspace.Status = workspaceStatus if err := r.Status().Update(ctx, workspace); err != nil { @@ -388,6 +397,17 @@ func (r *WorkspaceReconciler) Reconcile(ctx context.Context, req ctrl.Request) ( // SetupWithManager sets up the controller with the Manager. 
func (r *WorkspaceReconciler) SetupWithManager(mgr ctrl.Manager) error { + // Index Event by `involvedObject.uid` + if err := mgr.GetFieldIndexer().IndexField(context.Background(), &corev1.Event{}, kfCacheEventInvolvedObjectUidKey, func(rawObj client.Object) []string { + event := rawObj.(*corev1.Event) + if event.InvolvedObject.UID == "" { + return nil + } + return []string{string(event.InvolvedObject.UID)} + }); err != nil { + return err + } + // Index StatefulSet by owner if err := mgr.GetFieldIndexer().IndexField(context.Background(), &appsv1.StatefulSet{}, kbCacheWorkspaceOwnerKey, func(rawObj client.Object) []string { statefulSet := rawObj.(*appsv1.StatefulSet) @@ -904,16 +924,59 @@ func generateService(workspace *kubefloworgv1beta1.Workspace, imageConfigSpec ku } // generateWorkspaceStatus generates a WorkspaceStatus for a Workspace -func generateWorkspaceStatus(workspace *kubefloworgv1beta1.Workspace, pod *corev1.Pod) kubefloworgv1beta1.WorkspaceStatus { +func (r *WorkspaceReconciler) generateWorkspaceStatus(ctx context.Context, log logr.Logger, workspace *kubefloworgv1beta1.Workspace, pod *corev1.Pod, statefulSet *appsv1.StatefulSet) (kubefloworgv1beta1.WorkspaceStatus, error) { status := workspace.Status + // cases where the Pod does not exist + if pod == nil { + // STATUS: Paused + if *workspace.Spec.Paused { + status.State = kubefloworgv1beta1.WorkspaceStatePaused + status.StateMessage = stateMsgPaused + return status, nil + } + + // there might be StatefulSet events + statefulSetEvents := &corev1.EventList{} + listOpts := &client.ListOptions{ + FieldSelector: fields.OneTermEqualSelector(kfCacheEventInvolvedObjectUidKey, string(statefulSet.UID)), + Namespace: statefulSet.Namespace, + } + if err := r.List(ctx, statefulSetEvents, listOpts); err != nil { + log.Error(err, "unable to list StatefulSet events") + return status, err + } + + // find the last StatefulSet warning event + var lastStsWarningEvent *corev1.Event + if len(statefulSetEvents.Items) > 0 { 
+ for i, event := range statefulSetEvents.Items { + if event.Type == corev1.EventTypeWarning { + // + // TODO: ensure this actually works when there are multiple Warning events for this object + // + if lastStsWarningEvent == nil || lastStsWarningEvent.LastTimestamp.Time.Before(event.LastTimestamp.Time) { + lastStsWarningEvent = &statefulSetEvents.Items[i] + } + } + } + } + + // STATUS: Error (StatefulSet warning event) + if lastStsWarningEvent != nil { + status.State = kubefloworgv1beta1.WorkspaceStateError + status.StateMessage = fmt.Sprintf(stateMsgErrorStatefulSetWarningEvent, lastStsWarningEvent.Message) + return status, nil + } + } + // cases where the Pod exists if pod != nil { // STATUS: Terminating if pod.GetDeletionTimestamp() != nil { status.State = kubefloworgv1beta1.WorkspaceStateTerminating status.StateMessage = stateMsgTerminating - return status + return status, nil } // get the pod phase @@ -922,14 +985,46 @@ func generateWorkspaceStatus(workspace *kubefloworgv1beta1.Workspace, pod *corev // get the pod conditions // https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle/#pod-conditions - podReady := false + var podScheduledCondition corev1.PodCondition + var podReadyCondition corev1.PodCondition for _, condition := range pod.Status.Conditions { switch condition.Type { + case corev1.PodScheduled: + podScheduledCondition = condition case corev1.PodReady: - podReady = condition.Status == corev1.ConditionTrue + podReadyCondition = condition } } + // unpack the pod conditions + podScheduled := podScheduledCondition.Status == corev1.ConditionTrue + podReady := podReadyCondition.Status == corev1.ConditionTrue + + // STATUS: Error (pod conditions) + if !podScheduled { + switch podScheduledCondition.Reason { + case corev1.PodReasonUnschedulable: + status.State = kubefloworgv1beta1.WorkspaceStateError + status.StateMessage = fmt.Sprintf(stateMsgErrorPodUnschedulable, podScheduledCondition.Message) + return status, nil + case 
corev1.PodReasonSchedulingGated: + status.State = kubefloworgv1beta1.WorkspaceStateError + status.StateMessage = fmt.Sprintf(stateMsgErrorPodSchedulingGate, podScheduledCondition.Message) + return status, nil + case corev1.PodReasonSchedulerError: + status.State = kubefloworgv1beta1.WorkspaceStateError + status.StateMessage = fmt.Sprintf(stateMsgErrorPodSchedulerError, podScheduledCondition.Message) + return status, nil + } + } + + // STATUS: Running + if podPhase == corev1.PodRunning && podReady { + status.State = kubefloworgv1beta1.WorkspaceStateRunning + status.StateMessage = stateMsgRunning + return status, nil + } + // get container status // https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle/#container-states var containerStatus corev1.ContainerStatus @@ -943,24 +1038,17 @@ func generateWorkspaceStatus(workspace *kubefloworgv1beta1.Workspace, pod *corev // get the container state containerState := containerStatus.State - // STATUS: Running - if podPhase == corev1.PodRunning && podReady { - status.State = kubefloworgv1beta1.WorkspaceStateRunning - status.StateMessage = stateMsgRunning - return status - } - - // STATUS: Error + // STATUS: Error (container state) if containerState.Waiting != nil { if containerState.Waiting.Reason == "CrashLoopBackOff" { status.State = kubefloworgv1beta1.WorkspaceStateError - status.StateMessage = stateMsgErrorPodCrashLoopBackOff - return status + status.StateMessage = stateMsgErrorContainerCrashLoopBackOff + return status, nil } if containerState.Waiting.Reason == "ImagePullBackOff" { status.State = kubefloworgv1beta1.WorkspaceStateError - status.StateMessage = stateMsgErrorPodImagePullBackOff - return status + status.StateMessage = stateMsgErrorContainerImagePullBackOff + return status, nil } } @@ -968,22 +1056,12 @@ func generateWorkspaceStatus(workspace *kubefloworgv1beta1.Workspace, pod *corev if podPhase == corev1.PodPending { status.State = kubefloworgv1beta1.WorkspaceStatePending status.StateMessage = 
stateMsgPending - return status - } - } - - // cases where the Pod does not exist - if pod == nil { - // STATUS: Paused - if *workspace.Spec.Paused { - status.State = kubefloworgv1beta1.WorkspaceStatePaused - status.StateMessage = stateMsgPaused - return status + return status, nil } } // STATUS: Unknown status.State = kubefloworgv1beta1.WorkspaceStateUnknown status.StateMessage = stateMsgUnknown - return status + return status, nil } diff --git a/workspaces/controller/internal/controller/workspacekind_controller_test.go b/workspaces/controller/internal/controller/workspacekind_controller_test.go index f917fc5..dc062fd 100644 --- a/workspaces/controller/internal/controller/workspacekind_controller_test.go +++ b/workspaces/controller/internal/controller/workspacekind_controller_test.go @@ -20,8 +20,6 @@ import ( "fmt" "time" - v1 "k8s.io/api/core/v1" - "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/types" "k8s.io/utils/ptr" @@ -107,16 +105,6 @@ var _ = Describe("WorkspaceKind Controller", func() { newWorkspaceKind = workspaceKind.DeepCopy() newWorkspaceKind.Spec.PodTemplate.VolumeMounts.Home = "/home/jovyan/new" Expect(k8sClient.Patch(ctx, newWorkspaceKind, patch)).NotTo(Succeed()) - - By("failing to update the `spec.podTemplate.options.imageConfig.values[0].spec` field") - newWorkspaceKind = workspaceKind.DeepCopy() - newWorkspaceKind.Spec.PodTemplate.Options.ImageConfig.Values[0].Spec.Image = "new-image:latest" - Expect(k8sClient.Patch(ctx, newWorkspaceKind, patch)).NotTo(Succeed()) - - By("failing to update the `spec.podTemplate.options.podConfig.values[0].spec` field") - newWorkspaceKind = workspaceKind.DeepCopy() - newWorkspaceKind.Spec.PodTemplate.Options.PodConfig.Values[0].Spec.Resources.Requests[v1.ResourceCPU] = resource.MustParse("99") - Expect(k8sClient.Patch(ctx, newWorkspaceKind, patch)).NotTo(Succeed()) }) It("should not allow mutually exclusive fields to be set", func() { diff --git 
a/workspaces/controller/test/e2e/e2e_test.go b/workspaces/controller/test/e2e/e2e_test.go index 55cb7f7..5a12052 100644 --- a/workspaces/controller/test/e2e/e2e_test.go +++ b/workspaces/controller/test/e2e/e2e_test.go @@ -19,56 +19,119 @@ package e2e import ( "fmt" "os/exec" + "path/filepath" "time" + "github.com/kubeflow/notebooks/workspaces/controller/test/utils" + + corev1 "k8s.io/api/core/v1" + . "github.com/onsi/ginkgo/v2" . "github.com/onsi/gomega" - "github.com/kubeflow/notebooks/workspaces/controller/test/utils" + kubefloworgv1beta1 "github.com/kubeflow/notebooks/workspaces/controller/api/v1beta1" ) -const namespace = "workspace-controller-system" +const ( + // controller configs + controllerNamespace = "workspace-controller-system" + controllerImage = "ghcr.io/kubeflow/notebooks/workspace-controller:latest" + + // workspace configs + workspaceNamespace = "workspace-test" + workspaceName = "jupyterlab-workspace" + workspacePortInt = 8888 + workspacePortId = "jupyterlab" + + // curl image + curlImage = "curlimages/curl:8.9.1" + + // how long to wait in "Eventually" blocks + timeout = time.Second * 60 + + // how long to wait in "Consistently" blocks + duration = time.Second * 10 + + // how frequently to poll for conditions + interval = time.Second * 1 +) + +var ( + projectDir = "" +) var _ = Describe("controller", Ordered, func() { + BeforeAll(func() { - By("installing prometheus operator") - Expect(utils.InstallPrometheusOperator()).To(Succeed()) + projectDir, _ = utils.GetProjectDir() - By("installing the cert-manager") - Expect(utils.InstallCertManager()).To(Succeed()) + By("creating the controller namespace") + cmd := exec.Command("kubectl", "create", "ns", controllerNamespace) + _, _ = utils.Run(cmd) - By("creating manager namespace") - cmd := exec.Command("kubectl", "create", "ns", namespace) + By("creating the workspace namespace") + cmd = exec.Command("kubectl", "create", "ns", workspaceNamespace) + _, _ = utils.Run(cmd) + + By("creating common 
workspace resources") + cmd = exec.Command("kubectl", "apply", + "-k", filepath.Join(projectDir, "config/samples/common"), + "-n", workspaceNamespace, + ) _, _ = utils.Run(cmd) }) AfterAll(func() { - By("uninstalling the Prometheus manager bundle") - utils.UninstallPrometheusOperator() + By("deleting sample Workspace") + cmd := exec.Command("kubectl", "delete", "-f", + filepath.Join(projectDir, "config/samples/jupyterlab_v1beta1_workspace.yaml"), + "-n", workspaceNamespace, + ) + _, _ = utils.Run(cmd) - By("uninstalling the cert-manager bundle") - utils.UninstallCertManager() + By("deleting sample WorkspaceKind") + cmd = exec.Command("kubectl", "delete", + "-f", filepath.Join(projectDir, "config/samples/jupyterlab_v1beta1_workspacekind.yaml"), + ) + _, _ = utils.Run(cmd) - By("removing manager namespace") - cmd := exec.Command("kubectl", "delete", "ns", namespace) + By("deleting common workspace resources") + cmd = exec.Command("kubectl", "delete", + "-k", filepath.Join(projectDir, "config/samples/common"), + "-n", workspaceNamespace, + ) + _, _ = utils.Run(cmd) + + By("deleting controller namespace") + cmd = exec.Command("kubectl", "delete", "ns", controllerNamespace) + _, _ = utils.Run(cmd) + + By("deleting workspace namespace") + cmd = exec.Command("kubectl", "delete", "ns", workspaceNamespace) + _, _ = utils.Run(cmd) + + By("deleting the controller") + cmd = exec.Command("make", "undeploy") + _, _ = utils.Run(cmd) + + By("deleting CRDs") + cmd = exec.Command("make", "uninstall") _, _ = utils.Run(cmd) }) Context("Operator", func() { + It("should run successfully", func() { var controllerPodName string var err error - // projectimage stores the name of the image used in the example - var projectimage = "example.com/workspace-controller:v0.0.1" - - By("building the manager(Operator) image") - cmd := exec.Command("make", "docker-build", fmt.Sprintf("IMG=%s", projectimage)) + By("building the controller image") + cmd := exec.Command("make", "docker-build", 
fmt.Sprintf("IMG=%s", controllerImage)) _, err = utils.Run(cmd) ExpectWithOffset(1, err).NotTo(HaveOccurred()) - By("loading the the manager(Operator) image on Kind") - err = utils.LoadImageToKindClusterWithName(projectimage) + By("loading the controller image on Kind") + err = utils.LoadImageToKindClusterWithName(controllerImage) ExpectWithOffset(1, err).NotTo(HaveOccurred()) By("installing CRDs") @@ -77,25 +140,25 @@ var _ = Describe("controller", Ordered, func() { ExpectWithOffset(1, err).NotTo(HaveOccurred()) By("deploying the controller-manager") - cmd = exec.Command("make", "deploy", fmt.Sprintf("IMG=%s", projectimage)) + cmd = exec.Command("make", "deploy", fmt.Sprintf("IMG=%s", controllerImage)) _, err = utils.Run(cmd) ExpectWithOffset(1, err).NotTo(HaveOccurred()) By("validating that the controller-manager pod is running as expected") verifyControllerUp := func() error { - // Get pod name - - cmd = exec.Command("kubectl", "get", - "pods", "-l", "control-plane=controller-manager", + // Get controller pod name + cmd = exec.Command("kubectl", "get", "pods", + "-l", "control-plane=controller-manager", + "-n", controllerNamespace, "-o", "go-template={{ range .items }}"+ "{{ if not .metadata.deletionTimestamp }}"+ "{{ .metadata.name }}"+ "{{ \"\\n\" }}{{ end }}{{ end }}", - "-n", namespace, ) - podOutput, err := utils.Run(cmd) ExpectWithOffset(2, err).NotTo(HaveOccurred()) + + // Ensure only 1 controller pod is running podNames := utils.GetNonEmptyLines(string(podOutput)) if len(podNames) != 1 { return fmt.Errorf("expect 1 controller pods running, but got %d", len(podNames)) @@ -103,20 +166,144 @@ var _ = Describe("controller", Ordered, func() { controllerPodName = podNames[0] ExpectWithOffset(2, controllerPodName).Should(ContainSubstring("controller-manager")) - // Validate pod status - cmd = exec.Command("kubectl", "get", - "pods", controllerPodName, "-o", "jsonpath={.status.phase}", - "-n", namespace, + // Validate controller pod status + cmd = 
exec.Command("kubectl", "get", "pods", + controllerPodName, + "-n", controllerNamespace, + "-o", "jsonpath={.status.phase}", ) - status, err := utils.Run(cmd) + statusPhase, err := utils.Run(cmd) ExpectWithOffset(2, err).NotTo(HaveOccurred()) - if string(status) != "Running" { - return fmt.Errorf("controller pod in %s status", status) + if string(statusPhase) != string(corev1.PodRunning) { + return fmt.Errorf("controller pod in %s phase", statusPhase) } return nil } - EventuallyWithOffset(1, verifyControllerUp, time.Minute, time.Second).Should(Succeed()) + EventuallyWithOffset(1, verifyControllerUp, timeout, interval).Should(Succeed()) + By("creating an instance of the WorkspaceKind CR") + EventuallyWithOffset(1, func() error { + cmd = exec.Command("kubectl", "apply", + "-f", filepath.Join(projectDir, "config/samples/jupyterlab_v1beta1_workspacekind.yaml"), + ) + _, err = utils.Run(cmd) + return err + }, timeout, interval).Should(Succeed()) + + By("creating an instance of the Workspace CR") + EventuallyWithOffset(1, func() error { + cmd = exec.Command("kubectl", "apply", + "-f", filepath.Join(projectDir, "config/samples/jupyterlab_v1beta1_workspace.yaml"), + "-n", workspaceNamespace, + ) + _, err = utils.Run(cmd) + return err + }, timeout, interval).Should(Succeed()) + + By("validating that the workspace has 'Running' state") + verifyWorkspaceState := func() error { + cmd = exec.Command("kubectl", "get", "workspaces", + workspaceName, + "-n", workspaceNamespace, + "-o", "jsonpath={.status.state}", + ) + statusState, err := utils.Run(cmd) + ExpectWithOffset(2, err).NotTo(HaveOccurred()) + + // If the workspace is not in the "Running" state get the state message + if string(statusState) != string(kubefloworgv1beta1.WorkspaceStateRunning) { + cmd = exec.Command("kubectl", "get", "workspaces", + workspaceName, + "-n", workspaceNamespace, + "-o", "jsonpath={.status.stateMessage}", + ) + statusStateMessage, err := utils.Run(cmd) + ExpectWithOffset(2, 
err).NotTo(HaveOccurred()) + return fmt.Errorf("workspace in %s state with message: %s", statusState, statusStateMessage) + } + return nil + } + EventuallyWithOffset(1, verifyWorkspaceState, timeout, interval).Should(Succeed()) + + By("validating that the workspace pod is running as expected") + verifyWorkspacePod := func() error { + // Get workspace pod name + cmd = exec.Command("kubectl", "get", "pods", + "-l", fmt.Sprintf("notebooks.kubeflow.org/workspace-name=%s", workspaceName), + "-n", workspaceNamespace, + "-o", "go-template={{ range .items }}"+ + "{{ if not .metadata.deletionTimestamp }}"+ + "{{ .metadata.name }}"+ + "{{ \"\\n\" }}{{ end }}{{ end }}", + ) + podOutput, err := utils.Run(cmd) + ExpectWithOffset(2, err).NotTo(HaveOccurred()) + + // Ensure only 1 workspace pod is running + podNames := utils.GetNonEmptyLines(string(podOutput)) + if len(podNames) != 1 { + return fmt.Errorf("expect 1 workspace pod running, but got %d", len(podNames)) + } + workspacePodName := podNames[0] + ExpectWithOffset(2, workspacePodName).Should(ContainSubstring(fmt.Sprintf("ws-%s", workspaceName))) + + // Validate workspace pod status + cmd = exec.Command("kubectl", "get", "pods", + workspacePodName, + "-n", workspaceNamespace, + "-o", "jsonpath={.status.phase}", + ) + statusPhase, err := utils.Run(cmd) + ExpectWithOffset(2, err).NotTo(HaveOccurred()) + if string(statusPhase) != string(corev1.PodRunning) { + return fmt.Errorf("workspace pod in %s phase", statusPhase) + } + return nil + } + EventuallyWithOffset(1, verifyWorkspacePod, timeout, interval).Should(Succeed()) + + By("validating that the workspace service was created") + getServiceName := func() (string, error) { + // Get the workspace service name + cmd := exec.Command("kubectl", "get", "services", + "-l", fmt.Sprintf("notebooks.kubeflow.org/workspace-name=%s", workspaceName), + "-n", workspaceNamespace, + "-o", "go-template={{ range .items }}"+ + "{{ if not .metadata.deletionTimestamp }}"+ + "{{ .metadata.name }}"+ 
+ "{{ \"\\n\" }}{{ end }}{{ end }}", + ) + svcOutput, err := utils.Run(cmd) + ExpectWithOffset(2, err).NotTo(HaveOccurred()) + + // Ensure only 1 service is found + svcNames := utils.GetNonEmptyLines(string(svcOutput)) + if len(svcNames) != 1 { + return "", fmt.Errorf("expect 1 service found, but got %d", len(svcNames)) + } + workspaceSvcName := svcNames[0] + ExpectWithOffset(2, workspaceSvcName).Should(ContainSubstring(fmt.Sprintf("ws-%s", workspaceName))) + + return workspaceSvcName, nil + } + serviceName, err := getServiceName() + ExpectWithOffset(1, err).NotTo(HaveOccurred()) + + By("validating that the workspace service endpoint is reachable") + serviceEndpoint := fmt.Sprintf("http://%s:%d/workspace/%s/%s/%s/lab", + serviceName, workspacePortInt, workspaceNamespace, workspaceName, workspacePortId, + ) + curlService := func() error { + // NOTE: this command should exit with a non-zero status code if the HTTP status code is >= 400 + cmd := exec.Command("kubectl", "run", + "tmp-curl", "-n", workspaceNamespace, + "--attach", "--command", fmt.Sprintf("--image=%s", curlImage), "--rm", "--restart=Never", "--", + "curl", "-sSL", "-o", "/dev/null", "--fail-with-body", serviceEndpoint, + ) + _, err := utils.Run(cmd) + return err + } + Eventually(curlService, timeout, interval).Should(Succeed()) }) }) }) diff --git a/workspaces/controller/test/utils/utils.go b/workspaces/controller/test/utils/utils.go index e3eb79b..7992e4a 100644 --- a/workspaces/controller/test/utils/utils.go +++ b/workspaces/controller/test/utils/utils.go @@ -25,27 +25,6 @@ import ( . 
"github.com/onsi/ginkgo/v2" //nolint:golint,revive ) -const ( - prometheusOperatorVersion = "v0.72.0" - prometheusOperatorURL = "https://github.com/prometheus-operator/prometheus-operator/" + - "releases/download/%s/bundle.yaml" - - certmanagerVersion = "v1.14.4" - certmanagerURLTmpl = "https://github.com/jetstack/cert-manager/releases/download/%s/cert-manager.yaml" -) - -func warnError(err error) { - fmt.Fprintf(GinkgoWriter, "warning: %v\n", err) -} - -// InstallPrometheusOperator installs the prometheus Operator to be used to export the enabled metrics. -func InstallPrometheusOperator() error { - url := fmt.Sprintf(prometheusOperatorURL, prometheusOperatorVersion) - cmd := exec.Command("kubectl", "create", "-f", url) - _, err := Run(cmd) - return err -} - // Run executes the provided command within this context func Run(cmd *exec.Cmd) ([]byte, error) { dir, _ := GetProjectDir() @@ -66,48 +45,20 @@ func Run(cmd *exec.Cmd) ([]byte, error) { return output, nil } -// UninstallPrometheusOperator uninstalls the prometheus -func UninstallPrometheusOperator() { - url := fmt.Sprintf(prometheusOperatorURL, prometheusOperatorVersion) - cmd := exec.Command("kubectl", "delete", "-f", url) - if _, err := Run(cmd); err != nil { - warnError(err) - } -} - -// UninstallCertManager uninstalls the cert manager -func UninstallCertManager() { - url := fmt.Sprintf(certmanagerURLTmpl, certmanagerVersion) - cmd := exec.Command("kubectl", "delete", "-f", url) - if _, err := Run(cmd); err != nil { - warnError(err) - } -} - -// InstallCertManager installs the cert manager bundle. -func InstallCertManager() error { - url := fmt.Sprintf(certmanagerURLTmpl, certmanagerVersion) - cmd := exec.Command("kubectl", "apply", "-f", url) - if _, err := Run(cmd); err != nil { - return err - } - // Wait for cert-manager-webhook to be ready, which can take time if cert-manager - // was re-installed after uninstalling on a cluster. 
- cmd = exec.Command("kubectl", "wait", "deployment.apps/cert-manager-webhook", - "--for", "condition=Available", - "--namespace", "cert-manager", - "--timeout", "5m", - ) - - _, err := Run(cmd) - return err -} - // LoadImageToKindCluster loads a local docker image to the kind cluster func LoadImageToKindClusterWithName(name string) error { - cluster := "kind" + var cluster string if v, ok := os.LookupEnv("KIND_CLUSTER"); ok { cluster = v + } else { + // if `KIND_CLUSTER` is not set, get the cluster name from the kubeconfig + cmd := exec.Command("kubectl", "config", "current-context") + output, err := Run(cmd) + if err != nil { + return err + } + cluster = strings.TrimSpace(string(output)) + cluster = strings.TrimPrefix(cluster, "kind-") } kindOptions := []string{"load", "docker-image", name, "--name", cluster} cmd := exec.Command("kind", kindOptions...)