diff --git a/Makefile b/Makefile
index 5f22eba..1d69a37 100644
--- a/Makefile
+++ b/Makefile
@@ -23,6 +23,7 @@ BASE_IMAGE_SSH_PORT?=2222
 IMG_BUILDER=docker
 PLATFORMS ?= linux/amd64
 INTEL_PLATFORMS ?= linux/amd64
+MPICH_PLATFORMS ?= linux/amd64
 LD_FLAGS_V2=" \
     -X '${REPO_PATH}/pkg/version.GitSHA=${GitSHA}' \
     -X '${REPO_PATH}/pkg/version.Built=${Date}' \
@@ -71,6 +72,7 @@ test: bin/envtest scheduler-plugins-crd
 test_e2e: export TEST_MPI_OPERATOR_IMAGE=${IMAGE_NAME}:${RELEASE_VERSION}
 test_e2e: export TEST_OPENMPI_IMAGE=mpioperator/mpi-pi:${RELEASE_VERSION}-openmpi
 test_e2e: export TEST_INTELMPI_IMAGE=mpioperator/mpi-pi:${RELEASE_VERSION}-intel
+test_e2e: export TEST_MPICH_IMAGE=mpioperator/mpi-pi:${RELEASE_VERSION}-mpich
 test_e2e: bin/kubectl kind helm images test_images dev_manifest scheduler-plugins-chart
 	go test -v ./test/e2e/...
@@ -108,6 +110,9 @@ test_images:
 	${IMG_BUILDER} build $(BUILD_ARGS) --platform $(INTEL_PLATFORMS) --build-arg BASE_LABEL=${RELEASE_VERSION} -t mpioperator/intel:${RELEASE_VERSION} build/base -f build/base/intel.Dockerfile
 	${IMG_BUILDER} build $(BUILD_ARGS) --platform $(INTEL_PLATFORMS) -t mpioperator/intel-builder:${RELEASE_VERSION} build/base -f build/base/intel-builder.Dockerfile
 	${IMG_BUILDER} build $(BUILD_ARGS) --platform $(INTEL_PLATFORMS) --build-arg BASE_LABEL=${RELEASE_VERSION} -t mpioperator/mpi-pi:${RELEASE_VERSION}-intel examples/v2beta1/pi -f examples/v2beta1/pi/intel.Dockerfile
+	${IMG_BUILDER} build $(BUILD_ARGS) --platform $(MPICH_PLATFORMS) --build-arg BASE_LABEL=${RELEASE_VERSION} -t mpioperator/mpich:${RELEASE_VERSION} build/base -f build/base/mpich.Dockerfile
+	${IMG_BUILDER} build $(BUILD_ARGS) --platform $(MPICH_PLATFORMS) -t mpioperator/mpich-builder:${RELEASE_VERSION} build/base -f build/base/mpich-builder.Dockerfile
+	${IMG_BUILDER} build $(BUILD_ARGS) --platform $(MPICH_PLATFORMS) --build-arg BASE_LABEL=${RELEASE_VERSION} -t mpioperator/mpi-pi:${RELEASE_VERSION}-mpich examples/v2beta1/pi -f examples/v2beta1/pi/mpich.Dockerfile
 
 .PHONY: tidy
 tidy:
diff --git a/README.md b/README.md
index 05445a5..725e001 100644
--- a/README.md
+++ b/README.md
@@ -218,6 +218,12 @@ For a sample that uses Intel MPI, see:
 cat examples/pi/pi-intel.yaml
 ```
 
+For a sample that uses MPICH, see:
+
+```bash
+cat examples/pi/pi-mpich.yaml
+```
+
 ## Exposed Metrics
 
 | Metric name | Metric type | Description | Labels |
diff --git a/build/base/intel-entrypoint.sh b/build/base/entrypoint.sh
similarity index 100%
rename from build/base/intel-entrypoint.sh
rename to build/base/entrypoint.sh
diff --git a/build/base/intel.Dockerfile b/build/base/intel.Dockerfile
index 02ee87e..e87a0e9 100644
--- a/build/base/intel.Dockerfile
+++ b/build/base/intel.Dockerfile
@@ -22,5 +22,5 @@ RUN apt update \
     intel-oneapi-mpi \
     && rm -rf /var/lib/apt/lists/*
 
-COPY intel-entrypoint.sh /entrypoint.sh
+COPY entrypoint.sh /entrypoint.sh
 ENTRYPOINT ["/entrypoint.sh"]
diff --git a/build/base/mpich-builder.Dockerfile b/build/base/mpich-builder.Dockerfile
new file mode 100644
index 0000000..5880b8a
--- /dev/null
+++ b/build/base/mpich-builder.Dockerfile
@@ -0,0 +1,7 @@
+FROM debian:bullseye as builder
+
+RUN apt update \
+    && apt install -y --no-install-recommends \
+        g++ \
+        libmpich-dev \
+    && rm -rf /var/lib/apt/lists/*
diff --git a/build/base/mpich.Dockerfile b/build/base/mpich.Dockerfile
new file mode 100644
index 0000000..34fc015
--- /dev/null
+++ b/build/base/mpich.Dockerfile
@@ -0,0 +1,12 @@
+ARG BASE_LABEL
+
+FROM mpioperator/base:${BASE_LABEL}
+
+RUN apt update \
+    && apt install -y --no-install-recommends \
+        dnsutils \
+        mpich \
+    && rm -rf /var/lib/apt/lists/*
+
+COPY entrypoint.sh /entrypoint.sh
+ENTRYPOINT ["/entrypoint.sh"]
diff --git a/deploy/v2beta1/mpi-operator.yaml b/deploy/v2beta1/mpi-operator.yaml
index 4bfd994..a971390 100644
--- a/deploy/v2beta1/mpi-operator.yaml
+++ b/deploy/v2beta1/mpi-operator.yaml
@@ -58,10 +58,11 @@ spec:
               mpiImplementation:
                 default: OpenMPI
                 description: MPIImplementation is the MPI implementation. Options
-                  are "OpenMPI" (default) and "Intel".
+                  are "OpenMPI" (default), "Intel" and "MPICH".
                 enum:
                 - OpenMPI
                 - Intel
+                - MPICH
                 type: string
               mpiReplicaSpecs:
                 additionalProperties:
diff --git a/examples/v2beta1/pi/Dockerfile b/examples/v2beta1/pi/Dockerfile
index f0d4209..bf1b1e7 100644
--- a/examples/v2beta1/pi/Dockerfile
+++ b/examples/v2beta1/pi/Dockerfile
@@ -5,7 +5,6 @@ FROM mpioperator/openmpi-builder:${BASE_LABEL} as builder
 COPY pi.cc /src/pi.cc
 RUN mpic++ /src/pi.cc -o /pi
 
-
 FROM mpioperator/openmpi:${BASE_LABEL}
 
-COPY --from=builder /pi /home/mpiuser/pi
\ No newline at end of file
+COPY --from=builder /pi /home/mpiuser/pi
diff --git a/examples/v2beta1/pi/README.md b/examples/v2beta1/pi/README.md
index f3e3aab..c829c15 100644
--- a/examples/v2beta1/pi/README.md
+++ b/examples/v2beta1/pi/README.md
@@ -19,9 +19,15 @@ For Intel MPI:
 docker build -t mpi-pi . -f intel.Dockerfile
 ```
 
+For MPICH:
+
+```bash
+docker build -t mpi-pi . -f mpich.Dockerfile
+```
+
 ## Create MPIJob
 
-Modify `pi.yaml` (for OpenMPI) or `pi-intel.yaml` (for Intel MPI) to set up the
+Modify `pi.yaml` (for OpenMPI), `pi-intel.yaml` (for Intel MPI) or `pi-mpich.yaml` (for MPICH) to set up the
 image name from your own registry.
 
 Then, run:
diff --git a/examples/v2beta1/pi/intel.Dockerfile b/examples/v2beta1/pi/intel.Dockerfile
index 82219bf..fbe66f4 100644
--- a/examples/v2beta1/pi/intel.Dockerfile
+++ b/examples/v2beta1/pi/intel.Dockerfile
@@ -7,4 +7,4 @@ RUN bash -c "source /opt/intel/oneapi/setvars.sh && mpicxx /src/pi.cc -o /pi"
 
 FROM mpioperator/intel:${BASE_LABEL}
 
-COPY --from=builder /pi /home/mpiuser/pi
\ No newline at end of file
+COPY --from=builder /pi /home/mpiuser/pi
diff --git a/examples/v2beta1/pi/mpich.Dockerfile b/examples/v2beta1/pi/mpich.Dockerfile
new file mode 100644
index 0000000..ae62427
--- /dev/null
+++ b/examples/v2beta1/pi/mpich.Dockerfile
@@ -0,0 +1,10 @@
+ARG BASE_LABEL
+
+FROM mpioperator/mpich-builder:${BASE_LABEL} as builder
+
+COPY pi.cc /src/pi.cc
+RUN mpic++ /src/pi.cc -o /pi
+
+FROM mpioperator/mpich:${BASE_LABEL}
+
+COPY --from=builder /pi /home/mpiuser/pi
diff --git a/examples/v2beta1/pi/pi-mpich.yaml b/examples/v2beta1/pi/pi-mpich.yaml
new file mode 100644
index 0000000..7c4a70c
--- /dev/null
+++ b/examples/v2beta1/pi/pi-mpich.yaml
@@ -0,0 +1,54 @@
+apiVersion: kubeflow.org/v2beta1
+kind: MPIJob
+metadata:
+  name: pi
+spec:
+  slotsPerWorker: 1
+  runPolicy:
+    cleanPodPolicy: Running
+  sshAuthMountPath: /home/mpiuser/.ssh
+  mpiImplementation: MPICH
+  mpiReplicaSpecs:
+    Launcher:
+      replicas: 1
+      template:
+        spec:
+          containers:
+          - image: mpioperator/mpi-pi:mpich
+            imagePullPolicy: Always
+            name: mpi-launcher
+            securityContext:
+              runAsUser: 1000
+            args:
+            - mpirun
+            - -n
+            - "2"
+            - /home/mpiuser/pi
+            resources:
+              limits:
+                cpu: 1
+                memory: 1Gi
+    Worker:
+      replicas: 2
+      template:
+        spec:
+          containers:
+          - image: mpioperator/mpi-pi:mpich
+            imagePullPolicy: Always
+            name: mpi-worker
+            securityContext:
+              runAsUser: 1000
+            command:
+            args:
+            - /usr/sbin/sshd
+            - -De
+            - -f
+            - /home/mpiuser/.sshd_config
+            readinessProbe:
+              tcpSocket:
+                port: 2222
+              initialDelaySeconds: 2
+            resources:
+              limits:
+                cpu: 1
+                memory: 1Gi
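The manifest above is the user-facing half of the feature. For readers who prefer Go, here is a minimal sketch of the same job built with the `v2beta1` API types this patch extends. It is illustrative only: the import paths mirror the test files elsewhere in this diff but may need adjusting to your vendored versions, and the run policy and resource limits from the YAML are omitted for brevity.

```go
package main

import (
	"fmt"

	common "github.com/kubeflow/common/pkg/apis/common/v1"
	corev1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/utils/pointer"

	kubeflow "github.com/kubeflow/mpi-operator/pkg/apis/kubeflow/v2beta1"
)

// mpichPiJob mirrors examples/v2beta1/pi/pi-mpich.yaml programmatically.
func mpichPiJob() *kubeflow.MPIJob {
	return &kubeflow.MPIJob{
		ObjectMeta: metav1.ObjectMeta{Name: "pi"},
		Spec: kubeflow.MPIJobSpec{
			SlotsPerWorker:    pointer.Int32(1),
			SSHAuthMountPath:  "/home/mpiuser/.ssh",
			MPIImplementation: kubeflow.MPIImplementationMPICH, // the enum value added by this patch
			MPIReplicaSpecs: map[kubeflow.MPIReplicaType]*common.ReplicaSpec{
				kubeflow.MPIReplicaTypeLauncher: {
					Replicas: pointer.Int32(1),
					Template: corev1.PodTemplateSpec{Spec: corev1.PodSpec{
						Containers: []corev1.Container{{
							Name:  "mpi-launcher",
							Image: "mpioperator/mpi-pi:mpich",
							Args:  []string{"mpirun", "-n", "2", "/home/mpiuser/pi"},
						}},
					}},
				},
				kubeflow.MPIReplicaTypeWorker: {
					Replicas: pointer.Int32(2),
					Template: corev1.PodTemplateSpec{Spec: corev1.PodSpec{
						Containers: []corev1.Container{{
							Name:  "mpi-worker",
							Image: "mpioperator/mpi-pi:mpich",
							// sshd runs in the foreground; -f points it at the non-root config.
							Args: []string{"/usr/sbin/sshd", "-De", "-f", "/home/mpiuser/.sshd_config"},
						}},
					}},
				},
			},
		},
	}
}

func main() {
	job := mpichPiJob()
	fmt.Println(job.Name, job.Spec.MPIImplementation)
}
```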
diff --git a/manifests/base/kubeflow.org_mpijobs.yaml b/manifests/base/kubeflow.org_mpijobs.yaml
index 454414f..a594324 100644
--- a/manifests/base/kubeflow.org_mpijobs.yaml
+++ b/manifests/base/kubeflow.org_mpijobs.yaml
@@ -35,10 +35,11 @@ spec:
             mpiImplementation:
               default: OpenMPI
               description: MPIImplementation is the MPI implementation. Options
-                are "OpenMPI" (default) and "Intel".
+                are "OpenMPI" (default), "Intel" and "MPICH".
               enum:
               - OpenMPI
               - Intel
+              - MPICH
               type: string
             mpiReplicaSpecs:
               additionalProperties:
diff --git a/pkg/apis/kubeflow/v2beta1/default_test.go b/pkg/apis/kubeflow/v2beta1/default_test.go
index 80662d5..7c8850c 100644
--- a/pkg/apis/kubeflow/v2beta1/default_test.go
+++ b/pkg/apis/kubeflow/v2beta1/default_test.go
@@ -38,7 +38,7 @@ func TestSetDefaults_MPIJob(t *testing.T) {
 				},
 			},
 		},
-		"base defaults overridden": {
+		"base defaults overridden (intel)": {
 			job: MPIJob{
 				Spec: MPIJobSpec{
 					SlotsPerWorker: newInt32(10),
@@ -66,6 +66,34 @@
 				},
 			},
 		},
+		"base defaults overridden (mpich)": {
+			job: MPIJob{
+				Spec: MPIJobSpec{
+					SlotsPerWorker: newInt32(10),
+					RunPolicy: RunPolicy{
+						CleanPodPolicy:          NewCleanPodPolicy(CleanPodPolicyRunning),
+						TTLSecondsAfterFinished: newInt32(2),
+						ActiveDeadlineSeconds:   newInt64(3),
+						BackoffLimit:            newInt32(4),
+					},
+					SSHAuthMountPath:  "/home/mpiuser/.ssh",
+					MPIImplementation: MPIImplementationMPICH,
+				},
+			},
+			want: MPIJob{
+				Spec: MPIJobSpec{
+					SlotsPerWorker: newInt32(10),
+					RunPolicy: RunPolicy{
+						CleanPodPolicy:          NewCleanPodPolicy(CleanPodPolicyRunning),
+						TTLSecondsAfterFinished: newInt32(2),
+						ActiveDeadlineSeconds:   newInt64(3),
+						BackoffLimit:            newInt32(4),
+					},
+					SSHAuthMountPath:  "/home/mpiuser/.ssh",
+					MPIImplementation: MPIImplementationMPICH,
+				},
+			},
+		},
 		"launcher defaults": {
 			job: MPIJob{
 				Spec: MPIJobSpec{
diff --git a/pkg/apis/kubeflow/v2beta1/openapi_generated.go b/pkg/apis/kubeflow/v2beta1/openapi_generated.go
index 0ce9deb..724f2db 100644
--- a/pkg/apis/kubeflow/v2beta1/openapi_generated.go
+++ b/pkg/apis/kubeflow/v2beta1/openapi_generated.go
@@ -488,7 +488,7 @@ func schema_pkg_apis_kubeflow_v2beta1_MPIJobSpec(ref common.ReferenceCallback) c
 			},
 			"mpiImplementation": {
 				SchemaProps: spec.SchemaProps{
-					Description: "MPIImplementation is the MPI implementation. Options are \"OpenMPI\" (default) and \"Intel\".",
+					Description: "MPIImplementation is the MPI implementation. Options are \"OpenMPI\" (default), \"Intel\" and \"MPICH\".",
 					Type:        []string{"string"},
 					Format:      "",
 				},
diff --git a/pkg/apis/kubeflow/v2beta1/swagger.json b/pkg/apis/kubeflow/v2beta1/swagger.json
index dcdc8ec..f43f759 100644
--- a/pkg/apis/kubeflow/v2beta1/swagger.json
+++ b/pkg/apis/kubeflow/v2beta1/swagger.json
@@ -322,7 +322,7 @@
       ],
       "properties": {
         "mpiImplementation": {
-          "description": "MPIImplementation is the MPI implementation. Options are \"OpenMPI\" (default) and \"Intel\".",
+          "description": "MPIImplementation is the MPI implementation. Options are \"OpenMPI\" (default), \"Intel\" and \"MPICH\".",
           "type": "string"
         },
         "mpiReplicaSpecs": {
diff --git a/pkg/apis/kubeflow/v2beta1/types.go b/pkg/apis/kubeflow/v2beta1/types.go
index dcf0277..98f7a94 100644
--- a/pkg/apis/kubeflow/v2beta1/types.go
+++ b/pkg/apis/kubeflow/v2beta1/types.go
@@ -155,8 +155,8 @@ type MPIJobSpec struct {
 	SSHAuthMountPath string `json:"sshAuthMountPath,omitempty"`
 
 	// MPIImplementation is the MPI implementation.
-	// Options are "OpenMPI" (default) and "Intel".
-	// +kubebuilder:validation:Enum:=OpenMPI;Intel
+	// Options are "OpenMPI" (default), "Intel" and "MPICH".
+	// +kubebuilder:validation:Enum:=OpenMPI;Intel;MPICH
 	// +kubebuilder:default:=OpenMPI
 	MPIImplementation MPIImplementation `json:"mpiImplementation,omitempty"`
 }
@@ -177,6 +177,7 @@ type MPIImplementation string
 const (
 	MPIImplementationOpenMPI MPIImplementation = "OpenMPI"
 	MPIImplementationIntel   MPIImplementation = "Intel"
+	MPIImplementationMPICH   MPIImplementation = "MPICH"
 )
 
 // JobStatus represents the current observed state of the training Job.
diff --git a/pkg/apis/kubeflow/validation/validation.go b/pkg/apis/kubeflow/validation/validation.go
index ff411a9..f39f1b4 100644
--- a/pkg/apis/kubeflow/validation/validation.go
+++ b/pkg/apis/kubeflow/validation/validation.go
@@ -35,7 +35,8 @@ var (
 
 	validMPIImplementations = sets.NewString(
 		string(kubeflow.MPIImplementationOpenMPI),
-		string(kubeflow.MPIImplementationIntel))
+		string(kubeflow.MPIImplementationIntel),
+		string(kubeflow.MPIImplementationMPICH))
 
 	validRestartPolicies = sets.NewString(
 		string(common.RestartPolicyNever),
diff --git a/pkg/apis/kubeflow/validation/validation_test.go b/pkg/apis/kubeflow/validation/validation_test.go
index 31754d7..b5f2d6d 100644
--- a/pkg/apis/kubeflow/validation/validation_test.go
+++ b/pkg/apis/kubeflow/validation/validation_test.go
@@ -31,7 +31,7 @@ func TestValidateMPIJob(t *testing.T) {
 		job      kubeflow.MPIJob
 		wantErrs field.ErrorList
 	}{
-		"valid": {
+		"valid (intel)": {
 			job: kubeflow.MPIJob{
 				ObjectMeta: metav1.ObjectMeta{
 					Name: "foo",
@@ -57,7 +57,7 @@ func TestValidateMPIJob(t *testing.T) {
 				},
 			},
 		},
-		"valid with worker": {
+		"valid with worker (intel)": {
 			job: kubeflow.MPIJob{
 				ObjectMeta: metav1.ObjectMeta{
 					Name: "foo",
@@ -92,6 +92,67 @@
 				},
 			},
 		},
+		"valid (mpich)": {
+			job: kubeflow.MPIJob{
+				ObjectMeta: metav1.ObjectMeta{
+					Name: "foo",
+				},
+				Spec: kubeflow.MPIJobSpec{
+					SlotsPerWorker: newInt32(2),
+					RunPolicy: kubeflow.RunPolicy{
+						CleanPodPolicy: kubeflow.NewCleanPodPolicy(kubeflow.CleanPodPolicyRunning),
+					},
+					SSHAuthMountPath:  "/home/mpiuser/.ssh",
+					MPIImplementation: kubeflow.MPIImplementationMPICH,
+					MPIReplicaSpecs: map[kubeflow.MPIReplicaType]*common.ReplicaSpec{
+						kubeflow.MPIReplicaTypeLauncher: {
+							Replicas:      newInt32(1),
+							RestartPolicy: common.RestartPolicyNever,
+							Template: corev1.PodTemplateSpec{
+								Spec: corev1.PodSpec{
+									Containers: []corev1.Container{{}},
+								},
+							},
+						},
+					},
+				},
+			},
+		},
+		"valid with worker (mpich)": {
+			job: kubeflow.MPIJob{
+				ObjectMeta: metav1.ObjectMeta{
+					Name: "foo",
+				},
+				Spec: kubeflow.MPIJobSpec{
+					SlotsPerWorker: newInt32(2),
+					RunPolicy: kubeflow.RunPolicy{
+						CleanPodPolicy: kubeflow.NewCleanPodPolicy(kubeflow.CleanPodPolicyRunning),
+					},
+					SSHAuthMountPath:  "/home/mpiuser/.ssh",
+					MPIImplementation: kubeflow.MPIImplementationMPICH,
+					MPIReplicaSpecs: map[kubeflow.MPIReplicaType]*common.ReplicaSpec{
+						kubeflow.MPIReplicaTypeLauncher: {
+							Replicas:      newInt32(1),
+							RestartPolicy: common.RestartPolicyOnFailure,
+							Template: corev1.PodTemplateSpec{
+								Spec: corev1.PodSpec{
+									Containers: []corev1.Container{{}},
+								},
+							},
+						},
+						kubeflow.MPIReplicaTypeWorker: {
+							Replicas:      newInt32(3),
+							RestartPolicy: common.RestartPolicyNever,
+							Template: corev1.PodTemplateSpec{
+								Spec: corev1.PodSpec{
+									Containers: []corev1.Container{{}},
+								},
+							},
+						},
+					},
+				},
+			},
+		},
 		"empty job": {
 			wantErrs: field.ErrorList{
 				&field.Error{
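The validation change is plain set membership. A standalone sketch of the pattern (the helper name here is invented; in the real code the check is wired into `ValidateMPIJob`, which the renamed and added test cases above exercise):

```go
package validation

import (
	"k8s.io/apimachinery/pkg/util/sets"
	"k8s.io/apimachinery/pkg/util/validation/field"
)

// The "MPICH" entry is what this patch adds to the set.
var validMPIImplementations = sets.NewString("OpenMPI", "Intel", "MPICH")

// validateMPIImplementation is an illustrative helper: an unknown value
// becomes an "Unsupported value" field error listing the allowed options.
func validateMPIImplementation(impl string, path *field.Path) field.ErrorList {
	var errs field.ErrorList
	if !validMPIImplementations.Has(impl) {
		errs = append(errs, field.NotSupported(path, impl, validMPIImplementations.List()))
	}
	return errs
}
```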
diff --git a/pkg/controller/mpi_job_controller.go b/pkg/controller/mpi_job_controller.go
index 8aa927a..c080f43 100644
--- a/pkg/controller/mpi_job_controller.go
+++ b/pkg/controller/mpi_job_controller.go
@@ -202,6 +202,16 @@ var (
 			Value: "-o ConnectionAttempts=10",
 		},
 	}
+	mpichEnvVars = []corev1.EnvVar{
+		{
+			Name:  "HYDRA_HOST_FILE",
+			Value: fmt.Sprintf("%s/%s", configMountPath, hostfileName),
+		},
+		{
+			Name:  "HYDRA_LAUNCH_EXTRA_ARGS",
+			Value: "-o ConnectionAttempts=10",
+		},
+	}
 	nvidiaDisableEnvVars = []corev1.EnvVar{
 		{Name: "NVIDIA_VISIBLE_DEVICES"},
 		{Name: "NVIDIA_DRIVER_CAPABILITIES"},
@@ -603,8 +613,9 @@ func (c *MPIJobController) syncHandler(key string) error {
 			return err
 		}
 	}
-	if mpiJob.Spec.MPIImplementation == kubeflow.MPIImplementationIntel {
-		// The Intel implementation requires workers to communicate with the
+	if mpiJob.Spec.MPIImplementation == kubeflow.MPIImplementationIntel ||
+		mpiJob.Spec.MPIImplementation == kubeflow.MPIImplementationMPICH {
+		// The Intel and MPICH implementations require workers to communicate with the
 		// launcher through its hostname. For that, we create a Service which
 		// has the same name as the launcher's hostname.
 		_, err := c.getOrCreateService(mpiJob, newLauncherService(mpiJob))
@@ -1216,7 +1227,7 @@ func newConfigMap(mpiJob *kubeflow.MPIJob, workerReplicas int32) *corev1.ConfigM
 		switch mpiJob.Spec.MPIImplementation {
 		case kubeflow.MPIImplementationOpenMPI:
 			buffer.WriteString(fmt.Sprintf("%s%s-%d.%s.%s.svc slots=%d\n", mpiJob.Name, workerSuffix, i, workersService, mpiJob.Namespace, slots))
-		case kubeflow.MPIImplementationIntel:
+		case kubeflow.MPIImplementationIntel, kubeflow.MPIImplementationMPICH:
 			buffer.WriteString(fmt.Sprintf("%s%s-%d.%s.%s.svc:%d\n", mpiJob.Name, workerSuffix, i, workersService, mpiJob.Namespace, slots))
 		}
 	}
@@ -1444,6 +1455,8 @@ func (c *MPIJobController) newLauncherPodTemplate(mpiJob *kubeflow.MPIJob) corev
 			Name:  intelMPISlotsEnv,
 			Value: slotsStr,
 		})
+	case kubeflow.MPIImplementationMPICH:
+		container.Env = append(container.Env, mpichEnvVars...)
 	}
 	container.Env = append(container.Env,
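The hostfile format is the key behavioral difference the controller hunk encodes: OpenMPI expects `host slots=N`, while Intel MPI and MPICH's Hydra process manager expect `host:N`. A self-contained sketch of the per-worker line (helper name invented; the real logic lives in `newConfigMap` above, and the host pattern matches the expected ConfigMap data in the controller test below):

```go
package controller

import (
	"fmt"

	kubeflow "github.com/kubeflow/mpi-operator/pkg/apis/kubeflow/v2beta1"
)

// hostfileLine renders one hostfile entry for a worker pod, addressed through
// the per-job headless workers Service (<job>-worker-<i>.<job>-worker.<ns>.svc).
func hostfileLine(impl kubeflow.MPIImplementation, jobName, namespace string, index int, slots int32) string {
	host := fmt.Sprintf("%s-worker-%d.%s-worker.%s.svc", jobName, index, jobName, namespace)
	switch impl {
	case kubeflow.MPIImplementationOpenMPI:
		return fmt.Sprintf("%s slots=%d\n", host, slots)
	case kubeflow.MPIImplementationIntel, kubeflow.MPIImplementationMPICH:
		return fmt.Sprintf("%s:%d\n", host, slots)
	}
	return ""
}
```

For example, `hostfileLine(kubeflow.MPIImplementationMPICH, "mpich-with-slots", "project-x", 0, 10)` yields `mpich-with-slots-worker-0.mpich-with-slots-worker.project-x.svc:10\n`, exactly the `hostfile` value asserted in the "MPICH with slots" test case below.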
diff --git a/pkg/controller/mpi_job_controller_test.go b/pkg/controller/mpi_job_controller_test.go
index 938d5ac..5aaa478 100644
--- a/pkg/controller/mpi_job_controller_test.go
+++ b/pkg/controller/mpi_job_controller_test.go
@@ -500,7 +500,7 @@ func TestDoNothingWithInvalidMPIJob(t *testing.T) {
 }
 
 func TestAllResourcesCreated(t *testing.T) {
-	impls := []kubeflow.MPIImplementation{kubeflow.MPIImplementationOpenMPI, kubeflow.MPIImplementationIntel}
+	impls := []kubeflow.MPIImplementation{kubeflow.MPIImplementationOpenMPI, kubeflow.MPIImplementationIntel, kubeflow.MPIImplementationMPICH}
 	for _, implementation := range impls {
 		t.Run(string(implementation), func(t *testing.T) {
 			f := newFixture(t, "")
@@ -524,7 +524,8 @@ func TestAllResourcesCreated(t *testing.T) {
 			for i := 0; i < 5; i++ {
 				f.expectCreatePodAction(fmjc.newWorker(mpiJobCopy, i))
 			}
-			if implementation == kubeflow.MPIImplementationIntel {
+			if implementation == kubeflow.MPIImplementationIntel ||
+				implementation == kubeflow.MPIImplementationMPICH {
 				f.expectCreateServiceAction(newLauncherService(mpiJobCopy))
 			}
 			f.expectCreateJobAction(fmjc.newLauncherJob(mpiJobCopy))
@@ -796,7 +797,7 @@ func TestShutdownWorker(t *testing.T) {
 }
 
 func TestCreateSuspendedMPIJob(t *testing.T) {
-	impls := []kubeflow.MPIImplementation{kubeflow.MPIImplementationOpenMPI, kubeflow.MPIImplementationIntel}
+	impls := []kubeflow.MPIImplementation{kubeflow.MPIImplementationOpenMPI, kubeflow.MPIImplementationIntel, kubeflow.MPIImplementationMPICH}
 	for _, implementation := range impls {
 		t.Run(string(implementation), func(t *testing.T) {
 			f := newFixture(t, "")
@@ -819,7 +820,8 @@ func TestCreateSuspendedMPIJob(t *testing.T) {
 				t.Fatalf("Failed creating secret")
 			}
 			f.expectCreateSecretAction(secret)
-			if implementation == kubeflow.MPIImplementationIntel {
+			if implementation == kubeflow.MPIImplementationIntel ||
+				implementation == kubeflow.MPIImplementationMPICH {
 				f.expectCreateServiceAction(newLauncherService(mpiJob))
 			}
 
@@ -1583,6 +1585,31 @@ func TestNewConfigMap(t *testing.T) {
 			},
 		},
+		"MPICH with slots": {
+			mpiJob: &kubeflow.MPIJob{
+				ObjectMeta: metav1.ObjectMeta{
+					Name:      "mpich-with-slots",
+					Namespace: "project-x",
+				},
+				Spec: kubeflow.MPIJobSpec{
+					SlotsPerWorker:    pointer.Int32(10),
+					MPIImplementation: kubeflow.MPIImplementationMPICH,
+				},
+			},
+			workerReplicas: 1,
+			wantCM: &corev1.ConfigMap{
+				ObjectMeta: metav1.ObjectMeta{
+					Name:      "mpich-with-slots-config",
+					Namespace: "project-x",
+					Labels: map[string]string{
+						"app": "mpich-with-slots",
+					},
+				},
+				Data: map[string]string{
+					"hostfile": "mpich-with-slots-worker-0.mpich-with-slots-worker.project-x.svc:10\n",
+				},
+			},
+		},
 	}
 	for name, tc := range testCases {
 		t.Run(name, func(t *testing.T) {
diff --git a/proposals/scalable-robust-operator.md b/proposals/scalable-robust-operator.md
index 0189138..117fb6a 100644
--- a/proposals/scalable-robust-operator.md
+++ b/proposals/scalable-robust-operator.md
@@ -40,7 +40,7 @@ An MPIJob CRD describes the Job. Important fields include:
 - The launcher template, which should have a `mpirun` command.
 
 The images are expected to have the MPI implementation binaries (such as
-OpenMPI, MPICH or Intel MPI) the user’s MPI executable.
+OpenMPI, Intel MPI or MPICH) and the user’s MPI executable.
 
 A controller processes the MPIJob, starting a Job with the following steps:
 1. Creates ConfigMap, which contains:
@@ -148,7 +148,7 @@ following changes:
     doesn’t support changes to the completions field. This can be supported
     starting from 1.23. In the meantime, we can replicate the behavior by
     creating a new Job and doing Pod adoption.
-  - For Intel MPI, we also need a headless Service to front the launcher,
+  - For Intel MPI and MPICH, we also need a headless Service to front the launcher,
     because workers communicate back to the launcher using its hostname.
   - **Revert the use of the Job API for the launcher.**
   - The Job controller handles retries when the launcher or any of the workers fail.
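`newLauncherService`, referenced by both the tests and the proposal text above, is not shown in this diff. As a rough sketch only (the name suffix and selector label are assumptions, not the controller's exact output), the headless Service that lets workers resolve the launcher's hostname looks like this:

```go
package controller

import (
	corev1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

// launcherService sketches a headless Service named after the launcher pod's
// hostname, so worker-to-launcher callbacks resolve via cluster DNS.
func launcherService(jobName, namespace string) *corev1.Service {
	return &corev1.Service{
		ObjectMeta: metav1.ObjectMeta{
			Name:      jobName + "-launcher", // assumption: must equal the launcher's hostname
			Namespace: namespace,
		},
		Spec: corev1.ServiceSpec{
			ClusterIP: corev1.ClusterIPNone, // headless: DNS resolves straight to the pod IP
			Selector:  map[string]string{"training.kubeflow.org/job-name": jobName}, // label assumed
		},
	}
}
```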
diff --git a/sdk/python/v2beta1/docs/V2beta1MPIJobSpec.md b/sdk/python/v2beta1/docs/V2beta1MPIJobSpec.md
index 122791b..23050ee 100644
--- a/sdk/python/v2beta1/docs/V2beta1MPIJobSpec.md
+++ b/sdk/python/v2beta1/docs/V2beta1MPIJobSpec.md
@@ -4,7 +4,7 @@
 ## Properties
 Name | Type | Description | Notes
 ------------ | ------------- | ------------- | -------------
-**mpi_implementation** | **str** | MPIImplementation is the MPI implementation. Options are \"OpenMPI\" (default) and \"Intel\". | [optional]
+**mpi_implementation** | **str** | MPIImplementation is the MPI implementation. Options are \"OpenMPI\" (default), \"Intel\" and \"MPICH\". | [optional]
 **mpi_replica_specs** | [**dict(str, V1ReplicaSpec)**](V1ReplicaSpec.md) | MPIReplicaSpecs contains maps from `MPIReplicaType` to `ReplicaSpec` that specify the MPI replicas to run. |
 **run_policy** | [**V2beta1RunPolicy**](V2beta1RunPolicy.md) |  | [optional]
 **slots_per_worker** | **int** | Specifies the number of slots per worker used in hostfile. Defaults to 1. | [optional]
diff --git a/sdk/python/v2beta1/mpijob/models/v2beta1_mpi_job_spec.py b/sdk/python/v2beta1/mpijob/models/v2beta1_mpi_job_spec.py
index 098e0c0..f43bff7 100644
--- a/sdk/python/v2beta1/mpijob/models/v2beta1_mpi_job_spec.py
+++ b/sdk/python/v2beta1/mpijob/models/v2beta1_mpi_job_spec.py
@@ -75,7 +75,7 @@ class V2beta1MPIJobSpec(object):
     def mpi_implementation(self):
         """Gets the mpi_implementation of this V2beta1MPIJobSpec.  # noqa: E501
 
-        MPIImplementation is the MPI implementation. Options are \"OpenMPI\" (default) and \"Intel\".  # noqa: E501
+        MPIImplementation is the MPI implementation. Options are \"OpenMPI\" (default), \"Intel\" and \"MPICH\".  # noqa: E501
 
         :return: The mpi_implementation of this V2beta1MPIJobSpec.  # noqa: E501
         :rtype: str
@@ -86,7 +86,7 @@ class V2beta1MPIJobSpec(object):
     def mpi_implementation(self, mpi_implementation):
         """Sets the mpi_implementation of this V2beta1MPIJobSpec.
 
-        MPIImplementation is the MPI implementation. Options are \"OpenMPI\" (default) and \"Intel\".  # noqa: E501
+        MPIImplementation is the MPI implementation. Options are \"OpenMPI\" (default), \"Intel\" and \"MPICH\".  # noqa: E501
 
         :param mpi_implementation: The mpi_implementation of this V2beta1MPIJobSpec.  # noqa: E501
         :type mpi_implementation: str
diff --git a/test/e2e/e2e_suite_test.go b/test/e2e/e2e_suite_test.go
index 72b030b..8564f4a 100644
--- a/test/e2e/e2e_suite_test.go
+++ b/test/e2e/e2e_suite_test.go
@@ -40,6 +40,7 @@ const (
 	envTestMPIOperatorImage    = "TEST_MPI_OPERATOR_IMAGE"
 	envTestOpenMPIImage        = "TEST_OPENMPI_IMAGE"
 	envTestIntelMPIImage       = "TEST_INTELMPI_IMAGE"
+	envTestMPICHImage          = "TEST_MPICH_IMAGE"
 	envTestKindImage           = "TEST_KIND_IMAGE"
 	envSchedulerPluginsVersion = "SCHEDULER_PLUGINS_VERSION"
 
@@ -47,6 +48,7 @@
 	defaultKindImage     = "kindest/node:v1.25.8"
 	defaultOpenMPIImage  = "mpioperator/mpi-pi:openmpi"
 	defaultIntelMPIImage = "mpioperator/mpi-pi:intel"
+	defaultMPICHImage    = "mpioperator/mpi-pi:mpich"
 
 	rootPath    = "../.."
 	kubectlPath = rootPath + "/bin/kubectl"
 	kindPath    = rootPath + "/bin/kind"
@@ -71,6 +73,7 @@ var (
 	mpiOperatorImage string
 	openMPIImage     string
 	intelMPIImage    string
+	mpichImage       string
 	kindImage        string
 
 	schedulerPluginsVersion string
@@ -86,6 +89,7 @@ func init() {
 	mpiOperatorImage = getEnvDefault(envTestMPIOperatorImage, defaultMPIOperatorImage)
 	openMPIImage = getEnvDefault(envTestOpenMPIImage, defaultOpenMPIImage)
 	intelMPIImage = getEnvDefault(envTestIntelMPIImage, defaultIntelMPIImage)
+	mpichImage = getEnvDefault(envTestMPICHImage, defaultMPICHImage)
 	kindImage = getEnvDefault(envTestKindImage, defaultKindImage)
 	schedulerPluginsVersion = getEnvDefault(envSchedulerPluginsVersion, defaultSchedulerPluginsVersion)
 }
@@ -147,7 +151,7 @@ func bootstrapKindCluster() error {
 	if err != nil {
 		return fmt.Errorf("creating kind cluster: %w", err)
 	}
-	err = runCommand(kindPath, "load", "docker-image", mpiOperatorImage, openMPIImage, intelMPIImage)
+	err = runCommand(kindPath, "load", "docker-image", mpiOperatorImage, openMPIImage, intelMPIImage, mpichImage)
 	if err != nil {
 		return fmt.Errorf("loading container images: %w", err)
 	}
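`getEnvDefault`, used throughout `init()` above to let CI override image names, is not part of this diff; its usual shape is a one-line environment lookup with a fallback (a sketch; the repo's actual helper may differ):

```go
package e2e

import "os"

// getEnvDefault returns the environment variable's value if it is set,
// and def otherwise.
func getEnvDefault(key, def string) string {
	if v, ok := os.LookupEnv(key); ok {
		return v
	}
	return def
}
```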
diff --git a/test/e2e/mpi_job_test.go b/test/e2e/mpi_job_test.go
index cd1d878..85791bf 100644
--- a/test/e2e/mpi_job_test.go
+++ b/test/e2e/mpi_job_test.go
@@ -170,43 +170,63 @@ var _ = ginkgo.Describe("MPIJob", func() {
 	})
 
 	ginkgo.Context("with Intel Implementation", func() {
-		ginkgo.When("running as root", func() {
-			ginkgo.BeforeEach(func() {
-				mpiJob.Spec.MPIImplementation = kubeflow.MPIImplementationIntel
-				mpiJob.Spec.MPIReplicaSpecs[kubeflow.MPIReplicaTypeLauncher].Template.Spec.Containers = []corev1.Container{
-					{
-						Name:            "launcher",
-						Image:           intelMPIImage,
-						ImagePullPolicy: corev1.PullIfNotPresent, // use locally built image.
-						Command:         []string{}, // uses entrypoint.
-						Args: []string{
-							"mpirun",
-							"-n",
-							"2",
-							"/home/mpiuser/pi",
-						},
-					},
-				}
-				mpiJob.Spec.MPIReplicaSpecs[kubeflow.MPIReplicaTypeWorker].Template.Spec.Containers = []corev1.Container{
-					{
-						Name:            "worker",
-						Image:           intelMPIImage,
-						ImagePullPolicy: corev1.PullIfNotPresent, // use locally built image.
-						Command:         []string{}, // uses entrypoint.
-						Args: []string{
-							"/usr/sbin/sshd",
-							"-De",
-						},
-						ReadinessProbe: &corev1.Probe{
-							ProbeHandler: corev1.ProbeHandler{
-								TCPSocket: &corev1.TCPSocketAction{
-									Port: intstr.FromInt(2222),
-								},
-							},
-							InitialDelaySeconds: 3,
-						},
-					},
-				}
-			})
+		ginkgo.BeforeEach(func() {
+			mpiJob.Spec.MPIImplementation = kubeflow.MPIImplementationIntel
+			mpiJob.Spec.MPIReplicaSpecs[kubeflow.MPIReplicaTypeLauncher].Template.Spec.Containers = []corev1.Container{
+				{
+					Name:            "launcher",
+					Image:           intelMPIImage,
+					ImagePullPolicy: corev1.PullIfNotPresent, // use locally built image.
+					Command:         []string{}, // uses entrypoint.
+					Args: []string{
+						"mpirun",
+						"-n",
+						"2",
+						"/home/mpiuser/pi",
+					},
+				},
+			}
+			mpiJob.Spec.MPIReplicaSpecs[kubeflow.MPIReplicaTypeWorker].Template.Spec.Containers = []corev1.Container{
+				{
+					Name:            "worker",
+					Image:           intelMPIImage,
+					ImagePullPolicy: corev1.PullIfNotPresent, // use locally built image.
+					Command:         []string{}, // uses entrypoint.
+					Args: []string{
+						"/usr/sbin/sshd",
+						"-De",
+					},
+					ReadinessProbe: &corev1.Probe{
+						ProbeHandler: corev1.ProbeHandler{
+							TCPSocket: &corev1.TCPSocketAction{
+								Port: intstr.FromInt(2222),
+							},
+						},
+						InitialDelaySeconds: 3,
+					},
+				},
+			}
+		})
+
+		ginkgo.When("running as root", func() {
+			ginkgo.It("should succeed", func() {
+				mpiJob := createJobAndWaitForCompletion(mpiJob)
+				expectConditionToBeTrue(mpiJob, kubeflow.JobSucceeded)
+			})
+		})
+
+		ginkgo.When("running as non-root", func() {
+			ginkgo.BeforeEach(func() {
+				mpiJob.Spec.SSHAuthMountPath = "/home/mpiuser/.ssh"
+				mpiJob.Spec.MPIReplicaSpecs[kubeflow.MPIReplicaTypeLauncher].Template.Spec.Containers[0].SecurityContext = &corev1.SecurityContext{
+					RunAsUser: newInt64(1000),
+				}
+				workerContainer := &mpiJob.Spec.MPIReplicaSpecs[kubeflow.MPIReplicaTypeWorker].Template.Spec.Containers[0]
+				workerContainer.SecurityContext = &corev1.SecurityContext{
+					RunAsUser: newInt64(1000),
+				}
+				workerContainer.Args = append(workerContainer.Args, "-f", "/home/mpiuser/.sshd_config")
+			})
 
 			ginkgo.It("should succeed", func() {
@@ -214,7 +234,73 @@ var _ = ginkgo.Describe("MPIJob", func() {
 				expectConditionToBeTrue(mpiJob, kubeflow.JobSucceeded)
 			})
 		})
+	})
 
+	ginkgo.Context("with MPICH Implementation", func() {
+		ginkgo.BeforeEach(func() {
+			mpiJob.Spec.MPIImplementation = kubeflow.MPIImplementationMPICH
+			mpiJob.Spec.MPIReplicaSpecs[kubeflow.MPIReplicaTypeLauncher].Template.Spec.Containers = []corev1.Container{
+				{
+					Name:            "launcher",
+					Image:           mpichImage,
+					ImagePullPolicy: corev1.PullIfNotPresent, // use locally built image.
+					Command:         []string{}, // uses entrypoint.
+					Args: []string{
+						"mpirun",
+						"-n",
+						"2",
+						"/home/mpiuser/pi",
+					},
+				},
+			}
+			mpiJob.Spec.MPIReplicaSpecs[kubeflow.MPIReplicaTypeWorker].Template.Spec.Containers = []corev1.Container{
+				{
+					Name:            "worker",
+					Image:           mpichImage,
+					ImagePullPolicy: corev1.PullIfNotPresent, // use locally built image.
+					Command:         []string{}, // uses entrypoint.
+					Args: []string{
+						"/usr/sbin/sshd",
+						"-De",
+					},
+					ReadinessProbe: &corev1.Probe{
+						ProbeHandler: corev1.ProbeHandler{
+							TCPSocket: &corev1.TCPSocketAction{
+								Port: intstr.FromInt(2222),
+							},
+						},
+						InitialDelaySeconds: 3,
+					},
+				},
+			}
+		})
+
+		ginkgo.When("running as root", func() {
+			ginkgo.It("should succeed", func() {
+				mpiJob := createJobAndWaitForCompletion(mpiJob)
+				expectConditionToBeTrue(mpiJob, kubeflow.JobSucceeded)
+			})
+		})
+
+		ginkgo.When("running as non-root", func() {
+			ginkgo.BeforeEach(func() {
+				mpiJob.Spec.SSHAuthMountPath = "/home/mpiuser/.ssh"
+				mpiJob.Spec.MPIReplicaSpecs[kubeflow.MPIReplicaTypeLauncher].Template.Spec.Containers[0].SecurityContext = &corev1.SecurityContext{
+					RunAsUser: newInt64(1000),
+				}
+				workerContainer := &mpiJob.Spec.MPIReplicaSpecs[kubeflow.MPIReplicaTypeWorker].Template.Spec.Containers[0]
+				workerContainer.SecurityContext = &corev1.SecurityContext{
+					RunAsUser: newInt64(1000),
+				}
+				workerContainer.Args = append(workerContainer.Args, "-f", "/home/mpiuser/.sshd_config")
+			})
+
+			ginkgo.It("should succeed", func() {
+				mpiJob := createJobAndWaitForCompletion(mpiJob)
+				expectConditionToBeTrue(mpiJob, kubeflow.JobSucceeded)
+			})
+		})
 	})
 
 	ginkgo.Context("with scheduler-plugins", func() {