MPICH support (#562)

* Add support for MPICH

* Fix CI errors

* Temporary: manual trigger

* Fix file name

* Add an empty line at the end of the file

* Fix formatting

* Revert "Temporary: manual trigger"

This reverts commit 15164a8b70.

* fix formatting

* Regenerate the mpi-operator.yaml

* Adding an empty line at the end of Dockerfiles

* Share the same entrypoint for Intel and MPICH

* share hostfile generation between Intel and MPICH

* Add validation test for MPICH

* Fix formatting

* Don't over engineer the tests - be explicit

* add non-root tests for IntelMPI and MPICH
Authored by Mateusz Kubica on 2023-06-16 18:57:36 +01:00; committed by GitHub
parent caa1112993
commit 21f326d1d2
26 changed files with 382 additions and 60 deletions
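
For orientation: the user-visible change is a new `MPICH` value for `spec.mpiImplementation`, backed by `mpioperator/mpich` base images and an MPICH variant of the pi example. A minimal smoke test, assuming a cluster running this operator build and the example manifest path referenced in the README hunk further down, could look like:

```bash
# Hedged sketch; the manifest path and job name ("pi") come from the example
# added in this PR, everything else is standard kubectl.
kubectl apply -f examples/pi/pi-mpich.yaml
kubectl get mpijob pi -o jsonpath='{.spec.mpiImplementation}'   # prints: MPICH
```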

View File

@ -23,6 +23,7 @@ BASE_IMAGE_SSH_PORT?=2222
IMG_BUILDER=docker
PLATFORMS ?= linux/amd64
INTEL_PLATFORMS ?= linux/amd64
MPICH_PLATFORMS ?= linux/amd64
LD_FLAGS_V2=" \
-X '${REPO_PATH}/pkg/version.GitSHA=${GitSHA}' \
-X '${REPO_PATH}/pkg/version.Built=${Date}' \
@ -71,6 +72,7 @@ test: bin/envtest scheduler-plugins-crd
test_e2e: export TEST_MPI_OPERATOR_IMAGE=${IMAGE_NAME}:${RELEASE_VERSION}
test_e2e: export TEST_OPENMPI_IMAGE=mpioperator/mpi-pi:${RELEASE_VERSION}-openmpi
test_e2e: export TEST_INTELMPI_IMAGE=mpioperator/mpi-pi:${RELEASE_VERSION}-intel
test_e2e: export TEST_MPICH_IMAGE=mpioperator/mpi-pi:${RELEASE_VERSION}-mpich
test_e2e: bin/kubectl kind helm images test_images dev_manifest scheduler-plugins-chart
go test -v ./test/e2e/...
@ -108,6 +110,9 @@ test_images:
${IMG_BUILDER} build $(BUILD_ARGS) --platform $(INTEL_PLATFORMS) --build-arg BASE_LABEL=${RELEASE_VERSION} -t mpioperator/intel:${RELEASE_VERSION} build/base -f build/base/intel.Dockerfile
${IMG_BUILDER} build $(BUILD_ARGS) --platform $(INTEL_PLATFORMS) -t mpioperator/intel-builder:${RELEASE_VERSION} build/base -f build/base/intel-builder.Dockerfile
${IMG_BUILDER} build $(BUILD_ARGS) --platform $(INTEL_PLATFORMS) --build-arg BASE_LABEL=${RELEASE_VERSION} -t mpioperator/mpi-pi:${RELEASE_VERSION}-intel examples/v2beta1/pi -f examples/v2beta1/pi/intel.Dockerfile
${IMG_BUILDER} build $(BUILD_ARGS) --platform $(MPICH_PLATFORMS) --build-arg BASE_LABEL=${RELEASE_VERSION} -t mpioperator/mpich:${RELEASE_VERSION} build/base -f build/base/mpich.Dockerfile
${IMG_BUILDER} build $(BUILD_ARGS) --platform $(MPICH_PLATFORMS) -t mpioperator/mpich-builder:${RELEASE_VERSION} build/base -f build/base/mpich-builder.Dockerfile
${IMG_BUILDER} build $(BUILD_ARGS) --platform $(MPICH_PLATFORMS) --build-arg BASE_LABEL=${RELEASE_VERSION} -t mpioperator/mpi-pi:${RELEASE_VERSION}-mpich examples/v2beta1/pi -f examples/v2beta1/pi/mpich.Dockerfile
.PHONY: tidy
tidy:
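
The new `test_images` lines above add three MPICH images. Expanded by hand (a sketch; the real targets also pass `$(BUILD_ARGS)` and `--platform $(MPICH_PLATFORMS)`), they amount to:

```bash
# RELEASE_VERSION is whatever tag the Makefile is invoked with.
docker build --build-arg BASE_LABEL=$RELEASE_VERSION -t mpioperator/mpich:$RELEASE_VERSION build/base -f build/base/mpich.Dockerfile
docker build -t mpioperator/mpich-builder:$RELEASE_VERSION build/base -f build/base/mpich-builder.Dockerfile
docker build --build-arg BASE_LABEL=$RELEASE_VERSION -t mpioperator/mpi-pi:$RELEASE_VERSION-mpich examples/v2beta1/pi -f examples/v2beta1/pi/mpich.Dockerfile
```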

View File

@ -218,6 +218,12 @@ For a sample that uses Intel MPI, see:
cat examples/pi/pi-intel.yaml
```
For a sample that uses MPICH, see:
```bash
cat examples/pi/pi-mpich.yaml
```
## Exposed Metrics
| Metric name | Metric type | Description | Labels |

View File

@ -22,5 +22,5 @@ RUN apt update \
intel-oneapi-mpi \
&& rm -rf /var/lib/apt/lists/*
COPY intel-entrypoint.sh /entrypoint.sh
COPY entrypoint.sh /entrypoint.sh
ENTRYPOINT ["/entrypoint.sh"]

View File

@ -0,0 +1,7 @@
FROM debian:bullseye as builder
RUN apt update \
&& apt install -y --no-install-recommends \
g++ \
libmpich-dev \
&& rm -rf /var/lib/apt/lists/*
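
This builder image only provides the MPICH toolchain (`mpic++` via `libmpich-dev`); the pi example's `mpich.Dockerfile` further down uses it as the first stage of a multi-stage build. A hypothetical one-off compile against it could look like:

```bash
# The tag is an assumption; use whatever you built or pulled the builder image as.
docker run --rm -v "$PWD":/src mpioperator/mpich-builder:latest mpic++ /src/pi.cc -o /src/pi
```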

View File

@ -0,0 +1,12 @@
ARG BASE_LABEL
FROM mpioperator/base:${BASE_LABEL}
RUN apt update \
&& apt install -y --no-install-recommends \
dnsutils \
mpich \
&& rm -rf /var/lib/apt/lists/*
COPY entrypoint.sh /entrypoint.sh
ENTRYPOINT ["/entrypoint.sh"]
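
The runtime image installs `mpich` and `dnsutils` on top of the shared SSH base image and reuses the common `entrypoint.sh`. A quick local sanity check, bypassing the entrypoint (a sketch, tag assumed):

```bash
docker run --rm --entrypoint mpirun mpioperator/mpich:latest --version
```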

View File

@ -58,10 +58,11 @@ spec:
mpiImplementation:
default: OpenMPI
description: MPIImplementation is the MPI implementation. Options
are "OpenMPI" (default) and "Intel".
are "OpenMPI" (default), "Intel" and "MPICH".
enum:
- OpenMPI
- Intel
- MPICH
type: string
mpiReplicaSpecs:
additionalProperties:

View File

@ -5,7 +5,6 @@ FROM mpioperator/openmpi-builder:${BASE_LABEL} as builder
COPY pi.cc /src/pi.cc
RUN mpic++ /src/pi.cc -o /pi
FROM mpioperator/openmpi:${BASE_LABEL}
COPY --from=builder /pi /home/mpiuser/pi

View File

@ -19,9 +19,15 @@ For Intel MPI:
docker build -t mpi-pi . -f intel.Dockerfile
```
For MPICH:
```bash
docker build -t mpi-pi . -f mpich.Dockerfile
```
## Create MPIJob
Modify `pi.yaml` (for OpenMPI) or `pi-intel.yaml` (for Intel MPI) to set up the
Modify `pi.yaml` (for OpenMPI), `pi-intel.yaml` (for Intel MPI) or `pi-mpich.yaml` (for MPICH) to set up the
image name from your own registry.
Then, run:
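
The README now tells users to point `pi-mpich.yaml` at their own registry. With a hypothetical registry name, retagging the locally built image would look like:

```bash
# "registry.example.com/ns" is a placeholder for your registry/namespace.
docker tag mpi-pi registry.example.com/ns/mpi-pi:mpich
docker push registry.example.com/ns/mpi-pi:mpich
```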

View File

@ -0,0 +1,10 @@
ARG BASE_LABEL
FROM mpioperator/mpich-builder:${BASE_LABEL} as builder
COPY pi.cc /src/pi.cc
RUN mpic++ /src/pi.cc -o /pi
FROM mpioperator/mpich:${BASE_LABEL}
COPY --from=builder /pi /home/mpiuser/pi

View File

@ -0,0 +1,54 @@
apiVersion: kubeflow.org/v2beta1
kind: MPIJob
metadata:
name: pi
spec:
slotsPerWorker: 1
runPolicy:
cleanPodPolicy: Running
sshAuthMountPath: /home/mpiuser/.ssh
mpiImplementation: MPICH
mpiReplicaSpecs:
Launcher:
replicas: 1
template:
spec:
containers:
- image: mpioperator/mpi-pi:mpich
imagePullPolicy: Always
name: mpi-launcher
securityContext:
runAsUser: 1000
args:
- mpirun
- -n
- "2"
- /home/mpiuser/pi
resources:
limits:
cpu: 1
memory: 1Gi
Worker:
replicas: 2
template:
spec:
containers:
- image: mpioperator/mpi-pi:mpich
imagePullPolicy: Always
name: mpi-worker
securityContext:
runAsUser: 1000
command:
args:
- /usr/sbin/sshd
- -De
- -f
- /home/mpiuser/.sshd_config
readinessProbe:
tcpSocket:
port: 2222
initialDelaySeconds: 2
resources:
limits:
cpu: 1
memory: 1Gi
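
Once the example job above completes, the computed value of pi ends up in the launcher's log. Assuming the operator's usual `<job>-launcher` naming for the launcher Job and a `Succeeded` condition on the MPIJob (both conventions, not confirmed by this diff), it can be read with:

```bash
# Condition and Job names ("Succeeded", "pi-launcher") are assumptions here.
kubectl wait --for=condition=Succeeded mpijob/pi --timeout=10m
kubectl logs job/pi-launcher
```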

View File

@ -35,10 +35,11 @@ spec:
mpiImplementation:
default: OpenMPI
description: MPIImplementation is the MPI implementation. Options
are "OpenMPI" (default) and "Intel".
are "OpenMPI" (default), "Intel" and "MPICH".
enum:
- OpenMPI
- Intel
- MPICH
type: string
mpiReplicaSpecs:
additionalProperties:

View File

@ -38,7 +38,7 @@ func TestSetDefaults_MPIJob(t *testing.T) {
},
},
},
"base defaults overridden": {
"base defaults overridden (intel)": {
job: MPIJob{
Spec: MPIJobSpec{
SlotsPerWorker: newInt32(10),
@ -66,6 +66,34 @@ func TestSetDefaults_MPIJob(t *testing.T) {
},
},
},
"base defaults overridden (mpich)": {
job: MPIJob{
Spec: MPIJobSpec{
SlotsPerWorker: newInt32(10),
RunPolicy: RunPolicy{
CleanPodPolicy: NewCleanPodPolicy(CleanPodPolicyRunning),
TTLSecondsAfterFinished: newInt32(2),
ActiveDeadlineSeconds: newInt64(3),
BackoffLimit: newInt32(4),
},
SSHAuthMountPath: "/home/mpiuser/.ssh",
MPIImplementation: MPIImplementationMPICH,
},
},
want: MPIJob{
Spec: MPIJobSpec{
SlotsPerWorker: newInt32(10),
RunPolicy: RunPolicy{
CleanPodPolicy: NewCleanPodPolicy(CleanPodPolicyRunning),
TTLSecondsAfterFinished: newInt32(2),
ActiveDeadlineSeconds: newInt64(3),
BackoffLimit: newInt32(4),
},
SSHAuthMountPath: "/home/mpiuser/.ssh",
MPIImplementation: MPIImplementationMPICH,
},
},
},
"launcher defaults": {
job: MPIJob{
Spec: MPIJobSpec{

View File

@ -488,7 +488,7 @@ func schema_pkg_apis_kubeflow_v2beta1_MPIJobSpec(ref common.ReferenceCallback) c
},
"mpiImplementation": {
SchemaProps: spec.SchemaProps{
Description: "MPIImplementation is the MPI implementation. Options are \"OpenMPI\" (default) and \"Intel\".",
Description: "MPIImplementation is the MPI implementation. Options are \"OpenMPI\" (default), \"Intel\" and \"MPICH\".",
Type: []string{"string"},
Format: "",
},

View File

@ -322,7 +322,7 @@
],
"properties": {
"mpiImplementation": {
"description": "MPIImplementation is the MPI implementation. Options are \"OpenMPI\" (default) and \"Intel\".",
"description": "MPIImplementation is the MPI implementation. Options are \"OpenMPI\" (default), \"Intel\" and \"MPICH\".",
"type": "string"
},
"mpiReplicaSpecs": {

View File

@ -155,8 +155,8 @@ type MPIJobSpec struct {
SSHAuthMountPath string `json:"sshAuthMountPath,omitempty"`
// MPIImplementation is the MPI implementation.
// Options are "OpenMPI" (default) and "Intel".
// +kubebuilder:validation:Enum:=OpenMPI;Intel
// Options are "OpenMPI" (default), "Intel" and "MPICH".
// +kubebuilder:validation:Enum:=OpenMPI;Intel;MPICH
// +kubebuilder:default:=OpenMPI
MPIImplementation MPIImplementation `json:"mpiImplementation,omitempty"`
}
@ -177,6 +177,7 @@ type MPIImplementation string
const (
MPIImplementationOpenMPI MPIImplementation = "OpenMPI"
MPIImplementationIntel MPIImplementation = "Intel"
MPIImplementationMPICH MPIImplementation = "MPICH"
)
// JobStatus represents the current observed state of the training Job.

View File

@ -35,7 +35,8 @@ var (
validMPIImplementations = sets.NewString(
string(kubeflow.MPIImplementationOpenMPI),
string(kubeflow.MPIImplementationIntel))
string(kubeflow.MPIImplementationIntel),
string(kubeflow.MPIImplementationMPICH))
validRestartPolicies = sets.NewString(
string(common.RestartPolicyNever),

View File

@ -31,7 +31,7 @@ func TestValidateMPIJob(t *testing.T) {
job kubeflow.MPIJob
wantErrs field.ErrorList
}{
"valid": {
"valid (intel)": {
job: kubeflow.MPIJob{
ObjectMeta: metav1.ObjectMeta{
Name: "foo",
@ -57,7 +57,7 @@ func TestValidateMPIJob(t *testing.T) {
},
},
},
"valid with worker": {
"valid with worker (intel)": {
job: kubeflow.MPIJob{
ObjectMeta: metav1.ObjectMeta{
Name: "foo",
@ -92,6 +92,67 @@ func TestValidateMPIJob(t *testing.T) {
},
},
},
"valid (mpich)": {
job: kubeflow.MPIJob{
ObjectMeta: metav1.ObjectMeta{
Name: "foo",
},
Spec: kubeflow.MPIJobSpec{
SlotsPerWorker: newInt32(2),
RunPolicy: kubeflow.RunPolicy{
CleanPodPolicy: kubeflow.NewCleanPodPolicy(kubeflow.CleanPodPolicyRunning),
},
SSHAuthMountPath: "/home/mpiuser/.ssh",
MPIImplementation: kubeflow.MPIImplementationMPICH,
MPIReplicaSpecs: map[kubeflow.MPIReplicaType]*common.ReplicaSpec{
kubeflow.MPIReplicaTypeLauncher: {
Replicas: newInt32(1),
RestartPolicy: common.RestartPolicyNever,
Template: corev1.PodTemplateSpec{
Spec: corev1.PodSpec{
Containers: []corev1.Container{{}},
},
},
},
},
},
},
},
"valid with worker (mpich)": {
job: kubeflow.MPIJob{
ObjectMeta: metav1.ObjectMeta{
Name: "foo",
},
Spec: kubeflow.MPIJobSpec{
SlotsPerWorker: newInt32(2),
RunPolicy: kubeflow.RunPolicy{
CleanPodPolicy: kubeflow.NewCleanPodPolicy(kubeflow.CleanPodPolicyRunning),
},
SSHAuthMountPath: "/home/mpiuser/.ssh",
MPIImplementation: kubeflow.MPIImplementationMPICH,
MPIReplicaSpecs: map[kubeflow.MPIReplicaType]*common.ReplicaSpec{
kubeflow.MPIReplicaTypeLauncher: {
Replicas: newInt32(1),
RestartPolicy: common.RestartPolicyOnFailure,
Template: corev1.PodTemplateSpec{
Spec: corev1.PodSpec{
Containers: []corev1.Container{{}},
},
},
},
kubeflow.MPIReplicaTypeWorker: {
Replicas: newInt32(3),
RestartPolicy: common.RestartPolicyNever,
Template: corev1.PodTemplateSpec{
Spec: corev1.PodSpec{
Containers: []corev1.Container{{}},
},
},
},
},
},
},
},
"empty job": {
wantErrs: field.ErrorList{
&field.Error{

View File

@ -202,6 +202,16 @@ var (
Value: "-o ConnectionAttempts=10",
},
}
mpichEnvVars = []corev1.EnvVar{
{
Name: "HYDRA_HOST_FILE",
Value: fmt.Sprintf("%s/%s", configMountPath, hostfileName),
},
{
Name: "HYDRA_LAUNCH_EXTRA_ARGS",
Value: "-o ConnectionAttempts=10",
},
}
nvidiaDisableEnvVars = []corev1.EnvVar{
{Name: "NVIDIA_VISIBLE_DEVICES"},
{Name: "NVIDIA_DRIVER_CAPABILITIES"},
@ -603,8 +613,9 @@ func (c *MPIJobController) syncHandler(key string) error {
return err
}
}
if mpiJob.Spec.MPIImplementation == kubeflow.MPIImplementationIntel {
// The Intel implementation requires workers to communicate with the
if mpiJob.Spec.MPIImplementation == kubeflow.MPIImplementationIntel ||
mpiJob.Spec.MPIImplementation == kubeflow.MPIImplementationMPICH {
// The Intel and MPICH implementations require workers to communicate with the
// launcher through its hostname. For that, we create a Service which
// has the same name as the launcher's hostname.
_, err := c.getOrCreateService(mpiJob, newLauncherService(mpiJob))
@ -1216,7 +1227,7 @@ func newConfigMap(mpiJob *kubeflow.MPIJob, workerReplicas int32) *corev1.ConfigM
switch mpiJob.Spec.MPIImplementation {
case kubeflow.MPIImplementationOpenMPI:
buffer.WriteString(fmt.Sprintf("%s%s-%d.%s.%s.svc slots=%d\n", mpiJob.Name, workerSuffix, i, workersService, mpiJob.Namespace, slots))
case kubeflow.MPIImplementationIntel:
case kubeflow.MPIImplementationIntel, kubeflow.MPIImplementationMPICH:
buffer.WriteString(fmt.Sprintf("%s%s-%d.%s.%s.svc:%d\n", mpiJob.Name, workerSuffix, i, workersService, mpiJob.Namespace, slots))
}
}
@ -1444,6 +1455,8 @@ func (c *MPIJobController) newLauncherPodTemplate(mpiJob *kubeflow.MPIJob) corev
Name: intelMPISlotsEnv,
Value: slotsStr,
})
case kubeflow.MPIImplementationMPICH:
container.Env = append(container.Env, mpichEnvVars...)
}
container.Env = append(container.Env,
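
Two things tie the controller to MPICH's Hydra launcher: the hostfile written by `newConfigMap` reuses the Intel-style `host:slots` format, and `mpichEnvVars` points Hydra at it. The launcher container therefore ends up running roughly the equivalent of the following (a sketch; `/etc/mpi` and `hostfile` are assumed from the controller's `configMountPath` and `hostfileName` constants):

```bash
# ConnectionAttempts smooths over workers whose sshd is still coming up.
export HYDRA_HOST_FILE=/etc/mpi/hostfile
export HYDRA_LAUNCH_EXTRA_ARGS="-o ConnectionAttempts=10"
mpirun -n 2 /home/mpiuser/pi
```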

View File

@ -500,7 +500,7 @@ func TestDoNothingWithInvalidMPIJob(t *testing.T) {
}
func TestAllResourcesCreated(t *testing.T) {
impls := []kubeflow.MPIImplementation{kubeflow.MPIImplementationOpenMPI, kubeflow.MPIImplementationIntel}
impls := []kubeflow.MPIImplementation{kubeflow.MPIImplementationOpenMPI, kubeflow.MPIImplementationIntel, kubeflow.MPIImplementationMPICH}
for _, implementation := range impls {
t.Run(string(implementation), func(t *testing.T) {
f := newFixture(t, "")
@ -524,7 +524,8 @@ func TestAllResourcesCreated(t *testing.T) {
for i := 0; i < 5; i++ {
f.expectCreatePodAction(fmjc.newWorker(mpiJobCopy, i))
}
if implementation == kubeflow.MPIImplementationIntel {
if implementation == kubeflow.MPIImplementationIntel ||
implementation == kubeflow.MPIImplementationMPICH {
f.expectCreateServiceAction(newLauncherService(mpiJobCopy))
}
f.expectCreateJobAction(fmjc.newLauncherJob(mpiJobCopy))
@ -796,7 +797,7 @@ func TestShutdownWorker(t *testing.T) {
}
func TestCreateSuspendedMPIJob(t *testing.T) {
impls := []kubeflow.MPIImplementation{kubeflow.MPIImplementationOpenMPI, kubeflow.MPIImplementationIntel}
impls := []kubeflow.MPIImplementation{kubeflow.MPIImplementationOpenMPI, kubeflow.MPIImplementationIntel, kubeflow.MPIImplementationMPICH}
for _, implementation := range impls {
t.Run(string(implementation), func(t *testing.T) {
f := newFixture(t, "")
@ -819,7 +820,8 @@ func TestCreateSuspendedMPIJob(t *testing.T) {
t.Fatalf("Failed creating secret")
}
f.expectCreateSecretAction(secret)
if implementation == kubeflow.MPIImplementationIntel {
if implementation == kubeflow.MPIImplementationIntel ||
implementation == kubeflow.MPIImplementationMPICH {
f.expectCreateServiceAction(newLauncherService(mpiJob))
}
@ -1583,6 +1585,31 @@ func TestNewConfigMap(t *testing.T) {
},
},
},
"MPICH with slots": {
mpiJob: &kubeflow.MPIJob{
ObjectMeta: metav1.ObjectMeta{
Name: "mpich-with-slots",
Namespace: "project-x",
},
Spec: kubeflow.MPIJobSpec{
SlotsPerWorker: pointer.Int32(10),
MPIImplementation: kubeflow.MPIImplementationMPICH,
},
},
workerReplicas: 1,
wantCM: &corev1.ConfigMap{
ObjectMeta: metav1.ObjectMeta{
Name: "mpich-with-slots-config",
Namespace: "project-x",
Labels: map[string]string{
"app": "mpich-with-slots",
},
},
Data: map[string]string{
"hostfile": "mpich-with-slots-worker-0.mpich-with-slots-worker.project-x.svc:10\n",
},
},
},
}
for name, tc := range testCases {
t.Run(name, func(t *testing.T) {

View File

@ -40,7 +40,7 @@ An MPIJob CRD describes the Job. Important fields include:
- The launcher template, which should have a `mpirun` command.
The images are expected to have the MPI implementation binaries (such as
OpenMPI, MPICH or Intel MPI) the user's MPI executable.
OpenMPI, Intel MPI or MPICH) the user's MPI executable.
A controller processes the MPIJob, starting a Job with the following steps:
1. Creates ConfigMap, which contains:
@ -148,7 +148,7 @@ following changes:
doesn't support changes to the completions field. This can be supported
starting from 1.23. In the meantime, we can replicate the behavior by
creating a new Job and doing Pod adoption.
- For Intel MPI, we also need a headless Service to front the launcher,
- For Intel MPI and MPICH, we also need a headless Service to front the launcher,
because workers communicate back to the launcher using its hostname.
- **Revert the use of the Job API for the launcher.**
- The Job controller handles retries when the launcher or any of the workers fail.
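
Because workers call back to the launcher by hostname, the headless Service created for Intel MPI and MPICH makes the launcher resolvable cluster-wide. From inside a worker of a job named `pi` in namespace `default`, that is roughly the following (illustrative; `dnsutils` is installed in the worker image for exactly this kind of lookup):

```bash
# The Service shares the launcher's hostname, e.g. pi-launcher.
nslookup pi-launcher.default.svc
```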

View File

@ -4,7 +4,7 @@
## Properties
Name | Type | Description | Notes
------------ | ------------- | ------------- | -------------
**mpi_implementation** | **str** | MPIImplementation is the MPI implementation. Options are \&quot;OpenMPI\&quot; (default) and \&quot;Intel\&quot;. | [optional]
**mpi_implementation** | **str** | MPIImplementation is the MPI implementation. Options are \&quot;OpenMPI\&quot; (default), \&quot;Intel\&quot; and \&quot;MPICH\&quot;. | [optional]
**mpi_replica_specs** | [**dict(str, V1ReplicaSpec)**](V1ReplicaSpec.md) | MPIReplicaSpecs contains maps from &#x60;MPIReplicaType&#x60; to &#x60;ReplicaSpec&#x60; that specify the MPI replicas to run. |
**run_policy** | [**V2beta1RunPolicy**](V2beta1RunPolicy.md) | | [optional]
**slots_per_worker** | **int** | Specifies the number of slots per worker used in hostfile. Defaults to 1. | [optional]

View File

@ -75,7 +75,7 @@ class V2beta1MPIJobSpec(object):
def mpi_implementation(self):
"""Gets the mpi_implementation of this V2beta1MPIJobSpec. # noqa: E501
MPIImplementation is the MPI implementation. Options are \"OpenMPI\" (default) and \"Intel\". # noqa: E501
MPIImplementation is the MPI implementation. Options are \"OpenMPI\" (default), \"Intel\" and \"MPICH\". # noqa: E501
:return: The mpi_implementation of this V2beta1MPIJobSpec. # noqa: E501
:rtype: str
@ -86,7 +86,7 @@ class V2beta1MPIJobSpec(object):
def mpi_implementation(self, mpi_implementation):
"""Sets the mpi_implementation of this V2beta1MPIJobSpec.
MPIImplementation is the MPI implementation. Options are \"OpenMPI\" (default) and \"Intel\". # noqa: E501
MPIImplementation is the MPI implementation. Options are \"OpenMPI\" (default), \"Intel\" and \"MPICH\". # noqa: E501
:param mpi_implementation: The mpi_implementation of this V2beta1MPIJobSpec. # noqa: E501
:type mpi_implementation: str

View File

@ -40,6 +40,7 @@ const (
envTestMPIOperatorImage = "TEST_MPI_OPERATOR_IMAGE"
envTestOpenMPIImage = "TEST_OPENMPI_IMAGE"
envTestIntelMPIImage = "TEST_INTELMPI_IMAGE"
envTestMPICHImage = "TEST_MPICH_IMAGE"
envTestKindImage = "TEST_KIND_IMAGE"
envSchedulerPluginsVersion = "SCHEDULER_PLUGINS_VERSION"
@ -47,6 +48,7 @@ const (
defaultKindImage = "kindest/node:v1.25.8"
defaultOpenMPIImage = "mpioperator/mpi-pi:openmpi"
defaultIntelMPIImage = "mpioperator/mpi-pi:intel"
defaultMPICHImage = "mpioperator/mpi-pi:mpich"
rootPath = "../.."
kubectlPath = rootPath + "/bin/kubectl"
kindPath = rootPath + "/bin/kind"
@ -71,6 +73,7 @@ var (
mpiOperatorImage string
openMPIImage string
intelMPIImage string
mpichImage string
kindImage string
schedulerPluginsVersion string
@ -86,6 +89,7 @@ func init() {
mpiOperatorImage = getEnvDefault(envTestMPIOperatorImage, defaultMPIOperatorImage)
openMPIImage = getEnvDefault(envTestOpenMPIImage, defaultOpenMPIImage)
intelMPIImage = getEnvDefault(envTestIntelMPIImage, defaultIntelMPIImage)
mpichImage = getEnvDefault(envTestMPICHImage, defaultMPICHImage)
kindImage = getEnvDefault(envTestKindImage, defaultKindImage)
schedulerPluginsVersion = getEnvDefault(envSchedulerPluginsVersion, defaultSchedulerPluginsVersion)
}
@ -147,7 +151,7 @@ func bootstrapKindCluster() error {
if err != nil {
return fmt.Errorf("creating kind cluster: %w", err)
}
err = runCommand(kindPath, "load", "docker-image", mpiOperatorImage, openMPIImage, intelMPIImage)
err = runCommand(kindPath, "load", "docker-image", mpiOperatorImage, openMPIImage, intelMPIImage, mpichImage)
if err != nil {
return fmt.Errorf("loading container images: %w", err)
}

View File

@ -170,7 +170,6 @@ var _ = ginkgo.Describe("MPIJob", func() {
})
ginkgo.Context("with Intel Implementation", func() {
ginkgo.When("running as root", func() {
ginkgo.BeforeEach(func() {
mpiJob.Spec.MPIImplementation = kubeflow.MPIImplementationIntel
mpiJob.Spec.MPIReplicaSpecs[kubeflow.MPIReplicaTypeLauncher].Template.Spec.Containers = []corev1.Container{
@ -209,12 +208,99 @@ var _ = ginkgo.Describe("MPIJob", func() {
}
})
ginkgo.When("running as root", func() {
ginkgo.It("should succeed", func() {
mpiJob := createJobAndWaitForCompletion(mpiJob)
expectConditionToBeTrue(mpiJob, kubeflow.JobSucceeded)
})
})
ginkgo.When("running as non-root", func() {
ginkgo.BeforeEach(func () {
mpiJob.Spec.SSHAuthMountPath = "/home/mpiuser/.ssh"
mpiJob.Spec.MPIReplicaSpecs[kubeflow.MPIReplicaTypeLauncher].Template.Spec.Containers[0].SecurityContext = &corev1.SecurityContext{
RunAsUser: newInt64(1000),
}
workerContainer := &mpiJob.Spec.MPIReplicaSpecs[kubeflow.MPIReplicaTypeWorker].Template.Spec.Containers[0]
workerContainer.SecurityContext = &corev1.SecurityContext{
RunAsUser: newInt64(1000),
}
workerContainer.Args = append(workerContainer.Args, "-f", "/home/mpiuser/.sshd_config")
})
ginkgo.It("should succeed", func() {
mpiJob := createJobAndWaitForCompletion(mpiJob)
expectConditionToBeTrue(mpiJob, kubeflow.JobSucceeded)
})
})
})
ginkgo.Context("with MPICH Implementation", func() {
ginkgo.BeforeEach(func() {
mpiJob.Spec.MPIImplementation = kubeflow.MPIImplementationMPICH
mpiJob.Spec.MPIReplicaSpecs[kubeflow.MPIReplicaTypeLauncher].Template.Spec.Containers = []corev1.Container{
{
Name: "launcher",
Image: mpichImage,
ImagePullPolicy: corev1.PullIfNotPresent, // use locally built image.
Command: []string{}, // uses entrypoint.
Args: []string{
"mpirun",
"-n",
"2",
"/home/mpiuser/pi",
},
},
}
mpiJob.Spec.MPIReplicaSpecs[kubeflow.MPIReplicaTypeWorker].Template.Spec.Containers = []corev1.Container{
{
Name: "worker",
Image: mpichImage,
ImagePullPolicy: corev1.PullIfNotPresent, // use locally built image.
Command: []string{}, // uses entrypoint.
Args: []string{
"/usr/sbin/sshd",
"-De",
},
ReadinessProbe: &corev1.Probe{
ProbeHandler: corev1.ProbeHandler{
TCPSocket: &corev1.TCPSocketAction{
Port: intstr.FromInt(2222),
},
},
InitialDelaySeconds: 3,
},
},
}
})
ginkgo.When("running as root", func() {
ginkgo.It("should succeed", func() {
mpiJob := createJobAndWaitForCompletion(mpiJob)
expectConditionToBeTrue(mpiJob, kubeflow.JobSucceeded)
})
})
ginkgo.When("running as non-root", func() {
ginkgo.BeforeEach(func () {
mpiJob.Spec.SSHAuthMountPath = "/home/mpiuser/.ssh"
mpiJob.Spec.MPIReplicaSpecs[kubeflow.MPIReplicaTypeLauncher].Template.Spec.Containers[0].SecurityContext = &corev1.SecurityContext{
RunAsUser: newInt64(1000),
}
workerContainer := &mpiJob.Spec.MPIReplicaSpecs[kubeflow.MPIReplicaTypeWorker].Template.Spec.Containers[0]
workerContainer.SecurityContext = &corev1.SecurityContext{
RunAsUser: newInt64(1000),
}
workerContainer.Args = append(workerContainer.Args, "-f", "/home/mpiuser/.sshd_config")
})
ginkgo.It("should succeed", func() {
mpiJob := createJobAndWaitForCompletion(mpiJob)
expectConditionToBeTrue(mpiJob, kubeflow.JobSucceeded)
})
})
})
ginkgo.Context("with scheduler-plugins", func() {