MPICH support (#562)

* Add support for MPICH

* Fix CI errors

* Temporary: manual trigger

* Fix file name

* Add an empty line at the end of the file

* Fix formatting

* Revert "Temporary: manual trigger"

This reverts commit 15164a8b70.

* Fix formatting

* Regenerate the mpi-operator.yaml

* Add an empty line at the end of Dockerfiles

* Share the same entrypoint for Intel and MPICH

* Share hostfile generation between Intel and MPICH

* Add validation test for MPICH

* Fix formatting

* Don't over-engineer the tests - be explicit

* Add non-root tests for Intel MPI and MPICH
Mateusz Kubica 2023-06-16 18:57:36 +01:00 committed by GitHub
parent caa1112993
commit 21f326d1d2
26 changed files with 382 additions and 60 deletions

View File

@ -23,6 +23,7 @@ BASE_IMAGE_SSH_PORT?=2222
IMG_BUILDER=docker
PLATFORMS ?= linux/amd64
INTEL_PLATFORMS ?= linux/amd64
MPICH_PLATFORMS ?= linux/amd64
LD_FLAGS_V2=" \
-X '${REPO_PATH}/pkg/version.GitSHA=${GitSHA}' \
-X '${REPO_PATH}/pkg/version.Built=${Date}' \
@ -71,6 +72,7 @@ test: bin/envtest scheduler-plugins-crd
test_e2e: export TEST_MPI_OPERATOR_IMAGE=${IMAGE_NAME}:${RELEASE_VERSION}
test_e2e: export TEST_OPENMPI_IMAGE=mpioperator/mpi-pi:${RELEASE_VERSION}-openmpi
test_e2e: export TEST_INTELMPI_IMAGE=mpioperator/mpi-pi:${RELEASE_VERSION}-intel
test_e2e: export TEST_MPICH_IMAGE=mpioperator/mpi-pi:${RELEASE_VERSION}-mpich
test_e2e: bin/kubectl kind helm images test_images dev_manifest scheduler-plugins-chart
go test -v ./test/e2e/...
@ -108,6 +110,9 @@ test_images:
${IMG_BUILDER} build $(BUILD_ARGS) --platform $(INTEL_PLATFORMS) --build-arg BASE_LABEL=${RELEASE_VERSION} -t mpioperator/intel:${RELEASE_VERSION} build/base -f build/base/intel.Dockerfile
${IMG_BUILDER} build $(BUILD_ARGS) --platform $(INTEL_PLATFORMS) -t mpioperator/intel-builder:${RELEASE_VERSION} build/base -f build/base/intel-builder.Dockerfile
${IMG_BUILDER} build $(BUILD_ARGS) --platform $(INTEL_PLATFORMS) --build-arg BASE_LABEL=${RELEASE_VERSION} -t mpioperator/mpi-pi:${RELEASE_VERSION}-intel examples/v2beta1/pi -f examples/v2beta1/pi/intel.Dockerfile
${IMG_BUILDER} build $(BUILD_ARGS) --platform $(MPICH_PLATFORMS) --build-arg BASE_LABEL=${RELEASE_VERSION} -t mpioperator/mpich:${RELEASE_VERSION} build/base -f build/base/mpich.Dockerfile
${IMG_BUILDER} build $(BUILD_ARGS) --platform $(MPICH_PLATFORMS) -t mpioperator/mpich-builder:${RELEASE_VERSION} build/base -f build/base/mpich-builder.Dockerfile
${IMG_BUILDER} build $(BUILD_ARGS) --platform $(MPICH_PLATFORMS) --build-arg BASE_LABEL=${RELEASE_VERSION} -t mpioperator/mpi-pi:${RELEASE_VERSION}-mpich examples/v2beta1/pi -f examples/v2beta1/pi/mpich.Dockerfile
.PHONY: tidy
tidy:

View File

@ -218,6 +218,12 @@ For a sample that uses Intel MPI, see:
cat examples/pi/pi-intel.yaml
```
For a sample that uses MPICH, see:
```bash
cat examples/pi/pi-mpich.yaml
```
## Exposed Metrics
| Metric name | Metric type | Description | Labels |

View File

@ -22,5 +22,5 @@ RUN apt update \
intel-oneapi-mpi \
&& rm -rf /var/lib/apt/lists/*
-COPY intel-entrypoint.sh /entrypoint.sh
+COPY entrypoint.sh /entrypoint.sh
ENTRYPOINT ["/entrypoint.sh"]

View File

@ -0,0 +1,7 @@
FROM debian:bullseye as builder
RUN apt update \
&& apt install -y --no-install-recommends \
g++ \
libmpich-dev \
&& rm -rf /var/lib/apt/lists/*

View File

@ -0,0 +1,12 @@
ARG BASE_LABEL
FROM mpioperator/base:${BASE_LABEL}
RUN apt update \
&& apt install -y --no-install-recommends \
dnsutils \
mpich \
&& rm -rf /var/lib/apt/lists/*
COPY entrypoint.sh /entrypoint.sh
ENTRYPOINT ["/entrypoint.sh"]

View File

@ -58,10 +58,11 @@ spec:
mpiImplementation:
  default: OpenMPI
  description: MPIImplementation is the MPI implementation. Options
-   are "OpenMPI" (default) and "Intel".
+   are "OpenMPI" (default), "Intel" and "MPICH".
  enum:
  - OpenMPI
  - Intel
  - MPICH
  type: string
mpiReplicaSpecs:
  additionalProperties:

View File

@ -5,7 +5,6 @@ FROM mpioperator/openmpi-builder:${BASE_LABEL} as builder
COPY pi.cc /src/pi.cc
RUN mpic++ /src/pi.cc -o /pi
FROM mpioperator/openmpi:${BASE_LABEL}
COPY --from=builder /pi /home/mpiuser/pi

View File

@ -19,9 +19,15 @@ For Intel MPI:
docker build -t mpi-pi . -f intel.Dockerfile
```
For MPICH:
```bash
docker build -t mpi-pi . -f mpich.Dockerfile
```
## Create MPIJob
-Modify `pi.yaml` (for OpenMPI) or `pi-intel.yaml` (for Intel MPI) to set up the
+Modify `pi.yaml` (for OpenMPI), `pi-intel.yaml` (for Intel MPI) or `pi-mpich.yaml` (for MPICH) to set up the
image name from your own registry.
Then, run:

View File

@ -0,0 +1,10 @@
ARG BASE_LABEL
FROM mpioperator/mpich-builder:${BASE_LABEL} as builder
COPY pi.cc /src/pi.cc
RUN mpic++ /src/pi.cc -o /pi
FROM mpioperator/mpich:${BASE_LABEL}
COPY --from=builder /pi /home/mpiuser/pi

View File

@ -0,0 +1,54 @@
apiVersion: kubeflow.org/v2beta1
kind: MPIJob
metadata:
  name: pi
spec:
  slotsPerWorker: 1
  runPolicy:
    cleanPodPolicy: Running
  sshAuthMountPath: /home/mpiuser/.ssh
  mpiImplementation: MPICH
  mpiReplicaSpecs:
    Launcher:
      replicas: 1
      template:
        spec:
          containers:
          - image: mpioperator/mpi-pi:mpich
            imagePullPolicy: Always
            name: mpi-launcher
            securityContext:
              runAsUser: 1000
            args:
            - mpirun
            - -n
            - "2"
            - /home/mpiuser/pi
            resources:
              limits:
                cpu: 1
                memory: 1Gi
    Worker:
      replicas: 2
      template:
        spec:
          containers:
          - image: mpioperator/mpi-pi:mpich
            imagePullPolicy: Always
            name: mpi-worker
            securityContext:
              runAsUser: 1000
            command:
            args:
            - /usr/sbin/sshd
            - -De
            - -f
            - /home/mpiuser/.sshd_config
            readinessProbe:
              tcpSocket:
                port: 2222
              initialDelaySeconds: 2
            resources:
              limits:
                cpu: 1
                memory: 1Gi

View File

@ -35,10 +35,11 @@ spec:
mpiImplementation:
  default: OpenMPI
  description: MPIImplementation is the MPI implementation. Options
-   are "OpenMPI" (default) and "Intel".
+   are "OpenMPI" (default), "Intel" and "MPICH".
  enum:
  - OpenMPI
  - Intel
  - MPICH
  type: string
mpiReplicaSpecs:
  additionalProperties:

View File

@ -38,7 +38,7 @@ func TestSetDefaults_MPIJob(t *testing.T) {
},
},
},
-"base defaults overridden": {
+"base defaults overridden (intel)": {
job: MPIJob{
Spec: MPIJobSpec{
SlotsPerWorker: newInt32(10),
@ -66,6 +66,34 @@ func TestSetDefaults_MPIJob(t *testing.T) {
},
},
},
"base defaults overridden (mpich)": {
job: MPIJob{
Spec: MPIJobSpec{
SlotsPerWorker: newInt32(10),
RunPolicy: RunPolicy{
CleanPodPolicy: NewCleanPodPolicy(CleanPodPolicyRunning),
TTLSecondsAfterFinished: newInt32(2),
ActiveDeadlineSeconds: newInt64(3),
BackoffLimit: newInt32(4),
},
SSHAuthMountPath: "/home/mpiuser/.ssh",
MPIImplementation: MPIImplementationMPICH,
},
},
want: MPIJob{
Spec: MPIJobSpec{
SlotsPerWorker: newInt32(10),
RunPolicy: RunPolicy{
CleanPodPolicy: NewCleanPodPolicy(CleanPodPolicyRunning),
TTLSecondsAfterFinished: newInt32(2),
ActiveDeadlineSeconds: newInt64(3),
BackoffLimit: newInt32(4),
},
SSHAuthMountPath: "/home/mpiuser/.ssh",
MPIImplementation: MPIImplementationMPICH,
},
},
},
"launcher defaults": { "launcher defaults": {
job: MPIJob{ job: MPIJob{
Spec: MPIJobSpec{ Spec: MPIJobSpec{

View File

@ -488,7 +488,7 @@ func schema_pkg_apis_kubeflow_v2beta1_MPIJobSpec(ref common.ReferenceCallback) c
},
"mpiImplementation": {
SchemaProps: spec.SchemaProps{
-Description: "MPIImplementation is the MPI implementation. Options are \"OpenMPI\" (default) and \"Intel\".",
+Description: "MPIImplementation is the MPI implementation. Options are \"OpenMPI\" (default), \"Intel\" and \"MPICH\".",
Type: []string{"string"},
Format: "",
},

View File

@ -322,7 +322,7 @@
],
"properties": {
"mpiImplementation": {
-"description": "MPIImplementation is the MPI implementation. Options are \"OpenMPI\" (default) and \"Intel\".",
+"description": "MPIImplementation is the MPI implementation. Options are \"OpenMPI\" (default), \"Intel\" and \"MPICH\".",
"type": "string"
},
"mpiReplicaSpecs": {

View File

@ -155,8 +155,8 @@ type MPIJobSpec struct {
SSHAuthMountPath string `json:"sshAuthMountPath,omitempty"`
// MPIImplementation is the MPI implementation.
-// Options are "OpenMPI" (default) and "Intel".
-// +kubebuilder:validation:Enum:=OpenMPI;Intel
+// Options are "OpenMPI" (default), "Intel" and "MPICH".
+// +kubebuilder:validation:Enum:=OpenMPI;Intel;MPICH
// +kubebuilder:default:=OpenMPI
MPIImplementation MPIImplementation `json:"mpiImplementation,omitempty"`
}
@ -177,6 +177,7 @@ type MPIImplementation string
const (
MPIImplementationOpenMPI MPIImplementation = "OpenMPI"
MPIImplementationIntel MPIImplementation = "Intel"
MPIImplementationMPICH MPIImplementation = "MPICH"
)
// JobStatus represents the current observed state of the training Job.
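As a quick illustration of the new enum value (not part of the diff): a minimal Go sketch that builds an `MPIJobSpec` selecting MPICH. The import path is assumed from the usual repository layout; the field names match the types shown above.

```go
package main

import (
	"fmt"

	// Assumed import path for the v2beta1 API types changed in this diff.
	v2beta1 "github.com/kubeflow/mpi-operator/pkg/apis/kubeflow/v2beta1"
)

func main() {
	slots := int32(1)
	spec := v2beta1.MPIJobSpec{
		SlotsPerWorker:   &slots,
		SSHAuthMountPath: "/home/mpiuser/.ssh",
		// New constant introduced by this PR; webhook validation accepts it as well.
		MPIImplementation: v2beta1.MPIImplementationMPICH,
	}
	fmt.Println(spec.MPIImplementation) // MPICH
}
```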

View File

@ -35,7 +35,8 @@ var (
validMPIImplementations = sets.NewString(
string(kubeflow.MPIImplementationOpenMPI),
-string(kubeflow.MPIImplementationIntel))
+string(kubeflow.MPIImplementationIntel),
+string(kubeflow.MPIImplementationMPICH))
validRestartPolicies = sets.NewString(
string(common.RestartPolicyNever),

View File

@ -31,7 +31,7 @@ func TestValidateMPIJob(t *testing.T) {
job kubeflow.MPIJob
wantErrs field.ErrorList
}{
-"valid": {
+"valid (intel)": {
job: kubeflow.MPIJob{
ObjectMeta: metav1.ObjectMeta{
Name: "foo",
@ -57,7 +57,7 @@ func TestValidateMPIJob(t *testing.T) {
},
},
},
-"valid with worker": {
+"valid with worker (intel)": {
job: kubeflow.MPIJob{
ObjectMeta: metav1.ObjectMeta{
Name: "foo",
@ -92,6 +92,67 @@ func TestValidateMPIJob(t *testing.T) {
},
},
},
"valid (mpich)": {
job: kubeflow.MPIJob{
ObjectMeta: metav1.ObjectMeta{
Name: "foo",
},
Spec: kubeflow.MPIJobSpec{
SlotsPerWorker: newInt32(2),
RunPolicy: kubeflow.RunPolicy{
CleanPodPolicy: kubeflow.NewCleanPodPolicy(kubeflow.CleanPodPolicyRunning),
},
SSHAuthMountPath: "/home/mpiuser/.ssh",
MPIImplementation: kubeflow.MPIImplementationMPICH,
MPIReplicaSpecs: map[kubeflow.MPIReplicaType]*common.ReplicaSpec{
kubeflow.MPIReplicaTypeLauncher: {
Replicas: newInt32(1),
RestartPolicy: common.RestartPolicyNever,
Template: corev1.PodTemplateSpec{
Spec: corev1.PodSpec{
Containers: []corev1.Container{{}},
},
},
},
},
},
},
},
"valid with worker (mpich)": {
job: kubeflow.MPIJob{
ObjectMeta: metav1.ObjectMeta{
Name: "foo",
},
Spec: kubeflow.MPIJobSpec{
SlotsPerWorker: newInt32(2),
RunPolicy: kubeflow.RunPolicy{
CleanPodPolicy: kubeflow.NewCleanPodPolicy(kubeflow.CleanPodPolicyRunning),
},
SSHAuthMountPath: "/home/mpiuser/.ssh",
MPIImplementation: kubeflow.MPIImplementationMPICH,
MPIReplicaSpecs: map[kubeflow.MPIReplicaType]*common.ReplicaSpec{
kubeflow.MPIReplicaTypeLauncher: {
Replicas: newInt32(1),
RestartPolicy: common.RestartPolicyOnFailure,
Template: corev1.PodTemplateSpec{
Spec: corev1.PodSpec{
Containers: []corev1.Container{{}},
},
},
},
kubeflow.MPIReplicaTypeWorker: {
Replicas: newInt32(3),
RestartPolicy: common.RestartPolicyNever,
Template: corev1.PodTemplateSpec{
Spec: corev1.PodSpec{
Containers: []corev1.Container{{}},
},
},
},
},
},
},
},
"empty job": { "empty job": {
wantErrs: field.ErrorList{ wantErrs: field.ErrorList{
&field.Error{ &field.Error{

View File

@ -202,6 +202,16 @@ var (
Value: "-o ConnectionAttempts=10", Value: "-o ConnectionAttempts=10",
}, },
} }
mpichEnvVars = []corev1.EnvVar{
{
Name: "HYDRA_HOST_FILE",
Value: fmt.Sprintf("%s/%s", configMountPath, hostfileName),
},
{
Name: "HYDRA_LAUNCH_EXTRA_ARGS",
Value: "-o ConnectionAttempts=10",
},
}
nvidiaDisableEnvVars = []corev1.EnvVar{
{Name: "NVIDIA_VISIBLE_DEVICES"},
{Name: "NVIDIA_DRIVER_CAPABILITIES"},
@ -603,8 +613,9 @@ func (c *MPIJobController) syncHandler(key string) error {
return err
}
}
-if mpiJob.Spec.MPIImplementation == kubeflow.MPIImplementationIntel {
-// The Intel implementation requires workers to communicate with the
+if mpiJob.Spec.MPIImplementation == kubeflow.MPIImplementationIntel ||
+mpiJob.Spec.MPIImplementation == kubeflow.MPIImplementationMPICH {
+// The Intel and MPICH implementations require workers to communicate with the
// launcher through its hostname. For that, we create a Service which
// has the same name as the launcher's hostname.
_, err := c.getOrCreateService(mpiJob, newLauncherService(mpiJob))
@ -1216,7 +1227,7 @@ func newConfigMap(mpiJob *kubeflow.MPIJob, workerReplicas int32) *corev1.ConfigM
switch mpiJob.Spec.MPIImplementation {
case kubeflow.MPIImplementationOpenMPI:
buffer.WriteString(fmt.Sprintf("%s%s-%d.%s.%s.svc slots=%d\n", mpiJob.Name, workerSuffix, i, workersService, mpiJob.Namespace, slots))
-case kubeflow.MPIImplementationIntel:
+case kubeflow.MPIImplementationIntel, kubeflow.MPIImplementationMPICH:
buffer.WriteString(fmt.Sprintf("%s%s-%d.%s.%s.svc:%d\n", mpiJob.Name, workerSuffix, i, workersService, mpiJob.Namespace, slots))
}
}
@ -1444,6 +1455,8 @@ func (c *MPIJobController) newLauncherPodTemplate(mpiJob *kubeflow.MPIJob) corev
Name: intelMPISlotsEnv,
Value: slotsStr,
})
case kubeflow.MPIImplementationMPICH:
container.Env = append(container.Env, mpichEnvVars...)
}
container.Env = append(container.Env,
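To make the hostfile change concrete: below is a standalone sketch (a hypothetical helper, not the controller's actual code, which writes these lines into a buffer inside `newConfigMap`) of the per-worker line each implementation receives. MPICH's `mpirun` then finds the generated file through the `HYDRA_HOST_FILE` variable added to `mpichEnvVars` above.

```go
package main

import "fmt"

// hostfileLine mirrors the switch in newConfigMap (hypothetical helper name).
func hostfileLine(impl, host string, slots int32) string {
	switch impl {
	case "OpenMPI":
		// OpenMPI hostfile syntax: "<host> slots=<n>"
		return fmt.Sprintf("%s slots=%d\n", host, slots)
	case "Intel", "MPICH":
		// Hostfile syntax shared by Intel MPI and MPICH: "<host>:<n>"
		return fmt.Sprintf("%s:%d\n", host, slots)
	}
	return ""
}

func main() {
	host := "pi-worker-0.pi-worker.default.svc"
	fmt.Print(hostfileLine("OpenMPI", host, 2)) // pi-worker-0.pi-worker.default.svc slots=2
	fmt.Print(hostfileLine("MPICH", host, 2))   // pi-worker-0.pi-worker.default.svc:2
}
```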

View File

@ -500,7 +500,7 @@ func TestDoNothingWithInvalidMPIJob(t *testing.T) {
}
func TestAllResourcesCreated(t *testing.T) {
-impls := []kubeflow.MPIImplementation{kubeflow.MPIImplementationOpenMPI, kubeflow.MPIImplementationIntel}
+impls := []kubeflow.MPIImplementation{kubeflow.MPIImplementationOpenMPI, kubeflow.MPIImplementationIntel, kubeflow.MPIImplementationMPICH}
for _, implementation := range impls {
t.Run(string(implementation), func(t *testing.T) {
f := newFixture(t, "")
@ -524,7 +524,8 @@ func TestAllResourcesCreated(t *testing.T) {
for i := 0; i < 5; i++ {
f.expectCreatePodAction(fmjc.newWorker(mpiJobCopy, i))
}
-if implementation == kubeflow.MPIImplementationIntel {
+if implementation == kubeflow.MPIImplementationIntel ||
+implementation == kubeflow.MPIImplementationMPICH {
f.expectCreateServiceAction(newLauncherService(mpiJobCopy))
}
f.expectCreateJobAction(fmjc.newLauncherJob(mpiJobCopy))
@ -796,7 +797,7 @@ func TestShutdownWorker(t *testing.T) {
}
func TestCreateSuspendedMPIJob(t *testing.T) {
-impls := []kubeflow.MPIImplementation{kubeflow.MPIImplementationOpenMPI, kubeflow.MPIImplementationIntel}
+impls := []kubeflow.MPIImplementation{kubeflow.MPIImplementationOpenMPI, kubeflow.MPIImplementationIntel, kubeflow.MPIImplementationMPICH}
for _, implementation := range impls {
t.Run(string(implementation), func(t *testing.T) {
f := newFixture(t, "")
@ -819,7 +820,8 @@ func TestCreateSuspendedMPIJob(t *testing.T) {
t.Fatalf("Failed creating secret") t.Fatalf("Failed creating secret")
} }
f.expectCreateSecretAction(secret) f.expectCreateSecretAction(secret)
if implementation == kubeflow.MPIImplementationIntel { if implementation == kubeflow.MPIImplementationIntel ||
implementation == kubeflow.MPIImplementationMPICH {
f.expectCreateServiceAction(newLauncherService(mpiJob)) f.expectCreateServiceAction(newLauncherService(mpiJob))
} }
@ -1583,6 +1585,31 @@ func TestNewConfigMap(t *testing.T) {
},
},
},
"MPICH with slots": {
mpiJob: &kubeflow.MPIJob{
ObjectMeta: metav1.ObjectMeta{
Name: "mpich-with-slots",
Namespace: "project-x",
},
Spec: kubeflow.MPIJobSpec{
SlotsPerWorker: pointer.Int32(10),
MPIImplementation: kubeflow.MPIImplementationMPICH,
},
},
workerReplicas: 1,
wantCM: &corev1.ConfigMap{
ObjectMeta: metav1.ObjectMeta{
Name: "mpich-with-slots-config",
Namespace: "project-x",
Labels: map[string]string{
"app": "mpich-with-slots",
},
},
Data: map[string]string{
"hostfile": "mpich-with-slots-worker-0.mpich-with-slots-worker.project-x.svc:10\n",
},
},
},
}
for name, tc := range testCases {
t.Run(name, func(t *testing.T) {

View File

@ -40,7 +40,7 @@ An MPIJob CRD describes the Job. Important fields include:
- The launcher template, which should have a `mpirun` command.
The images are expected to have the MPI implementation binaries (such as
-OpenMPI, MPICH or Intel MPI) the users MPI executable.
+OpenMPI, Intel MPI or MPICH) the users MPI executable.
A controller processes the MPIJob, starting a Job with the following steps:
1. Creates ConfigMap, which contains:
@ -148,7 +148,7 @@ following changes:
doesnt support changes to the completions field. This can be supported
starting from 1.23. In the meantime, we can replicate the behavior by
creating a new Job and doing Pod adoption.
-- For Intel MPI, we also need a headless Service to front the launcher,
+- For Intel MPI and MPICH, we also need a headless Service to front the launcher,
because workers communicate back to the launcher using its hostname.
- **Revert the use of the Job API for the launcher.**
- The Job controller handles retries when the launcher or any of the workers fail.
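A minimal sketch of the launcher-Service rule described above, using a hypothetical helper name; in the actual controller this check is written inline in `syncHandler` (see the `pkg/controller` diff earlier in this PR).

```go
package main

import "fmt"

type MPIImplementation string

const (
	OpenMPI MPIImplementation = "OpenMPI"
	Intel   MPIImplementation = "Intel"
	MPICH   MPIImplementation = "MPICH"
)

// needsLauncherService reports whether workers reach the launcher by hostname,
// which requires a headless Service named after the launcher.
func needsLauncherService(impl MPIImplementation) bool {
	return impl == Intel || impl == MPICH
}

func main() {
	for _, impl := range []MPIImplementation{OpenMPI, Intel, MPICH} {
		fmt.Printf("%-7s -> launcher Service needed: %v\n", impl, needsLauncherService(impl))
	}
}
```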

View File

@ -4,7 +4,7 @@
## Properties
Name | Type | Description | Notes
------------ | ------------- | ------------- | -------------
-**mpi_implementation** | **str** | MPIImplementation is the MPI implementation. Options are \&quot;OpenMPI\&quot; (default) and \&quot;Intel\&quot;. | [optional]
+**mpi_implementation** | **str** | MPIImplementation is the MPI implementation. Options are \&quot;OpenMPI\&quot; (default), \&quot;Intel\&quot; and \&quot;MPICH\&quot;. | [optional]
**mpi_replica_specs** | [**dict(str, V1ReplicaSpec)**](V1ReplicaSpec.md) | MPIReplicaSpecs contains maps from &#x60;MPIReplicaType&#x60; to &#x60;ReplicaSpec&#x60; that specify the MPI replicas to run. |
**run_policy** | [**V2beta1RunPolicy**](V2beta1RunPolicy.md) | | [optional]
**slots_per_worker** | **int** | Specifies the number of slots per worker used in hostfile. Defaults to 1. | [optional]

View File

@ -75,7 +75,7 @@ class V2beta1MPIJobSpec(object):
def mpi_implementation(self):
"""Gets the mpi_implementation of this V2beta1MPIJobSpec. # noqa: E501
-MPIImplementation is the MPI implementation. Options are \"OpenMPI\" (default) and \"Intel\". # noqa: E501
+MPIImplementation is the MPI implementation. Options are \"OpenMPI\" (default), \"Intel\" and \"MPICH\". # noqa: E501
:return: The mpi_implementation of this V2beta1MPIJobSpec. # noqa: E501
:rtype: str
@ -86,7 +86,7 @@ class V2beta1MPIJobSpec(object):
def mpi_implementation(self, mpi_implementation):
"""Sets the mpi_implementation of this V2beta1MPIJobSpec.
-MPIImplementation is the MPI implementation. Options are \"OpenMPI\" (default) and \"Intel\". # noqa: E501
+MPIImplementation is the MPI implementation. Options are \"OpenMPI\" (default), \"Intel\" and \"MPICH\". # noqa: E501
:param mpi_implementation: The mpi_implementation of this V2beta1MPIJobSpec. # noqa: E501
:type mpi_implementation: str

View File

@ -40,6 +40,7 @@ const (
envTestMPIOperatorImage = "TEST_MPI_OPERATOR_IMAGE"
envTestOpenMPIImage = "TEST_OPENMPI_IMAGE"
envTestIntelMPIImage = "TEST_INTELMPI_IMAGE"
envTestMPICHImage = "TEST_MPICH_IMAGE"
envTestKindImage = "TEST_KIND_IMAGE"
envSchedulerPluginsVersion = "SCHEDULER_PLUGINS_VERSION"
@ -47,6 +48,7 @@ const (
defaultKindImage = "kindest/node:v1.25.8"
defaultOpenMPIImage = "mpioperator/mpi-pi:openmpi"
defaultIntelMPIImage = "mpioperator/mpi-pi:intel"
defaultMPICHImage = "mpioperator/mpi-pi:mpich"
rootPath = "../.."
kubectlPath = rootPath + "/bin/kubectl"
kindPath = rootPath + "/bin/kind"
@ -71,6 +73,7 @@ var (
mpiOperatorImage string
openMPIImage string
intelMPIImage string
mpichImage string
kindImage string
schedulerPluginsVersion string
@ -86,6 +89,7 @@ func init() {
mpiOperatorImage = getEnvDefault(envTestMPIOperatorImage, defaultMPIOperatorImage)
openMPIImage = getEnvDefault(envTestOpenMPIImage, defaultOpenMPIImage)
intelMPIImage = getEnvDefault(envTestIntelMPIImage, defaultIntelMPIImage)
mpichImage = getEnvDefault(envTestMPICHImage, defaultMPICHImage)
kindImage = getEnvDefault(envTestKindImage, defaultKindImage)
schedulerPluginsVersion = getEnvDefault(envSchedulerPluginsVersion, defaultSchedulerPluginsVersion)
}
@ -147,7 +151,7 @@ func bootstrapKindCluster() error {
if err != nil {
return fmt.Errorf("creating kind cluster: %w", err)
}
-err = runCommand(kindPath, "load", "docker-image", mpiOperatorImage, openMPIImage, intelMPIImage)
+err = runCommand(kindPath, "load", "docker-image", mpiOperatorImage, openMPIImage, intelMPIImage, mpichImage)
if err != nil {
return fmt.Errorf("loading container images: %w", err)
}

View File

@ -170,7 +170,6 @@ var _ = ginkgo.Describe("MPIJob", func() {
})
ginkgo.Context("with Intel Implementation", func() {
-ginkgo.When("running as root", func() {
ginkgo.BeforeEach(func() {
mpiJob.Spec.MPIImplementation = kubeflow.MPIImplementationIntel
mpiJob.Spec.MPIReplicaSpecs[kubeflow.MPIReplicaTypeLauncher].Template.Spec.Containers = []corev1.Container{
@ -209,12 +208,99 @@ var _ = ginkgo.Describe("MPIJob", func() {
}
})
+ginkgo.When("running as root", func() {
ginkgo.It("should succeed", func() {
mpiJob := createJobAndWaitForCompletion(mpiJob)
expectConditionToBeTrue(mpiJob, kubeflow.JobSucceeded)
})
})
ginkgo.When("running as non-root", func() {
ginkgo.BeforeEach(func () {
mpiJob.Spec.SSHAuthMountPath = "/home/mpiuser/.ssh"
mpiJob.Spec.MPIReplicaSpecs[kubeflow.MPIReplicaTypeLauncher].Template.Spec.Containers[0].SecurityContext = &corev1.SecurityContext{
RunAsUser: newInt64(1000),
}
workerContainer := &mpiJob.Spec.MPIReplicaSpecs[kubeflow.MPIReplicaTypeWorker].Template.Spec.Containers[0]
workerContainer.SecurityContext = &corev1.SecurityContext{
RunAsUser: newInt64(1000),
}
workerContainer.Args = append(workerContainer.Args, "-f", "/home/mpiuser/.sshd_config")
})
ginkgo.It("should succeed", func() {
mpiJob := createJobAndWaitForCompletion(mpiJob)
expectConditionToBeTrue(mpiJob, kubeflow.JobSucceeded)
})
})
})
ginkgo.Context("with MPICH Implementation", func() {
ginkgo.BeforeEach(func() {
mpiJob.Spec.MPIImplementation = kubeflow.MPIImplementationMPICH
mpiJob.Spec.MPIReplicaSpecs[kubeflow.MPIReplicaTypeLauncher].Template.Spec.Containers = []corev1.Container{
{
Name: "launcher",
Image: mpichImage,
ImagePullPolicy: corev1.PullIfNotPresent, // use locally built image.
Command: []string{}, // uses entrypoint.
Args: []string{
"mpirun",
"-n",
"2",
"/home/mpiuser/pi",
},
},
}
mpiJob.Spec.MPIReplicaSpecs[kubeflow.MPIReplicaTypeWorker].Template.Spec.Containers = []corev1.Container{
{
Name: "worker",
Image: mpichImage,
ImagePullPolicy: corev1.PullIfNotPresent, // use locally built image.
Command: []string{}, // uses entrypoint.
Args: []string{
"/usr/sbin/sshd",
"-De",
},
ReadinessProbe: &corev1.Probe{
ProbeHandler: corev1.ProbeHandler{
TCPSocket: &corev1.TCPSocketAction{
Port: intstr.FromInt(2222),
},
},
InitialDelaySeconds: 3,
},
},
}
})
ginkgo.When("running as root", func() {
ginkgo.It("should succeed", func() {
mpiJob := createJobAndWaitForCompletion(mpiJob)
expectConditionToBeTrue(mpiJob, kubeflow.JobSucceeded)
})
})
ginkgo.When("running as non-root", func() {
ginkgo.BeforeEach(func () {
mpiJob.Spec.SSHAuthMountPath = "/home/mpiuser/.ssh"
mpiJob.Spec.MPIReplicaSpecs[kubeflow.MPIReplicaTypeLauncher].Template.Spec.Containers[0].SecurityContext = &corev1.SecurityContext{
RunAsUser: newInt64(1000),
}
workerContainer := &mpiJob.Spec.MPIReplicaSpecs[kubeflow.MPIReplicaTypeWorker].Template.Spec.Containers[0]
workerContainer.SecurityContext = &corev1.SecurityContext{
RunAsUser: newInt64(1000),
}
workerContainer.Args = append(workerContainer.Args, "-f", "/home/mpiuser/.sshd_config")
})
ginkgo.It("should succeed", func() {
mpiJob := createJobAndWaitForCompletion(mpiJob)
expectConditionToBeTrue(mpiJob, kubeflow.JobSucceeded)
})
})
})
ginkgo.Context("with scheduler-plugins", func() {