Mount SSH Secret directly on main container (#416)
Remove the init container for faster startup. This is possible because StrictModes is disabled in sshd_config.
This commit is contained in:
parent
0bccdb9672
commit
8f5bbd8203
|
|
@ -69,9 +69,13 @@ RUN apt-get install -y --no-install-recommends openssh-client openssh-server &&
|
||||||
mkdir -p /var/run/sshd
|
mkdir -p /var/run/sshd
|
||||||
|
|
||||||
# Allow OpenSSH to talk to containers without asking for confirmation
|
# Allow OpenSSH to talk to containers without asking for confirmation
|
||||||
RUN cat /etc/ssh/ssh_config | grep -v StrictHostKeyChecking > /etc/ssh/ssh_config.new && \
|
# by disabling StrictHostKeyChecking.
|
||||||
echo " StrictHostKeyChecking no" >> /etc/ssh/ssh_config.new && \
|
# mpi-operator mounts the .ssh folder from a Secret. For that to work, we need
|
||||||
mv /etc/ssh/ssh_config.new /etc/ssh/ssh_config
|
# to disable UserKnownHostsFile to avoid write permissions.
|
||||||
|
# Disabling StrictModes avoids directory and files read permission checks.
|
||||||
|
RUN sed -i 's/[ #]\(.*StrictHostKeyChecking \).*/ \1no/g' /etc/ssh/ssh_config && \
|
||||||
|
echo " UserKnownHostsFile /dev/null" >> /etc/ssh/ssh_config && \
|
||||||
|
sed -i 's/#\(StrictModes \).*/\1no/g' /etc/ssh/sshd_config
|
||||||
|
|
||||||
WORKDIR "/examples"
|
WORKDIR "/examples"
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -24,5 +24,12 @@ RUN setcap CAP_NET_BIND_SERVICE=+eip /usr/sbin/sshd
|
||||||
RUN useradd -m mpiuser
|
RUN useradd -m mpiuser
|
||||||
WORKDIR /home/mpiuser
|
WORKDIR /home/mpiuser
|
||||||
COPY --chown=mpiuser sshd_config .sshd_config
|
COPY --chown=mpiuser sshd_config .sshd_config
|
||||||
RUN sed -i 's/[ #]\(.*StrictHostKeyChecking \).*/ \1no/g' /etc/ssh/ssh_config
|
# Allow OpenSSH to talk to containers without asking for confirmation
|
||||||
|
# by disabling StrictHostKeyChecking.
|
||||||
|
# mpi-operator mounts the .ssh folder from a Secret. For that to work, we need
|
||||||
|
# to disable UserKnownHostsFile to avoid write permissions.
|
||||||
|
# Disabling StrictModes avoids directory and files read permission checks.
|
||||||
|
RUN sed -i 's/[ #]\(.*StrictHostKeyChecking \).*/ \1no/g' /etc/ssh/ssh_config && \
|
||||||
|
echo " UserKnownHostsFile /dev/null" >> /etc/ssh/ssh_config && \
|
||||||
|
sed -i 's/#\(StrictModes \).*/\1no/g' /etc/ssh/sshd_config
|
||||||
COPY --from=builder /pi /home/mpiuser/pi
|
COPY --from=builder /pi /home/mpiuser/pi
|
||||||
|
|
@ -52,6 +52,13 @@ WORKDIR /home/mpiuser
|
||||||
COPY intel-entrypoint.sh /entrypoint.sh
|
COPY intel-entrypoint.sh /entrypoint.sh
|
||||||
ENTRYPOINT ["/entrypoint.sh"]
|
ENTRYPOINT ["/entrypoint.sh"]
|
||||||
COPY --chown=mpiuser sshd_config .sshd_config
|
COPY --chown=mpiuser sshd_config .sshd_config
|
||||||
RUN sed -i 's/[ #]\(.*StrictHostKeyChecking \).*/ \1no/g' /etc/ssh/ssh_config
|
# Allow OpenSSH to talk to containers without asking for confirmation
|
||||||
|
# by disabling StrictHostKeyChecking.
|
||||||
|
# mpi-operator mounts the .ssh folder from a Secret. For that to work, we need
|
||||||
|
# to disable UserKnownHostsFile to avoid write permissions.
|
||||||
|
# Disabling StrictModes avoids directory and files read permission checks.
|
||||||
|
RUN sed -i 's/[ #]\(.*StrictHostKeyChecking \).*/ \1no/g' /etc/ssh/ssh_config && \
|
||||||
|
echo " UserKnownHostsFile /dev/null" >> /etc/ssh/ssh_config && \
|
||||||
|
sed -i 's/#\(StrictModes \).*/\1no/g' /etc/ssh/sshd_config
|
||||||
|
|
||||||
COPY --from=builder /pi /home/mpiuser/pi
|
COPY --from=builder /pi /home/mpiuser/pi
|
||||||
|
|
@ -1,2 +1,3 @@
|
||||||
PidFile /home/mpiuser/sshd.pid
|
PidFile /home/mpiuser/sshd.pid
|
||||||
HostKey /home/mpiuser/.ssh/id_rsa
|
HostKey /home/mpiuser/.ssh/id_rsa
|
||||||
|
StrictModes no
|
||||||
|
|
|
||||||
|
|
@ -1,5 +1,11 @@
|
||||||
FROM horovod/horovod:0.20.0-tf2.3.0-torch1.6.0-mxnet1.6.0.post0-py3.7-cuda10.1
|
FROM horovod/horovod:0.20.0-tf2.3.0-torch1.6.0-mxnet1.6.0.post0-py3.7-cuda10.1
|
||||||
|
|
||||||
|
# mpi-operator mounts the .ssh folder from a Secret. For that to work, we need
|
||||||
|
# to disable UserKnownHostsFile to avoid write permissions.
|
||||||
|
# Disabling StrictModes avoids directory and files read permission checks.
|
||||||
|
RUN echo " UserKnownHostsFile /dev/null" >> /etc/ssh/ssh_config && \
|
||||||
|
sed -i 's/#\(StrictModes \).*/\1no/g' /etc/ssh/sshd_config
|
||||||
|
|
||||||
RUN mkdir /tensorflow
|
RUN mkdir /tensorflow
|
||||||
WORKDIR "/tensorflow"
|
WORKDIR "/tensorflow"
|
||||||
RUN git clone https://github.com/tensorflow/benchmarks
|
RUN git clone https://github.com/tensorflow/benchmarks
|
||||||
|
|
|
||||||
|
|
@ -33,7 +33,6 @@ type ServerOption struct {
|
||||||
LockNamespace string
|
LockNamespace string
|
||||||
QPS int
|
QPS int
|
||||||
Burst int
|
Burst int
|
||||||
ScriptingImage string
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// NewServerOption creates a new CMServer with a default config.
|
// NewServerOption creates a new CMServer with a default config.
|
||||||
|
|
@ -69,6 +68,4 @@ func (s *ServerOption) AddFlags(fs *flag.FlagSet) {
|
||||||
|
|
||||||
fs.IntVar(&s.QPS, "kube-api-qps", 5, "QPS indicates the maximum QPS to the master from this client.")
|
fs.IntVar(&s.QPS, "kube-api-qps", 5, "QPS indicates the maximum QPS to the master from this client.")
|
||||||
fs.IntVar(&s.Burst, "kube-api-burst", 10, "Maximum burst for throttle.")
|
fs.IntVar(&s.Burst, "kube-api-burst", 10, "Maximum burst for throttle.")
|
||||||
|
|
||||||
fs.StringVar(&s.ScriptingImage, "scripting-image", "alpine:3.14", "Container image used for scripting, such as in init containers.")
|
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -161,8 +161,7 @@ func Run(opt *options.ServerOption) error {
|
||||||
kubeInformerFactory.Core().V1().Pods(),
|
kubeInformerFactory.Core().V1().Pods(),
|
||||||
podgroupsInformer,
|
podgroupsInformer,
|
||||||
kubeflowInformerFactory.Kubeflow().V2beta1().MPIJobs(),
|
kubeflowInformerFactory.Kubeflow().V2beta1().MPIJobs(),
|
||||||
opt.GangSchedulingName,
|
opt.GangSchedulingName)
|
||||||
opt.ScriptingImage)
|
|
||||||
|
|
||||||
go kubeInformerFactory.Start(ctx.Done())
|
go kubeInformerFactory.Start(ctx.Done())
|
||||||
go kubeflowInformerFactory.Start(ctx.Done())
|
go kubeflowInformerFactory.Start(ctx.Done())
|
||||||
|
|
|
||||||
|
|
@ -73,9 +73,7 @@ const (
|
||||||
discoverHostsScriptName = "discover_hosts.sh"
|
discoverHostsScriptName = "discover_hosts.sh"
|
||||||
sshAuthSecretSuffix = "-ssh"
|
sshAuthSecretSuffix = "-ssh"
|
||||||
sshAuthVolume = "ssh-auth"
|
sshAuthVolume = "ssh-auth"
|
||||||
sshAuthMountPath = "/mnt/ssh"
|
rootSSHPath = "/root/.ssh"
|
||||||
sshHomeInitMountPath = "/mnt/home-ssh"
|
|
||||||
sshHomeVolume = "ssh-home"
|
|
||||||
launcher = "launcher"
|
launcher = "launcher"
|
||||||
worker = "worker"
|
worker = "worker"
|
||||||
launcherSuffix = "-launcher"
|
launcherSuffix = "-launcher"
|
||||||
|
|
@ -242,8 +240,6 @@ type MPIJobController struct {
|
||||||
recorder record.EventRecorder
|
recorder record.EventRecorder
|
||||||
// Gang scheduler name to use
|
// Gang scheduler name to use
|
||||||
gangSchedulerName string
|
gangSchedulerName string
|
||||||
// Container image used for scripting.
|
|
||||||
scriptingImage string
|
|
||||||
|
|
||||||
// To allow injection of updateStatus for testing.
|
// To allow injection of updateStatus for testing.
|
||||||
updateStatusHandler func(mpijob *kubeflow.MPIJob) error
|
updateStatusHandler func(mpijob *kubeflow.MPIJob) error
|
||||||
|
|
@ -261,7 +257,7 @@ func NewMPIJobController(
|
||||||
podInformer coreinformers.PodInformer,
|
podInformer coreinformers.PodInformer,
|
||||||
podgroupsInformer podgroupsinformer.PodGroupInformer,
|
podgroupsInformer podgroupsinformer.PodGroupInformer,
|
||||||
mpiJobInformer informers.MPIJobInformer,
|
mpiJobInformer informers.MPIJobInformer,
|
||||||
gangSchedulerName, scriptingImage string) *MPIJobController {
|
gangSchedulerName string) *MPIJobController {
|
||||||
|
|
||||||
// Create event broadcaster.
|
// Create event broadcaster.
|
||||||
klog.V(4).Info("Creating event broadcaster")
|
klog.V(4).Info("Creating event broadcaster")
|
||||||
|
|
@ -298,7 +294,6 @@ func NewMPIJobController(
|
||||||
queue: workqueue.NewNamedRateLimitingQueue(workqueue.DefaultControllerRateLimiter(), "MPIJobs"),
|
queue: workqueue.NewNamedRateLimitingQueue(workqueue.DefaultControllerRateLimiter(), "MPIJobs"),
|
||||||
recorder: recorder,
|
recorder: recorder,
|
||||||
gangSchedulerName: gangSchedulerName,
|
gangSchedulerName: gangSchedulerName,
|
||||||
scriptingImage: scriptingImage,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
controller.updateStatusHandler = controller.doUpdateJobStatus
|
controller.updateStatusHandler = controller.doUpdateJobStatus
|
||||||
|
|
@ -1516,57 +1511,28 @@ func workerReplicas(job *kubeflow.MPIJob) int32 {
|
||||||
}
|
}
|
||||||
|
|
||||||
func (c *MPIJobController) setupSSHOnPod(podSpec *corev1.PodSpec, job *kubeflow.MPIJob) {
|
func (c *MPIJobController) setupSSHOnPod(podSpec *corev1.PodSpec, job *kubeflow.MPIJob) {
|
||||||
|
var mode *int32
|
||||||
|
if job.Spec.SSHAuthMountPath == rootSSHPath {
|
||||||
|
mode = newInt32(0600)
|
||||||
|
}
|
||||||
|
mainContainer := &podSpec.Containers[0]
|
||||||
podSpec.Volumes = append(podSpec.Volumes,
|
podSpec.Volumes = append(podSpec.Volumes,
|
||||||
corev1.Volume{
|
corev1.Volume{
|
||||||
Name: sshAuthVolume,
|
Name: sshAuthVolume,
|
||||||
VolumeSource: corev1.VolumeSource{
|
VolumeSource: corev1.VolumeSource{
|
||||||
Secret: &corev1.SecretVolumeSource{
|
Secret: &corev1.SecretVolumeSource{
|
||||||
SecretName: job.Name + sshAuthSecretSuffix,
|
DefaultMode: mode,
|
||||||
Items: sshVolumeItems,
|
SecretName: job.Name + sshAuthSecretSuffix,
|
||||||
|
Items: sshVolumeItems,
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
},
|
|
||||||
corev1.Volume{
|
|
||||||
Name: sshHomeVolume,
|
|
||||||
VolumeSource: corev1.VolumeSource{
|
|
||||||
EmptyDir: &corev1.EmptyDirVolumeSource{},
|
|
||||||
},
|
|
||||||
})
|
})
|
||||||
|
|
||||||
mainContainer := &podSpec.Containers[0]
|
|
||||||
mainContainer.VolumeMounts = append(mainContainer.VolumeMounts,
|
mainContainer.VolumeMounts = append(mainContainer.VolumeMounts,
|
||||||
corev1.VolumeMount{
|
corev1.VolumeMount{
|
||||||
Name: sshHomeVolume,
|
Name: sshAuthVolume,
|
||||||
MountPath: job.Spec.SSHAuthMountPath,
|
MountPath: job.Spec.SSHAuthMountPath,
|
||||||
})
|
})
|
||||||
|
|
||||||
// The init script sets the permissions of the ssh folder in the user's home
|
|
||||||
// directory. The ownership is set based on the security context of the
|
|
||||||
// launcher's first container.
|
|
||||||
launcherSecurityCtx := job.Spec.MPIReplicaSpecs[kubeflow.MPIReplicaTypeLauncher].Template.Spec.Containers[0].SecurityContext
|
|
||||||
initScript := "" +
|
|
||||||
"cp -RL /mnt/ssh/* /mnt/home-ssh && " +
|
|
||||||
"chmod 700 /mnt/home-ssh && " +
|
|
||||||
"chmod 600 /mnt/home-ssh/*"
|
|
||||||
if launcherSecurityCtx != nil && launcherSecurityCtx.RunAsUser != nil {
|
|
||||||
initScript += fmt.Sprintf(" && chown %d -R /mnt/home-ssh", *launcherSecurityCtx.RunAsUser)
|
|
||||||
}
|
|
||||||
podSpec.InitContainers = append(podSpec.InitContainers, corev1.Container{
|
|
||||||
Name: "init-ssh",
|
|
||||||
Image: c.scriptingImage,
|
|
||||||
VolumeMounts: []corev1.VolumeMount{
|
|
||||||
{
|
|
||||||
Name: sshAuthVolume,
|
|
||||||
MountPath: sshAuthMountPath,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
Name: sshHomeVolume,
|
|
||||||
MountPath: sshHomeInitMountPath,
|
|
||||||
},
|
|
||||||
},
|
|
||||||
Command: []string{"/bin/sh"},
|
|
||||||
Args: []string{"-c", initScript},
|
|
||||||
})
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func ownerReferenceAndGVK(object metav1.Object) (*metav1.OwnerReference, schema.GroupVersionKind, error) {
|
func ownerReferenceAndGVK(object metav1.Object) (*metav1.OwnerReference, schema.GroupVersionKind, error) {
|
||||||
|
|
|
||||||
|
|
@ -54,10 +54,6 @@ var (
|
||||||
ignoreSecretEntries = cmpopts.IgnoreMapEntries(func(k string, v []uint8) bool { return true })
|
ignoreSecretEntries = cmpopts.IgnoreMapEntries(func(k string, v []uint8) bool { return true })
|
||||||
)
|
)
|
||||||
|
|
||||||
const (
|
|
||||||
scriptingImage = "alpine"
|
|
||||||
)
|
|
||||||
|
|
||||||
type fixture struct {
|
type fixture struct {
|
||||||
t *testing.T
|
t *testing.T
|
||||||
|
|
||||||
|
|
@ -171,7 +167,6 @@ func (f *fixture) newController(gangSchedulerName string) (*MPIJobController, in
|
||||||
podgroupsInformer,
|
podgroupsInformer,
|
||||||
i.Kubeflow().V2beta1().MPIJobs(),
|
i.Kubeflow().V2beta1().MPIJobs(),
|
||||||
gangSchedulerName,
|
gangSchedulerName,
|
||||||
scriptingImage,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
c.configMapSynced = alwaysReady
|
c.configMapSynced = alwaysReady
|
||||||
|
|
@ -1015,42 +1010,22 @@ func TestNewLauncherAndWorker(t *testing.T) {
|
||||||
corev1.EnvVar{Name: openMPISlotsEnv, Value: "1"},
|
corev1.EnvVar{Name: openMPISlotsEnv, Value: "1"},
|
||||||
nvidiaDisableEnvVars),
|
nvidiaDisableEnvVars),
|
||||||
VolumeMounts: []corev1.VolumeMount{
|
VolumeMounts: []corev1.VolumeMount{
|
||||||
{Name: "ssh-home", MountPath: "/root/.ssh"},
|
{Name: "ssh-auth", MountPath: "/root/.ssh"},
|
||||||
{Name: "mpi-job-config", MountPath: "/etc/mpi"},
|
{Name: "mpi-job-config", MountPath: "/etc/mpi"},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
InitContainers: []corev1.Container{
|
|
||||||
{
|
|
||||||
Name: "init-ssh",
|
|
||||||
Image: scriptingImage,
|
|
||||||
Command: []string{"/bin/sh"},
|
|
||||||
Args: []string{
|
|
||||||
"-c",
|
|
||||||
"cp -RL /mnt/ssh/* /mnt/home-ssh && chmod 700 /mnt/home-ssh && chmod 600 /mnt/home-ssh/*",
|
|
||||||
},
|
|
||||||
VolumeMounts: []corev1.VolumeMount{
|
|
||||||
{Name: "ssh-auth", MountPath: "/mnt/ssh"},
|
|
||||||
{Name: "ssh-home", MountPath: "/mnt/home-ssh"},
|
|
||||||
},
|
|
||||||
},
|
|
||||||
},
|
|
||||||
Volumes: []corev1.Volume{
|
Volumes: []corev1.Volume{
|
||||||
{
|
{
|
||||||
Name: "ssh-auth",
|
Name: "ssh-auth",
|
||||||
VolumeSource: corev1.VolumeSource{
|
VolumeSource: corev1.VolumeSource{
|
||||||
Secret: &corev1.SecretVolumeSource{
|
Secret: &corev1.SecretVolumeSource{
|
||||||
SecretName: "foo-ssh",
|
DefaultMode: newInt32(0600),
|
||||||
Items: sshVolumeItems,
|
SecretName: "foo-ssh",
|
||||||
|
Items: sshVolumeItems,
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
{
|
|
||||||
Name: "ssh-home",
|
|
||||||
VolumeSource: corev1.VolumeSource{
|
|
||||||
EmptyDir: &corev1.EmptyDirVolumeSource{},
|
|
||||||
},
|
|
||||||
},
|
|
||||||
{
|
{
|
||||||
Name: "mpi-job-config",
|
Name: "mpi-job-config",
|
||||||
VolumeSource: corev1.VolumeSource{
|
VolumeSource: corev1.VolumeSource{
|
||||||
|
|
@ -1086,42 +1061,22 @@ func TestNewLauncherAndWorker(t *testing.T) {
|
||||||
{
|
{
|
||||||
Command: []string{"/usr/sbin/sshd", "-De"},
|
Command: []string{"/usr/sbin/sshd", "-De"},
|
||||||
VolumeMounts: []corev1.VolumeMount{
|
VolumeMounts: []corev1.VolumeMount{
|
||||||
{Name: "ssh-home", MountPath: "/root/.ssh"},
|
{Name: "ssh-auth", MountPath: "/root/.ssh"},
|
||||||
},
|
},
|
||||||
Env: workerEnvVars,
|
Env: workerEnvVars,
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
InitContainers: []corev1.Container{
|
|
||||||
{
|
|
||||||
Name: "init-ssh",
|
|
||||||
Image: scriptingImage,
|
|
||||||
Command: []string{"/bin/sh"},
|
|
||||||
Args: []string{
|
|
||||||
"-c",
|
|
||||||
"cp -RL /mnt/ssh/* /mnt/home-ssh && chmod 700 /mnt/home-ssh && chmod 600 /mnt/home-ssh/*",
|
|
||||||
},
|
|
||||||
VolumeMounts: []corev1.VolumeMount{
|
|
||||||
{Name: "ssh-auth", MountPath: "/mnt/ssh"},
|
|
||||||
{Name: "ssh-home", MountPath: "/mnt/home-ssh"},
|
|
||||||
},
|
|
||||||
},
|
|
||||||
},
|
|
||||||
Volumes: []corev1.Volume{
|
Volumes: []corev1.Volume{
|
||||||
{
|
{
|
||||||
Name: "ssh-auth",
|
Name: "ssh-auth",
|
||||||
VolumeSource: corev1.VolumeSource{
|
VolumeSource: corev1.VolumeSource{
|
||||||
Secret: &corev1.SecretVolumeSource{
|
Secret: &corev1.SecretVolumeSource{
|
||||||
SecretName: "foo-ssh",
|
DefaultMode: newInt32(0600),
|
||||||
Items: sshVolumeItems,
|
SecretName: "foo-ssh",
|
||||||
|
Items: sshVolumeItems,
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
{
|
|
||||||
Name: "ssh-home",
|
|
||||||
VolumeSource: corev1.VolumeSource{
|
|
||||||
EmptyDir: &corev1.EmptyDirVolumeSource{},
|
|
||||||
},
|
|
||||||
},
|
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
|
@ -1225,27 +1180,12 @@ func TestNewLauncherAndWorker(t *testing.T) {
|
||||||
nvidiaDisableEnvVars),
|
nvidiaDisableEnvVars),
|
||||||
VolumeMounts: []corev1.VolumeMount{
|
VolumeMounts: []corev1.VolumeMount{
|
||||||
{Name: "fool-vol", MountPath: "/mnt/foo"},
|
{Name: "fool-vol", MountPath: "/mnt/foo"},
|
||||||
{Name: "ssh-home", MountPath: "/home/mpiuser/.ssh"},
|
{Name: "ssh-auth", MountPath: "/home/mpiuser/.ssh"},
|
||||||
{Name: "mpi-job-config", MountPath: "/etc/mpi"},
|
{Name: "mpi-job-config", MountPath: "/etc/mpi"},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
{},
|
{},
|
||||||
},
|
},
|
||||||
InitContainers: []corev1.Container{
|
|
||||||
{
|
|
||||||
Name: "init-ssh",
|
|
||||||
Image: scriptingImage,
|
|
||||||
Command: []string{"/bin/sh"},
|
|
||||||
Args: []string{
|
|
||||||
"-c",
|
|
||||||
"cp -RL /mnt/ssh/* /mnt/home-ssh && chmod 700 /mnt/home-ssh && chmod 600 /mnt/home-ssh/* && chown 1000 -R /mnt/home-ssh",
|
|
||||||
},
|
|
||||||
VolumeMounts: []corev1.VolumeMount{
|
|
||||||
{Name: "ssh-auth", MountPath: "/mnt/ssh"},
|
|
||||||
{Name: "ssh-home", MountPath: "/mnt/home-ssh"},
|
|
||||||
},
|
|
||||||
},
|
|
||||||
},
|
|
||||||
Volumes: []corev1.Volume{
|
Volumes: []corev1.Volume{
|
||||||
{Name: "foo-vol"},
|
{Name: "foo-vol"},
|
||||||
{
|
{
|
||||||
|
|
@ -1257,12 +1197,6 @@ func TestNewLauncherAndWorker(t *testing.T) {
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
{
|
|
||||||
Name: "ssh-home",
|
|
||||||
VolumeSource: corev1.VolumeSource{
|
|
||||||
EmptyDir: &corev1.EmptyDirVolumeSource{},
|
|
||||||
},
|
|
||||||
},
|
|
||||||
{
|
{
|
||||||
Name: "mpi-job-config",
|
Name: "mpi-job-config",
|
||||||
VolumeSource: corev1.VolumeSource{
|
VolumeSource: corev1.VolumeSource{
|
||||||
|
|
@ -1298,26 +1232,11 @@ func TestNewLauncherAndWorker(t *testing.T) {
|
||||||
{
|
{
|
||||||
Command: []string{"/entrypoint.sh"},
|
Command: []string{"/entrypoint.sh"},
|
||||||
VolumeMounts: []corev1.VolumeMount{
|
VolumeMounts: []corev1.VolumeMount{
|
||||||
{Name: "ssh-home", MountPath: "/home/mpiuser/.ssh"},
|
{Name: "ssh-auth", MountPath: "/home/mpiuser/.ssh"},
|
||||||
},
|
},
|
||||||
Env: joinEnvVars(corev1.EnvVar{Name: "FOO", Value: "bar"}, workerEnvVars),
|
Env: joinEnvVars(corev1.EnvVar{Name: "FOO", Value: "bar"}, workerEnvVars),
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
InitContainers: []corev1.Container{
|
|
||||||
{
|
|
||||||
Name: "init-ssh",
|
|
||||||
Image: scriptingImage,
|
|
||||||
Command: []string{"/bin/sh"},
|
|
||||||
Args: []string{
|
|
||||||
"-c",
|
|
||||||
"cp -RL /mnt/ssh/* /mnt/home-ssh && chmod 700 /mnt/home-ssh && chmod 600 /mnt/home-ssh/* && chown 1000 -R /mnt/home-ssh",
|
|
||||||
},
|
|
||||||
VolumeMounts: []corev1.VolumeMount{
|
|
||||||
{Name: "ssh-auth", MountPath: "/mnt/ssh"},
|
|
||||||
{Name: "ssh-home", MountPath: "/mnt/home-ssh"},
|
|
||||||
},
|
|
||||||
},
|
|
||||||
},
|
|
||||||
Volumes: []corev1.Volume{
|
Volumes: []corev1.Volume{
|
||||||
{
|
{
|
||||||
Name: "ssh-auth",
|
Name: "ssh-auth",
|
||||||
|
|
@ -1328,12 +1247,6 @@ func TestNewLauncherAndWorker(t *testing.T) {
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
{
|
|
||||||
Name: "ssh-home",
|
|
||||||
VolumeSource: corev1.VolumeSource{
|
|
||||||
EmptyDir: &corev1.EmptyDirVolumeSource{},
|
|
||||||
},
|
|
||||||
},
|
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
|
@ -1344,9 +1257,7 @@ func TestNewLauncherAndWorker(t *testing.T) {
|
||||||
t.Run(name, func(t *testing.T) {
|
t.Run(name, func(t *testing.T) {
|
||||||
job := tc.job.DeepCopy()
|
job := tc.job.DeepCopy()
|
||||||
scheme.Scheme.Default(job)
|
scheme.Scheme.Default(job)
|
||||||
ctrl := &MPIJobController{
|
ctrl := &MPIJobController{}
|
||||||
scriptingImage: scriptingImage,
|
|
||||||
}
|
|
||||||
launcher := ctrl.newLauncherJob(job)
|
launcher := ctrl.newLauncherJob(job)
|
||||||
if !metav1.IsControlledBy(launcher, job) {
|
if !metav1.IsControlledBy(launcher, job) {
|
||||||
t.Errorf("Created launcher Pod is not controlled by Job")
|
t.Errorf("Created launcher Pod is not controlled by Job")
|
||||||
|
|
@ -1407,8 +1318,7 @@ func (f *fixture) newFakeMPIJobController() *MPIJobController {
|
||||||
|
|
||||||
k8sI := kubeinformers.NewSharedInformerFactory(kubeClient, noResyncPeriodFunc())
|
k8sI := kubeinformers.NewSharedInformerFactory(kubeClient, noResyncPeriodFunc())
|
||||||
return &MPIJobController{
|
return &MPIJobController{
|
||||||
recorder: &record.FakeRecorder{},
|
recorder: &record.FakeRecorder{},
|
||||||
podLister: k8sI.Core().V1().Pods().Lister(),
|
podLister: k8sI.Core().V1().Pods().Lister(),
|
||||||
scriptingImage: scriptingImage,
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -41,7 +41,7 @@ const (
|
||||||
envTestMPIOperatorImage = "TEST_MPI_OPERATOR_IMAGE"
|
envTestMPIOperatorImage = "TEST_MPI_OPERATOR_IMAGE"
|
||||||
envTestKindImage = "TEST_KIND_IMAGE"
|
envTestKindImage = "TEST_KIND_IMAGE"
|
||||||
|
|
||||||
defaultMPIOperatorImage = "kubeflow/mpi-operator:local"
|
defaultMPIOperatorImage = "mpioperator/mpi-operator:local"
|
||||||
defaultKindImage = "kindest/node:v1.21.2"
|
defaultKindImage = "kindest/node:v1.21.2"
|
||||||
openMPIImage = "mpioperator/mpi-pi:openmpi"
|
openMPIImage = "mpioperator/mpi-pi:openmpi"
|
||||||
intelMPIImage = "mpioperator/mpi-pi:intel"
|
intelMPIImage = "mpioperator/mpi-pi:intel"
|
||||||
|
|
|
||||||
|
|
@ -40,8 +40,7 @@ import (
|
||||||
)
|
)
|
||||||
|
|
||||||
const (
|
const (
|
||||||
waitInterval = 100 * time.Millisecond
|
waitInterval = 100 * time.Millisecond
|
||||||
scriptingImage = "alpine"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
func TestMPIJobSuccess(t *testing.T) {
|
func TestMPIJobSuccess(t *testing.T) {
|
||||||
|
|
@ -308,8 +307,7 @@ func startController(ctx context.Context, kClient kubernetes.Interface, mpiClien
|
||||||
kubeInformerFactory.Core().V1().Pods(),
|
kubeInformerFactory.Core().V1().Pods(),
|
||||||
nil,
|
nil,
|
||||||
mpiInformerFactory.Kubeflow().V2beta1().MPIJobs(),
|
mpiInformerFactory.Kubeflow().V2beta1().MPIJobs(),
|
||||||
"",
|
"")
|
||||||
scriptingImage)
|
|
||||||
|
|
||||||
go kubeInformerFactory.Start(ctx.Done())
|
go kubeInformerFactory.Start(ctx.Done())
|
||||||
go mpiInformerFactory.Start(ctx.Done())
|
go mpiInformerFactory.Start(ctx.Done())
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue