Initial code push

This commit is contained in:
Rong Ou 2018-05-30 17:02:15 -07:00 committed by Rong Ou
parent ddac780ed0
commit 9994bd02db
47 changed files with 4441 additions and 2 deletions

1
.dockerignore Symbolic link
View File

@ -0,0 +1 @@
.gitignore

2
.gitignore vendored Normal file
View File

@ -0,0 +1,2 @@
.idea/
vendor/

471
Gopkg.lock generated Normal file
View File

@ -0,0 +1,471 @@
# This file is autogenerated, do not edit; changes may be undone by the next 'dep ensure'.
[[projects]]
name = "github.com/davecgh/go-spew"
packages = ["spew"]
revision = "346938d642f2ec3594ed81d874461961cd0faa76"
version = "v1.1.0"
[[projects]]
name = "github.com/ghodss/yaml"
packages = ["."]
revision = "0ca9ea5df5451ffdf184b4428c902747c2c11cd7"
version = "v1.0.0"
[[projects]]
name = "github.com/gogo/protobuf"
packages = [
"proto",
"sortkeys"
]
revision = "1adfc126b41513cc696b209667c8656ea7aac67c"
version = "v1.0.0"
[[projects]]
branch = "master"
name = "github.com/golang/glog"
packages = ["."]
revision = "23def4e6c14b4da8ac2ed8007337bc5eb5007998"
[[projects]]
branch = "master"
name = "github.com/golang/groupcache"
packages = ["lru"]
revision = "24b0969c4cb722950103eed87108c8d291a8df00"
[[projects]]
name = "github.com/golang/protobuf"
packages = [
"proto",
"ptypes",
"ptypes/any",
"ptypes/duration",
"ptypes/timestamp"
]
revision = "b4deda0973fb4c70b50d226b1af49f3da59f5265"
version = "v1.1.0"
[[projects]]
branch = "master"
name = "github.com/google/gofuzz"
packages = ["."]
revision = "24818f796faf91cd76ec7bddd72458fbced7a6c1"
[[projects]]
name = "github.com/googleapis/gnostic"
packages = [
"OpenAPIv2",
"compiler",
"extensions"
]
revision = "7c663266750e7d82587642f65e60bc4083f1f84e"
version = "v0.2.0"
[[projects]]
branch = "master"
name = "github.com/hashicorp/golang-lru"
packages = [
".",
"simplelru"
]
revision = "0fb14efe8c47ae851c0034ed7a448854d3d34cf3"
[[projects]]
branch = "master"
name = "github.com/howeyc/gopass"
packages = ["."]
revision = "bf9dde6d0d2c004a008c27aaee91170c786f6db8"
[[projects]]
name = "github.com/imdario/mergo"
packages = ["."]
revision = "9d5f1277e9a8ed20c3684bda8fde67c05628518c"
version = "v0.3.4"
[[projects]]
name = "github.com/json-iterator/go"
packages = ["."]
revision = "ca39e5af3ece67bbcda3d0f4f56a8e24d9f2dad4"
version = "1.1.3"
[[projects]]
name = "github.com/modern-go/concurrent"
packages = ["."]
revision = "bacd9c7ef1dd9b15be4a9909b8ac7a4e313eec94"
version = "1.0.3"
[[projects]]
name = "github.com/modern-go/reflect2"
packages = ["."]
revision = "1df9eeb2bb81f327b96228865c5687bc2194af3f"
version = "1.0.0"
[[projects]]
name = "github.com/spf13/pflag"
packages = ["."]
revision = "583c0c0531f06d5278b7d917446061adc344b5cd"
version = "v1.0.1"
[[projects]]
branch = "master"
name = "golang.org/x/crypto"
packages = ["ssh/terminal"]
revision = "5ba7f63082460102a45837dbd1827e10f9479ac0"
[[projects]]
branch = "master"
name = "golang.org/x/net"
packages = [
"context",
"http/httpguts",
"http2",
"http2/hpack",
"idna"
]
revision = "1e491301e022f8f977054da4c2d852decd59571f"
[[projects]]
branch = "master"
name = "golang.org/x/sys"
packages = [
"unix",
"windows"
]
revision = "c11f84a56e43e20a78cee75a7c034031ecf57d1f"
[[projects]]
name = "golang.org/x/text"
packages = [
"collate",
"collate/build",
"internal/colltab",
"internal/gen",
"internal/tag",
"internal/triegen",
"internal/ucd",
"language",
"secure/bidirule",
"transform",
"unicode/bidi",
"unicode/cldr",
"unicode/norm",
"unicode/rangetable"
]
revision = "f21a4dfb5e38f5895301dc265a8def02365cc3d0"
version = "v0.3.0"
[[projects]]
branch = "master"
name = "golang.org/x/time"
packages = ["rate"]
revision = "fbb02b2291d28baffd63558aa44b4b56f178d650"
[[projects]]
branch = "master"
name = "golang.org/x/tools"
packages = [
"go/ast/astutil",
"imports",
"internal/fastwalk"
]
revision = "a5b4c53f6e8bdcafa95a94671bf2d1203365858b"
[[projects]]
name = "gopkg.in/inf.v0"
packages = ["."]
revision = "d2d2541c53f18d2a059457998ce2876cc8e67cbf"
version = "v0.9.1"
[[projects]]
name = "gopkg.in/yaml.v2"
packages = ["."]
revision = "5420a8b6744d3b0345ab293f6fcba19c978f1183"
version = "v2.2.1"
[[projects]]
name = "k8s.io/api"
packages = [
"admissionregistration/v1alpha1",
"admissionregistration/v1beta1",
"apps/v1",
"apps/v1beta1",
"apps/v1beta2",
"authentication/v1",
"authentication/v1beta1",
"authorization/v1",
"authorization/v1beta1",
"autoscaling/v1",
"autoscaling/v2beta1",
"batch/v1",
"batch/v1beta1",
"batch/v2alpha1",
"certificates/v1beta1",
"core/v1",
"events/v1beta1",
"extensions/v1beta1",
"networking/v1",
"policy/v1beta1",
"rbac/v1",
"rbac/v1alpha1",
"rbac/v1beta1",
"scheduling/v1alpha1",
"settings/v1alpha1",
"storage/v1",
"storage/v1alpha1",
"storage/v1beta1"
]
revision = "feb48db456a5912850dcccbd42a3535382ba76de"
version = "kubernetes-1.10.3"
[[projects]]
branch = "release-1.10"
name = "k8s.io/apimachinery"
packages = [
"pkg/api/errors",
"pkg/api/meta",
"pkg/api/resource",
"pkg/apis/meta/internalversion",
"pkg/apis/meta/v1",
"pkg/apis/meta/v1/unstructured",
"pkg/apis/meta/v1beta1",
"pkg/conversion",
"pkg/conversion/queryparams",
"pkg/fields",
"pkg/labels",
"pkg/runtime",
"pkg/runtime/schema",
"pkg/runtime/serializer",
"pkg/runtime/serializer/json",
"pkg/runtime/serializer/protobuf",
"pkg/runtime/serializer/recognizer",
"pkg/runtime/serializer/streaming",
"pkg/runtime/serializer/versioning",
"pkg/selection",
"pkg/types",
"pkg/util/cache",
"pkg/util/clock",
"pkg/util/diff",
"pkg/util/errors",
"pkg/util/framer",
"pkg/util/intstr",
"pkg/util/json",
"pkg/util/mergepatch",
"pkg/util/net",
"pkg/util/runtime",
"pkg/util/sets",
"pkg/util/strategicpatch",
"pkg/util/validation",
"pkg/util/validation/field",
"pkg/util/wait",
"pkg/util/yaml",
"pkg/version",
"pkg/watch",
"third_party/forked/golang/json",
"third_party/forked/golang/reflect"
]
revision = "31dade610c053669d8054bfd847da657251e8c1a"
[[projects]]
name = "k8s.io/client-go"
packages = [
"discovery",
"discovery/fake",
"informers",
"informers/admissionregistration",
"informers/admissionregistration/v1alpha1",
"informers/admissionregistration/v1beta1",
"informers/apps",
"informers/apps/v1",
"informers/apps/v1beta1",
"informers/apps/v1beta2",
"informers/autoscaling",
"informers/autoscaling/v1",
"informers/autoscaling/v2beta1",
"informers/batch",
"informers/batch/v1",
"informers/batch/v1beta1",
"informers/batch/v2alpha1",
"informers/certificates",
"informers/certificates/v1beta1",
"informers/core",
"informers/core/v1",
"informers/events",
"informers/events/v1beta1",
"informers/extensions",
"informers/extensions/v1beta1",
"informers/internalinterfaces",
"informers/networking",
"informers/networking/v1",
"informers/policy",
"informers/policy/v1beta1",
"informers/rbac",
"informers/rbac/v1",
"informers/rbac/v1alpha1",
"informers/rbac/v1beta1",
"informers/scheduling",
"informers/scheduling/v1alpha1",
"informers/settings",
"informers/settings/v1alpha1",
"informers/storage",
"informers/storage/v1",
"informers/storage/v1alpha1",
"informers/storage/v1beta1",
"kubernetes",
"kubernetes/fake",
"kubernetes/scheme",
"kubernetes/typed/admissionregistration/v1alpha1",
"kubernetes/typed/admissionregistration/v1alpha1/fake",
"kubernetes/typed/admissionregistration/v1beta1",
"kubernetes/typed/admissionregistration/v1beta1/fake",
"kubernetes/typed/apps/v1",
"kubernetes/typed/apps/v1/fake",
"kubernetes/typed/apps/v1beta1",
"kubernetes/typed/apps/v1beta1/fake",
"kubernetes/typed/apps/v1beta2",
"kubernetes/typed/apps/v1beta2/fake",
"kubernetes/typed/authentication/v1",
"kubernetes/typed/authentication/v1/fake",
"kubernetes/typed/authentication/v1beta1",
"kubernetes/typed/authentication/v1beta1/fake",
"kubernetes/typed/authorization/v1",
"kubernetes/typed/authorization/v1/fake",
"kubernetes/typed/authorization/v1beta1",
"kubernetes/typed/authorization/v1beta1/fake",
"kubernetes/typed/autoscaling/v1",
"kubernetes/typed/autoscaling/v1/fake",
"kubernetes/typed/autoscaling/v2beta1",
"kubernetes/typed/autoscaling/v2beta1/fake",
"kubernetes/typed/batch/v1",
"kubernetes/typed/batch/v1/fake",
"kubernetes/typed/batch/v1beta1",
"kubernetes/typed/batch/v1beta1/fake",
"kubernetes/typed/batch/v2alpha1",
"kubernetes/typed/batch/v2alpha1/fake",
"kubernetes/typed/certificates/v1beta1",
"kubernetes/typed/certificates/v1beta1/fake",
"kubernetes/typed/core/v1",
"kubernetes/typed/core/v1/fake",
"kubernetes/typed/events/v1beta1",
"kubernetes/typed/events/v1beta1/fake",
"kubernetes/typed/extensions/v1beta1",
"kubernetes/typed/extensions/v1beta1/fake",
"kubernetes/typed/networking/v1",
"kubernetes/typed/networking/v1/fake",
"kubernetes/typed/policy/v1beta1",
"kubernetes/typed/policy/v1beta1/fake",
"kubernetes/typed/rbac/v1",
"kubernetes/typed/rbac/v1/fake",
"kubernetes/typed/rbac/v1alpha1",
"kubernetes/typed/rbac/v1alpha1/fake",
"kubernetes/typed/rbac/v1beta1",
"kubernetes/typed/rbac/v1beta1/fake",
"kubernetes/typed/scheduling/v1alpha1",
"kubernetes/typed/scheduling/v1alpha1/fake",
"kubernetes/typed/settings/v1alpha1",
"kubernetes/typed/settings/v1alpha1/fake",
"kubernetes/typed/storage/v1",
"kubernetes/typed/storage/v1/fake",
"kubernetes/typed/storage/v1alpha1",
"kubernetes/typed/storage/v1alpha1/fake",
"kubernetes/typed/storage/v1beta1",
"kubernetes/typed/storage/v1beta1/fake",
"listers/admissionregistration/v1alpha1",
"listers/admissionregistration/v1beta1",
"listers/apps/v1",
"listers/apps/v1beta1",
"listers/apps/v1beta2",
"listers/autoscaling/v1",
"listers/autoscaling/v2beta1",
"listers/batch/v1",
"listers/batch/v1beta1",
"listers/batch/v2alpha1",
"listers/certificates/v1beta1",
"listers/core/v1",
"listers/events/v1beta1",
"listers/extensions/v1beta1",
"listers/networking/v1",
"listers/policy/v1beta1",
"listers/rbac/v1",
"listers/rbac/v1alpha1",
"listers/rbac/v1beta1",
"listers/scheduling/v1alpha1",
"listers/settings/v1alpha1",
"listers/storage/v1",
"listers/storage/v1alpha1",
"listers/storage/v1beta1",
"pkg/apis/clientauthentication",
"pkg/apis/clientauthentication/v1alpha1",
"pkg/version",
"plugin/pkg/client/auth/exec",
"rest",
"rest/watch",
"testing",
"tools/auth",
"tools/cache",
"tools/clientcmd",
"tools/clientcmd/api",
"tools/clientcmd/api/latest",
"tools/clientcmd/api/v1",
"tools/metrics",
"tools/pager",
"tools/record",
"tools/reference",
"transport",
"util/buffer",
"util/cert",
"util/flowcontrol",
"util/homedir",
"util/integer",
"util/retry",
"util/workqueue"
]
revision = "29ae1f00c3d8bb759d6246c357573a9af3c659c1"
version = "kubernetes-1.10.3"
[[projects]]
branch = "master"
name = "k8s.io/code-generator"
packages = [
"cmd/client-gen",
"cmd/client-gen/args",
"cmd/client-gen/generators",
"cmd/client-gen/generators/fake",
"cmd/client-gen/generators/scheme",
"cmd/client-gen/generators/util",
"cmd/client-gen/path",
"cmd/client-gen/types",
"pkg/util"
]
revision = "2381612e86473457f7e1b8f7edf16cf1e191d859"
[[projects]]
branch = "master"
name = "k8s.io/gengo"
packages = [
"args",
"generator",
"namer",
"parser",
"types"
]
revision = "2e1a79edcaecf0bfbde129a1fd55624b66adb699"
[[projects]]
branch = "master"
name = "k8s.io/kube-openapi"
packages = ["pkg/util/proto"]
revision = "8a9b82f00b3a86eac24681da3f9fe6c34c01cea2"
[[projects]]
branch = "master"
name = "k8s.io/sample-controller"
packages = ["pkg/signals"]
revision = "9946af3e3014758bed13e404bcd95ed27a4e37c2"
[solve-meta]
analyzer-name = "dep"
analyzer-version = 1
inputs-digest = "80b42d5ea1ce943bcf8a5b5cdfd29034113dfcb280253423c82a5a5afbcef951"
solver-name = "gps-cdcl"
solver-version = 1

13
Gopkg.toml Normal file
View File

@ -0,0 +1,13 @@
required = ["k8s.io/code-generator/cmd/client-gen"]
[[override]]
name = "k8s.io/api"
version = "kubernetes-1.10.3"
[[override]]
name = "k8s.io/apimachinery"
version = "kubernetes-1.10.3"
[[override]]
name = "k8s.io/client-go"
version = "kubernetes-1.10.3"

202
LICENSE Normal file
View File

@ -0,0 +1,202 @@
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright [yyyy] [name of copyright owner]
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

View File

@ -1,2 +1,52 @@
# mpi-operator # MPI Operator
Repository for the MPI operator.
The MPI Operator makes it easy to run allreduce-style distributed training.
## Build
Check out the code:
```shell
mkdir -p ${GOPATH}/src/github.com/kubeflow
cd ${GOPATH}/src/github.com/kubeflow
git clone https://github.com/kubeflow/mpi-operator.git
cd mpi-operator
```
Build and push the `mpi-operator` Docker image:
```shell
docker build -t rongou/mpi-operator:0.1.0 -f cmd/mpi-operator/Dockerfile .
docker push rongou/mpi-operator:0.1.0
```
Build and push the `kubectl-delivery` Docker image:
```shell
docker build -t rongou/kubectl-delivery:0.1.0 -f cmd/kubectl-delivery/Dockerfile .
docker push rongou/kubectl-delivery:0.1.0
```
## Deploy
```shell
kubectl create -f deploy/
```
## Test
Build and push the `horovod` Docker image (this takes a while):
```shell
docker build -t rongou/horovod https://github.com/uber/horovod.git
docker push rongou/horovod
```
Build and push the `tensorflow_benchmarks` Docker image:
```shell
docker build -t rongou/tensorflow_benchmarks examples/tensorflow-benchmarks
docker push rongou/tensorflow_benchmarks
```
Launch a multi-node tensorflow benchmark training job:
```shell
kubectl create -f examples/tensorflow-benchmarks.yaml
```
Once everything starts, the logs are available in the `launcher` pod.

View File

@ -0,0 +1,17 @@
# Build stage: download the kubectl binary so the final image does not need wget.
FROM alpine:3.7 AS build

# Install kubectl.
ENV K8S_VERSION=v1.10.3
RUN apk add --no-cache wget
RUN wget -q https://storage.googleapis.com/kubernetes-release/release/${K8S_VERSION}/bin/linux/amd64/kubectl
RUN chmod +x ./kubectl
RUN mv ./kubectl /bin/kubectl

# Final stage: ship only kubectl and the delivery script.
FROM alpine:3.7
COPY --from=build /bin/kubectl /bin/kubectl
# Copy the script straight from the build context (the repository root) instead
# of staging the whole project in the build stage; this layer is then rebuilt
# only when the script itself changes.
COPY cmd/kubectl-delivery/deliver_kubectl.sh .
ENTRYPOINT ["./deliver_kubectl.sh"]

View File

@ -0,0 +1,24 @@
#!/bin/sh
# Copyright 2018 The Kubeflow Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Copies the kubectl binary found on PATH into the directory named by
# TARGET_DIR. Exits with status 1 if kubectl is not installed, and aborts with
# a usage hint if TARGET_DIR is unset or empty.

# Resolve kubectl once so the existence check and the copy use the same path.
KUBECTL_BIN=$(command -v kubectl) || {
  echo "kubectl needs to be installed" >&2
  exit 1
}

# Quoted so the expanded value is never word-split or globbed.
: "${TARGET_DIR:?Need to set TARGET_DIR, e.g. /opt/kube}"

# Quote both paths so directories containing spaces are handled correctly.
cp "${KUBECTL_BIN}" "${TARGET_DIR}"

View File

@ -0,0 +1,24 @@
# Build stage: compile the mpi-operator binary inside a Go toolchain image.
FROM golang:1.10.2-alpine3.7 AS build
# Install tools required to build the project.
# We need to run `docker build --no-cache .` to update those dependencies.
RUN apk add --no-cache git
RUN go get github.com/golang/dep/cmd/dep
# Gopkg.toml and Gopkg.lock list the project dependencies.
# These layers are only rebuilt when the Gopkg files are updated.
COPY Gopkg.lock Gopkg.toml /go/src/github.com/kubeflow/mpi-operator/
WORKDIR /go/src/github.com/kubeflow/mpi-operator/
# Install library dependencies.
RUN dep ensure -vendor-only
# Copy the whole project and build it.
# This layer is rebuilt whenever a file has changed in the project directory.
COPY . /go/src/github.com/kubeflow/mpi-operator/
RUN go build -o /bin/mpi-operator github.com/kubeflow/mpi-operator/cmd/mpi-operator
# Final stage: ship only the compiled binary on a plain Alpine base.
FROM alpine:3.7
COPY --from=build /bin/mpi-operator /bin/mpi-operator
ENTRYPOINT ["/bin/mpi-operator"]
# With no arguments supplied, the container runs the binary with --help.
CMD ["--help"]

89
cmd/mpi-operator/main.go Normal file
View File

@ -0,0 +1,89 @@
// Copyright 2018 The Kubeflow Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package main
import (
"flag"
"time"
"github.com/golang/glog"
kubeinformers "k8s.io/client-go/informers"
"k8s.io/client-go/kubernetes"
"k8s.io/client-go/tools/clientcmd"
"k8s.io/sample-controller/pkg/signals"
clientset "github.com/kubeflow/mpi-operator/pkg/client/clientset/versioned"
informers "github.com/kubeflow/mpi-operator/pkg/client/informers/externalversions"
"github.com/kubeflow/mpi-operator/pkg/controllers"
)
// Command-line configuration; each variable is bound to a flag in init.
var (
	// masterURL is the address of the Kubernetes API server (-master).
	masterURL string
	// kubeConfig is the path to a kubeconfig file (-kubeConfig).
	kubeConfig string
	// gpusPerNode is the maximum number of GPUs available per node (-gpus-per-node).
	gpusPerNode int
	// kubectlDeliveryImage is the container image used to deliver the kubectl
	// binary (-kubectl-delivery-image).
	kubectlDeliveryImage string
)
// main parses the command-line flags, builds the Kubernetes and Kubeflow
// clientsets from the resulting configuration, constructs the MPIJob
// controller on top of shared informers, and runs it. Any failure while
// building a client or running the controller is logged fatally.
func main() {
	const (
		// resyncPeriod is handed to both shared informer factories.
		resyncPeriod = 30 * time.Second
		// workerCount is the first argument passed to the controller's Run.
		workerCount = 2
	)

	flag.Parse()

	// Install the signal handler first so the first shutdown signal is
	// handled gracefully.
	stop := signals.SetupSignalHandler()

	restConfig, err := clientcmd.BuildConfigFromFlags(masterURL, kubeConfig)
	if err != nil {
		glog.Fatalf("Error building kubeConfig: %s", err.Error())
	}

	k8sClient, err := kubernetes.NewForConfig(restConfig)
	if err != nil {
		glog.Fatalf("Error building kubernetes clientset: %s", err.Error())
	}

	mpiClient, err := clientset.NewForConfig(restConfig)
	if err != nil {
		glog.Fatalf("Error building kubeflow clientset: %s", err.Error())
	}

	k8sInformers := kubeinformers.NewSharedInformerFactory(k8sClient, resyncPeriod)
	mpiInformers := informers.NewSharedInformerFactory(mpiClient, resyncPeriod)

	// Group accessors used for more than one informer below.
	coreV1 := k8sInformers.Core().V1()
	rbacV1 := k8sInformers.Rbac().V1()

	mpiJobController := controllers.NewMPIJobController(
		k8sClient,
		mpiClient,
		coreV1.ConfigMaps(),
		coreV1.ServiceAccounts(),
		rbacV1.Roles(),
		rbacV1.RoleBindings(),
		k8sInformers.Apps().V1().StatefulSets(),
		k8sInformers.Batch().V1().Jobs(),
		mpiInformers.Kubeflow().V1alpha1().MPIJobs(),
		gpusPerNode,
		kubectlDeliveryImage,
	)

	go k8sInformers.Start(stop)
	go mpiInformers.Start(stop)

	if runErr := mpiJobController.Run(workerCount, stop); runErr != nil {
		glog.Fatalf("Error running controller: %s", runErr.Error())
	}
}
// init registers the operator's command-line flags on the default flag set and
// binds them to the package-level configuration variables. Every string flag
// defaults to the empty string; gpus-per-node defaults to 1.
func init() {
	stringFlags := []struct {
		dest  *string
		name  string
		usage string
	}{
		{&kubeConfig, "kubeConfig", "Path to a kubeConfig. Only required if out-of-cluster."},
		{&masterURL, "master", "The address of the Kubernetes API server. Overrides any value in kubeConfig. Only required if out-of-cluster."},
		{&kubectlDeliveryImage, "kubectl-delivery-image", "The container image used to deliver the kubectl binary."},
	}
	for _, f := range stringFlags {
		flag.StringVar(f.dest, f.name, "", f.usage)
	}

	flag.IntVar(&gpusPerNode, "gpus-per-node", 1, "The maximum number of GPUs available per node.")
}

15
deploy/0-crd.yaml Normal file
View File

@ -0,0 +1,15 @@
apiVersion: apiextensions.k8s.io/v1beta1
kind: CustomResourceDefinition
metadata:
name: mpijobs.kubeflow.org
spec:
group: kubeflow.org
version: v1alpha1
scope: Namespaced
names:
plural: mpijobs
singular: mpijob
kind: MPIJob
shortNames:
- mj
- mpij

4
deploy/1-namespace.yaml Normal file
View File

@ -0,0 +1,4 @@
apiVersion: v1
kind: Namespace
metadata:
name: mpi-operator

93
deploy/2-rbac.yaml Normal file
View File

@ -0,0 +1,93 @@
kind: ClusterRole
apiVersion: rbac.authorization.k8s.io/v1
metadata:
name: mpi-operator
rules:
- apiGroups:
- ""
resources:
- configmaps
- serviceaccounts
verbs:
- create
- list
- watch
# This is needed for the launcher Role.
- apiGroups:
- ""
resources:
- pods
verbs:
- get
# This is needed for the launcher Role.
- apiGroups:
- ""
resources:
- pods/exec
verbs:
- create
- apiGroups:
- ""
resources:
- events
verbs:
- create
- patch
- apiGroups:
- rbac.authorization.k8s.io
resources:
- roles
- rolebindings
verbs:
- create
- list
- watch
- apiGroups:
- apps
resources:
- statefulsets
verbs:
- create
- list
- watch
- apiGroups:
- batch
resources:
- jobs
verbs:
- create
- list
- watch
- apiGroups:
- apiextensions.k8s.io
resources:
- customresourcedefinitions
verbs:
- create
- get
- apiGroups:
- kubeflow.org
resources:
- mpijobs
verbs:
- "*"
---
apiVersion: v1
kind: ServiceAccount
metadata:
name: mpi-operator
namespace: mpi-operator
---
# Binds the mpi-operator ClusterRole to the operator's ServiceAccount.
# ClusterRoleBinding is a cluster-scoped resource, so its metadata carries no
# namespace; only the ServiceAccount subject is namespaced.
kind: ClusterRoleBinding
apiVersion: rbac.authorization.k8s.io/v1
metadata:
  name: mpi-operator
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: mpi-operator
subjects:
- kind: ServiceAccount
  name: mpi-operator
  namespace: mpi-operator

View File

@ -0,0 +1,27 @@
apiVersion: apps/v1
kind: Deployment
metadata:
name: mpi-operator
namespace: mpi-operator
labels:
app: mpi-operator
spec:
replicas: 1
selector:
matchLabels:
app: mpi-operator
template:
metadata:
labels:
app: mpi-operator
spec:
serviceAccountName: mpi-operator
containers:
- name: mpi-operator
image: rongou/mpi-operator:0.1.0
args: [
"--gpus-per-node", "8",
"--kubectl-delivery-image",
"rongou/kubectl-delivery:0.1.0"
]
imagePullPolicy: Always

View File

@ -0,0 +1,12 @@
# This file shows how to run multi-node training benchmarks using an MPIJob.
apiVersion: kubeflow.org/v1alpha1
kind: MPIJob
metadata:
name: tensorflow-benchmarks-16
spec:
gpus: 16
template:
spec:
containers:
- image: rongou/tensorflow_benchmarks:latest
name: tensorflow-benchmarks

View File

@ -0,0 +1,12 @@
# Image that runs the TensorFlow CNN benchmarks on top of the horovod base image.
FROM rongou/horovod
# Clone the TensorFlow benchmarks repository under /tensorflow.
RUN mkdir /tensorflow
WORKDIR "/tensorflow"
RUN git clone https://github.com/tensorflow/benchmarks
WORKDIR "/tensorflow/benchmarks"
# Default command: launch tf_cnn_benchmarks through mpirun with the resnet101
# model, a batch size of 64, and horovod as the variable update method.
CMD mpirun \
python scripts/tf_cnn_benchmarks/tf_cnn_benchmarks.py \
--model resnet101 \
--batch_size 64 \
--variable_update horovod

View File

@ -0,0 +1,13 @@
// Copyright YEAR The Kubeflow Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

26
hack/update-codegen.sh Executable file
View File

@ -0,0 +1,26 @@
#!/usr/bin/env bash
# Copyright 2018 The Kubeflow Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Abort on any command failure, on use of an unset variable, and on a failure
# anywhere inside a pipeline.
set -o errexit
set -o nounset
set -o pipefail

# Repository root, derived from this script's own location so the script can
# be invoked from any working directory.
SCRIPT_ROOT="$(dirname "${BASH_SOURCE[0]}")/.."

# Run the vendored code-generator for the kubeflow:v1alpha1 API group,
# requesting the deepcopy, client, informer and lister generators. Both the
# generator and the license header are resolved relative to SCRIPT_ROOT, and
# every expansion is quoted so paths containing spaces survive.
"${SCRIPT_ROOT}/vendor/k8s.io/code-generator/generate-groups.sh" "deepcopy,client,informer,lister" \
  github.com/kubeflow/mpi-operator/pkg/client github.com/kubeflow/mpi-operator/pkg/apis \
  kubeflow:v1alpha1 \
  --go-header-file "${SCRIPT_ROOT}/hack/custom-boilerplate.go.txt"

48
hack/verify-codegen.sh Executable file
View File

@ -0,0 +1,48 @@
#!/usr/bin/env bash
# Copyright 2018 The Kubeflow Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Abort on any command failure, on use of an unset variable, and on a failure
# anywhere inside a pipeline.
set -o errexit
set -o nounset
set -o pipefail
# Repository root, derived from this script's location.
SCRIPT_ROOT=$(dirname "${BASH_SOURCE}")/..
# DIFFROOT is the tree being verified (presumably where update-codegen.sh
# writes its output); TMP_DIFFROOT holds a snapshot of it inside the scratch
# directory _tmp.
DIFFROOT="${SCRIPT_ROOT}/pkg"
TMP_DIFFROOT="${SCRIPT_ROOT}/_tmp/pkg"
_tmp="${SCRIPT_ROOT}/_tmp"
# Remove the scratch directory.
cleanup() {
rm -rf "${_tmp}"
}
# Clean up on exit or interrupt, and start from an empty scratch directory.
trap "cleanup" EXIT SIGINT
cleanup
# Snapshot the current pkg tree before regenerating.
mkdir -p "${TMP_DIFFROOT}"
cp -a "${DIFFROOT}"/* "${TMP_DIFFROOT}"
# Regenerate; from here on DIFFROOT is compared against the snapshot.
"${SCRIPT_ROOT}/hack/update-codegen.sh"
echo "diffing ${DIFFROOT} against freshly generated codegen"
# Capture diff's exit status rather than letting errexit abort the script,
# so the snapshot can still be copied back below.
ret=0
diff -Naupr "${DIFFROOT}" "${TMP_DIFFROOT}" || ret=$?
# Copy the snapshot back over the working tree, restoring files that
# regeneration changed.
cp -a "${TMP_DIFFROOT}"/* "${DIFFROOT}"
# Report the outcome; a non-empty diff fails the script.
if [[ $ret -eq 0 ]]
then
echo "${DIFFROOT} up to date."
else
echo "${DIFFROOT} is out of date. Please run hack/update-codegen.sh"
exit 1
fi

View File

@ -0,0 +1,17 @@
// Copyright 2018 The Kubeflow Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// +k8s:deepcopy-gen=package
// +groupName=kubeflow.org
package v1alpha1

View File

@ -0,0 +1,49 @@
// Copyright 2018 The Kubeflow Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package v1alpha1
import (
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/runtime/schema"
)
// Identity of the API type registered by this package; these values feed
// SchemeGroupVersion and SchemeGroupVersionKind.
const (
version = "v1alpha1"
groupName = "kubeflow.org"
kind = "MPIJob"
)
var (
// SchemeBuilder collects the functions that register this package's types;
// it is built from addKnownTypes.
SchemeBuilder = runtime.NewSchemeBuilder(addKnownTypes)
// AddToScheme applies the functions in SchemeBuilder to a scheme.
AddToScheme = SchemeBuilder.AddToScheme
// SchemeGroupVersion is the group/version under which the types are registered.
SchemeGroupVersion = schema.GroupVersion{Group: groupName, Version: version}
// SchemeGroupVersionKind is the group/version/kind of the MPIJob type.
SchemeGroupVersionKind = schema.GroupVersionKind{Group: groupName, Version: version, Kind: kind}
)
// Resource qualifies an unqualified resource name with this package's API
// group and returns the resulting GroupResource.
func Resource(resource string) schema.GroupResource {
	qualified := SchemeGroupVersion.WithResource(resource)
	return qualified.GroupResource()
}
// addKnownTypes registers MPIJob and MPIJobList under SchemeGroupVersion in
// the supplied scheme, then adds the meta/v1 types for that group version.
// It always returns nil.
func addKnownTypes(scheme *runtime.Scheme) error {
	known := []runtime.Object{
		&MPIJob{},
		&MPIJobList{},
	}
	scheme.AddKnownTypes(SchemeGroupVersion, known...)
	metav1.AddToGroupVersion(scheme, SchemeGroupVersion)
	return nil
}

View File

@ -0,0 +1,69 @@
// Copyright 2018 The Kubeflow Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package v1alpha1
import (
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)
// MPIJob is the top-level API object: standard type and object metadata plus
// a desired Spec and an observed Status.
// +genclient
// +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object
type MPIJob struct {
metav1.TypeMeta `json:",inline"`
metav1.ObjectMeta `json:"metadata,omitempty"`
// Spec is the desired configuration of the job.
Spec MPIJobSpec `json:"spec,omitempty"`
// Status is the reported state of the job.
Status MPIJobStatus `json:"status,omitempty"`
}
// MPIJobList is a list of MPIJob objects.
// +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object
type MPIJobList struct {
metav1.TypeMeta `json:",inline"`
metav1.ListMeta `json:"metadata"`
// Items holds the MPIJobs in the list.
Items []MPIJob `json:"items"`
}
// MPIJobSpec is the desired configuration of an MPIJob.
type MPIJobSpec struct {
// Specifies the desired number of GPUs the MPIJob should run on.
// The field is a pointer, so an unset value is distinguishable from zero.
// +optional
GPUs *int32 `json:"gpus,omitempty"`
// Describes the pod that will be created when executing an MPIJob.
Template corev1.PodTemplateSpec `json:"template,omitempty"`
}
// MPIJobLauncherStatusType is the string-typed status of an MPIJob launcher.
type MPIJobLauncherStatusType string
// These are valid launcher statuses of an MPIJob.
const (
// LauncherActive means the MPIJob launcher is actively running.
LauncherActive MPIJobLauncherStatusType = "Active"
// LauncherSucceeded means the MPIJob launcher has succeeded.
LauncherSucceeded MPIJobLauncherStatusType = "Succeeded"
// LauncherFailed means the MPIJob launcher has failed its execution.
LauncherFailed MPIJobLauncherStatusType = "Failed"
)
// MPIJobStatus is the reported state of an MPIJob.
type MPIJobStatus struct {
// Current status of the launcher job.
// +optional
LauncherStatus MPIJobLauncherStatusType `json:"launcherStatus,omitempty"`
// The number of available worker replicas.
// +optional
WorkerReplicas int32 `json:"workerReplicas,omitempty"`
}

View File

@ -0,0 +1,126 @@
// +build !ignore_autogenerated
// Copyright 2018 The Kubeflow Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Code generated by deepcopy-gen. DO NOT EDIT.
package v1alpha1
import (
runtime "k8s.io/apimachinery/pkg/runtime"
)
// DeepCopyInto writes a deep copy of the receiver into out. in must be non-nil.
func (in *MPIJob) DeepCopyInto(out *MPIJob) {
	// TypeMeta and Status are copied by value as part of this assignment;
	// ObjectMeta and Spec are then replaced with deep copies.
	*out = *in
	in.ObjectMeta.DeepCopyInto(&out.ObjectMeta)
	in.Spec.DeepCopyInto(&out.Spec)
}
// DeepCopy returns a newly allocated deep copy of the receiver, or nil when
// the receiver is nil.
func (in *MPIJob) DeepCopy() *MPIJob {
	if in == nil {
		return nil
	}
	var clone MPIJob
	in.DeepCopyInto(&clone)
	return &clone
}
// DeepCopyObject returns a deep copy of the receiver as a runtime.Object.
// A nil copy is returned as an untyped nil rather than as a nil *MPIJob
// wrapped in the interface.
func (in *MPIJob) DeepCopyObject() runtime.Object {
	clone := in.DeepCopy()
	if clone == nil {
		return nil
	}
	return clone
}
// DeepCopyInto writes a deep copy of the receiver into out. in must be non-nil.
func (in *MPIJobList) DeepCopyInto(out *MPIJobList) {
	// TypeMeta and ListMeta are copied by value as part of this assignment.
	*out = *in
	if in.Items == nil {
		return
	}
	// Give out its own backing array and deep-copy every element into it.
	items := make([]MPIJob, len(in.Items))
	for i := range in.Items {
		in.Items[i].DeepCopyInto(&items[i])
	}
	out.Items = items
}
// DeepCopy returns a newly allocated deep copy of the receiver, or nil when
// the receiver is nil.
func (in *MPIJobList) DeepCopy() *MPIJobList {
	if in == nil {
		return nil
	}
	var clone MPIJobList
	in.DeepCopyInto(&clone)
	return &clone
}
// DeepCopyObject returns a deep copy of the receiver as a runtime.Object.
// A nil copy is returned as an untyped nil rather than as a nil *MPIJobList
// wrapped in the interface.
func (in *MPIJobList) DeepCopyObject() runtime.Object {
	clone := in.DeepCopy()
	if clone == nil {
		return nil
	}
	return clone
}
// DeepCopyInto writes a deep copy of the receiver into out. in must be non-nil.
func (in *MPIJobSpec) DeepCopyInto(out *MPIJobSpec) {
	*out = *in
	// GPUs is a pointer: give out its own int32 so the copies do not alias.
	// When in.GPUs is nil, the assignment above has already set out.GPUs to
	// nil, so no second nil check is needed inside the branch.
	if in.GPUs != nil {
		gpus := *in.GPUs
		out.GPUs = &gpus
	}
	in.Template.DeepCopyInto(&out.Template)
}
// DeepCopy returns a newly allocated deep copy of the receiver, or nil when
// the receiver is nil.
func (in *MPIJobSpec) DeepCopy() *MPIJobSpec {
	if in == nil {
		return nil
	}
	var clone MPIJobSpec
	in.DeepCopyInto(&clone)
	return &clone
}
// DeepCopyInto copies the receiver into out by plain assignment.
// in must be non-nil.
func (in *MPIJobStatus) DeepCopyInto(out *MPIJobStatus) {
	*out = *in
}
// DeepCopy returns a newly allocated copy of the receiver, or nil when the
// receiver is nil.
func (in *MPIJobStatus) DeepCopy() *MPIJobStatus {
	if in == nil {
		return nil
	}
	var clone MPIJobStatus
	in.DeepCopyInto(&clone)
	return &clone
}

View File

@ -0,0 +1,96 @@
// Copyright 2018 The Kubeflow Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Code generated by client-gen. DO NOT EDIT.
package versioned
import (
kubeflowv1alpha1 "github.com/kubeflow/mpi-operator/pkg/client/clientset/versioned/typed/kubeflow/v1alpha1"
discovery "k8s.io/client-go/discovery"
rest "k8s.io/client-go/rest"
flowcontrol "k8s.io/client-go/util/flowcontrol"
)
// Interface is the set of clients exposed by a clientset.
type Interface interface {
// Discovery returns the discovery client.
Discovery() discovery.DiscoveryInterface
// KubeflowV1alpha1 returns the typed client for the kubeflow v1alpha1 API.
KubeflowV1alpha1() kubeflowv1alpha1.KubeflowV1alpha1Interface
// Deprecated: please explicitly pick a version if possible.
Kubeflow() kubeflowv1alpha1.KubeflowV1alpha1Interface
}
// Clientset contains the clients for groups. Each group has exactly one
// version included in a Clientset.
type Clientset struct {
// Embedded discovery client; returned by Discovery().
*discovery.DiscoveryClient
// Typed client for the kubeflow v1alpha1 API; returned by KubeflowV1alpha1()
// and Kubeflow().
kubeflowV1alpha1 *kubeflowv1alpha1.KubeflowV1alpha1Client
}
// KubeflowV1alpha1 returns the KubeflowV1alpha1Client held by this Clientset.
func (c *Clientset) KubeflowV1alpha1() kubeflowv1alpha1.KubeflowV1alpha1Interface {
return c.kubeflowV1alpha1
}
// Kubeflow returns the same v1alpha1 client as KubeflowV1alpha1.
//
// Deprecated: Kubeflow retrieves the default version of KubeflowClient.
// Please explicitly pick a version.
func (c *Clientset) Kubeflow() kubeflowv1alpha1.KubeflowV1alpha1Interface {
return c.kubeflowV1alpha1
}
// Discovery returns the embedded DiscoveryClient, or nil when called on a
// nil Clientset.
func (c *Clientset) Discovery() discovery.DiscoveryInterface {
	if c != nil {
		return c.DiscoveryClient
	}
	return nil
}
// NewForConfig builds a Clientset from the given config. It works on a
// shallow copy of c, so the caller's config is not modified. When the config
// has no rate limiter but a positive QPS, a token-bucket limiter is created
// on the copy and handed to both the typed and the discovery client.
func NewForConfig(c *rest.Config) (*Clientset, error) {
	cfg := *c
	if cfg.RateLimiter == nil && cfg.QPS > 0 {
		cfg.RateLimiter = flowcontrol.NewTokenBucketRateLimiter(cfg.QPS, cfg.Burst)
	}

	kubeflowClient, err := kubeflowv1alpha1.NewForConfig(&cfg)
	if err != nil {
		return nil, err
	}

	discoveryClient, err := discovery.NewDiscoveryClientForConfig(&cfg)
	if err != nil {
		return nil, err
	}

	return &Clientset{
		DiscoveryClient:  discoveryClient,
		kubeflowV1alpha1: kubeflowClient,
	}, nil
}
// NewForConfigOrDie builds a Clientset from the given config using the
// OrDie constructors of the typed and discovery clients, which panic on a
// config error.
func NewForConfigOrDie(c *rest.Config) *Clientset {
	// Construct the typed client first, then the discovery client.
	kubeflowClient := kubeflowv1alpha1.NewForConfigOrDie(c)
	discoveryClient := discovery.NewDiscoveryClientForConfigOrDie(c)
	return &Clientset{
		DiscoveryClient:  discoveryClient,
		kubeflowV1alpha1: kubeflowClient,
	}
}
// New builds a Clientset whose typed and discovery clients both use the
// given RESTClient.
func New(c rest.Interface) *Clientset {
	kubeflowClient := kubeflowv1alpha1.New(c)
	discoveryClient := discovery.NewDiscoveryClient(c)
	return &Clientset{
		DiscoveryClient:  discoveryClient,
		kubeflowV1alpha1: kubeflowClient,
	}
}

View File

@ -0,0 +1,18 @@
// Copyright 2018 The Kubeflow Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Code generated by client-gen. DO NOT EDIT.
// This package has the automatically generated clientset.
package versioned

View File

@ -0,0 +1,80 @@
// Copyright 2018 The Kubeflow Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Code generated by client-gen. DO NOT EDIT.
package fake
import (
clientset "github.com/kubeflow/mpi-operator/pkg/client/clientset/versioned"
kubeflowv1alpha1 "github.com/kubeflow/mpi-operator/pkg/client/clientset/versioned/typed/kubeflow/v1alpha1"
fakekubeflowv1alpha1 "github.com/kubeflow/mpi-operator/pkg/client/clientset/versioned/typed/kubeflow/v1alpha1/fake"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/watch"
"k8s.io/client-go/discovery"
fakediscovery "k8s.io/client-go/discovery/fake"
"k8s.io/client-go/testing"
)
// NewSimpleClientset returns a fake clientset pre-loaded with the provided
// objects. It is backed by a simple object tracker that applies creates,
// updates and deletions as-is, with no validation or defaulting, so it is
// not a substitute for a real clientset and is mostly useful in unit tests.
// It panics if one of the objects cannot be added to the tracker.
func NewSimpleClientset(objects ...runtime.Object) *Clientset {
	tracker := testing.NewObjectTracker(scheme, codecs.UniversalDecoder())
	for i := range objects {
		if err := tracker.Add(objects[i]); err != nil {
			panic(err)
		}
	}

	fakeClient := new(Clientset)
	fakeClient.discovery = &fakediscovery.FakeDiscovery{Fake: &fakeClient.Fake}

	// Route every verb on every resource to the tracker.
	fakeClient.AddReactor("*", "*", testing.ObjectReaction(tracker))
	// Serve watches from the tracker as well, scoped by resource and namespace.
	fakeClient.AddWatchReactor("*", func(action testing.Action) (bool, watch.Interface, error) {
		watcher, err := tracker.Watch(action.GetResource(), action.GetNamespace())
		if err != nil {
			return false, nil, err
		}
		return true, watcher, nil
	})

	return fakeClient
}
// Clientset implements clientset.Interface. Meant to be embedded into a
// struct to get a default implementation. This makes faking out just the method
// you want to test easier.
type Clientset struct {
// Embedded fake that the typed fake clients and the fake discovery client
// are pointed at.
testing.Fake
// Fake discovery client returned by Discovery().
discovery *fakediscovery.FakeDiscovery
}
// Discovery returns the fake discovery client stored in the Clientset.
func (c *Clientset) Discovery() discovery.DiscoveryInterface {
return c.discovery
}
// Compile-time check that *Clientset satisfies clientset.Interface.
var _ clientset.Interface = &Clientset{}
// KubeflowV1alpha1 returns a new FakeKubeflowV1alpha1 that points at this
// Clientset's embedded Fake.
func (c *Clientset) KubeflowV1alpha1() kubeflowv1alpha1.KubeflowV1alpha1Interface {
return &fakekubeflowv1alpha1.FakeKubeflowV1alpha1{Fake: &c.Fake}
}
// Kubeflow returns a new FakeKubeflowV1alpha1 that points at this Clientset's
// embedded Fake; it is built the same way as in KubeflowV1alpha1.
func (c *Clientset) Kubeflow() kubeflowv1alpha1.KubeflowV1alpha1Interface {
return &fakekubeflowv1alpha1.FakeKubeflowV1alpha1{Fake: &c.Fake}
}

View File

@ -0,0 +1,18 @@
// Copyright 2018 The Kubeflow Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Code generated by client-gen. DO NOT EDIT.
// This package has the automatically generated fake clientset.
package fake

View File

@ -0,0 +1,52 @@
// Copyright 2018 The Kubeflow Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Code generated by client-gen. DO NOT EDIT.
package fake
import (
kubeflowv1alpha1 "github.com/kubeflow/mpi-operator/pkg/apis/kubeflow/v1alpha1"
v1 "k8s.io/apimachinery/pkg/apis/meta/v1"
runtime "k8s.io/apimachinery/pkg/runtime"
schema "k8s.io/apimachinery/pkg/runtime/schema"
serializer "k8s.io/apimachinery/pkg/runtime/serializer"
)
// scheme is the package-private scheme used by the fake clientset; codecs and
// parameterCodec are built from it.
var scheme = runtime.NewScheme()
var codecs = serializer.NewCodecFactory(scheme)
var parameterCodec = runtime.NewParameterCodec(scheme)
// init registers the meta/v1 types for the "v1" version and this clientset's
// types in scheme.
func init() {
v1.AddToGroupVersion(scheme, schema.GroupVersion{Version: "v1"})
AddToScheme(scheme)
}
// AddToScheme adds all types of this clientset into the given scheme. This allows composition
// of clientsets, like in:
//
//   import (
//     "k8s.io/client-go/kubernetes"
//     clientsetscheme "k8s.io/client-go/kubernetes/scheme"
//     aggregatorclientsetscheme "k8s.io/kube-aggregator/pkg/client/clientset_generated/clientset/scheme"
//   )
//
//   kclientset, _ := kubernetes.NewForConfig(c)
//   aggregatorclientsetscheme.AddToScheme(clientsetscheme.Scheme)
//
// After this, RawExtensions in Kubernetes types will serialize kube-aggregator types
// correctly.
//
// NOTE(review): any value returned by kubeflowv1alpha1.AddToScheme is
// discarded here, so a registration failure would go unreported.
func AddToScheme(scheme *runtime.Scheme) {
kubeflowv1alpha1.AddToScheme(scheme)
}

View File

@ -0,0 +1,18 @@
// Copyright 2018 The Kubeflow Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Code generated by client-gen. DO NOT EDIT.
// This package contains the scheme of the automatically generated clientset.
package scheme

View File

@ -0,0 +1,52 @@
// Copyright 2018 The Kubeflow Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Code generated by client-gen. DO NOT EDIT.
package scheme
import (
kubeflowv1alpha1 "github.com/kubeflow/mpi-operator/pkg/apis/kubeflow/v1alpha1"
v1 "k8s.io/apimachinery/pkg/apis/meta/v1"
runtime "k8s.io/apimachinery/pkg/runtime"
schema "k8s.io/apimachinery/pkg/runtime/schema"
serializer "k8s.io/apimachinery/pkg/runtime/serializer"
)
// Scheme is the scheme holding this clientset's types; Codecs and
// ParameterCodec are built from it.
var Scheme = runtime.NewScheme()
var Codecs = serializer.NewCodecFactory(Scheme)
var ParameterCodec = runtime.NewParameterCodec(Scheme)
// init registers the meta/v1 types for the "v1" version and this clientset's
// types in Scheme.
func init() {
v1.AddToGroupVersion(Scheme, schema.GroupVersion{Version: "v1"})
AddToScheme(Scheme)
}
// AddToScheme adds all types of this clientset into the given scheme. This allows composition
// of clientsets, like in:
//
//   import (
//     "k8s.io/client-go/kubernetes"
//     clientsetscheme "k8s.io/client-go/kubernetes/scheme"
//     aggregatorclientsetscheme "k8s.io/kube-aggregator/pkg/client/clientset_generated/clientset/scheme"
//   )
//
//   kclientset, _ := kubernetes.NewForConfig(c)
//   aggregatorclientsetscheme.AddToScheme(clientsetscheme.Scheme)
//
// After this, RawExtensions in Kubernetes types will serialize kube-aggregator types
// correctly.
//
// NOTE(review): any value returned by kubeflowv1alpha1.AddToScheme is
// discarded here, so a registration failure would go unreported.
func AddToScheme(scheme *runtime.Scheme) {
kubeflowv1alpha1.AddToScheme(scheme)
}

View File

@ -0,0 +1,18 @@
// Copyright 2018 The Kubeflow Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Code generated by client-gen. DO NOT EDIT.
// This package has the automatically generated typed clients.
package v1alpha1

View File

@ -0,0 +1,18 @@
// Copyright 2018 The Kubeflow Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Code generated by client-gen. DO NOT EDIT.
// Package fake has the automatically generated clients.
package fake

View File

@ -0,0 +1,38 @@
// Copyright 2018 The Kubeflow Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Code generated by client-gen. DO NOT EDIT.
package fake
import (
v1alpha1 "github.com/kubeflow/mpi-operator/pkg/client/clientset/versioned/typed/kubeflow/v1alpha1"
rest "k8s.io/client-go/rest"
testing "k8s.io/client-go/testing"
)
// FakeKubeflowV1alpha1 is the fake counterpart of the v1alpha1 client; it
// embeds a pointer to a testing.Fake.
type FakeKubeflowV1alpha1 struct {
*testing.Fake
}
// MPIJobs returns a FakeMPIJobs bound to this fake client and the given
// namespace.
func (c *FakeKubeflowV1alpha1) MPIJobs(namespace string) v1alpha1.MPIJobInterface {
return &FakeMPIJobs{c, namespace}
}
// RESTClient returns a RESTClient that is used to communicate
// with API server by this client implementation.
//
// The fake has no real REST client: the value returned is a nil
// *rest.RESTClient.
// NOTE(review): because the nil pointer is returned through the
// rest.Interface return type, the result compares unequal to nil.
func (c *FakeKubeflowV1alpha1) RESTClient() rest.Interface {
var ret *rest.RESTClient
return ret
}

View File

@ -0,0 +1,138 @@
// Copyright 2018 The Kubeflow Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Code generated by client-gen. DO NOT EDIT.
package fake
import (
v1alpha1 "github.com/kubeflow/mpi-operator/pkg/apis/kubeflow/v1alpha1"
v1 "k8s.io/apimachinery/pkg/apis/meta/v1"
labels "k8s.io/apimachinery/pkg/labels"
schema "k8s.io/apimachinery/pkg/runtime/schema"
types "k8s.io/apimachinery/pkg/types"
watch "k8s.io/apimachinery/pkg/watch"
testing "k8s.io/client-go/testing"
)
// FakeMPIJobs implements MPIJobInterface
type FakeMPIJobs struct {
// Fake is the fake group client that actions are invoked on.
Fake *FakeKubeflowV1alpha1
// ns is the namespace passed to every action this client creates.
ns string
}
// mpijobsResource and mpijobsKind identify the MPIJob resource and kind in
// the actions built by FakeMPIJobs.
var mpijobsResource = schema.GroupVersionResource{Group: "kubeflow.org", Version: "v1alpha1", Resource: "mpijobs"}
var mpijobsKind = schema.GroupVersionKind{Group: "kubeflow.org", Version: "v1alpha1", Kind: "MPIJob"}
// Get records a get action for the named MPIJob and returns the object the
// fake produced, along with any error. The result is nil when the fake
// returned no object. The options argument is not used.
func (c *FakeMPIJobs) Get(name string, options v1.GetOptions) (result *v1alpha1.MPIJob, err error) {
	action := testing.NewGetAction(mpijobsResource, c.ns, name)
	obj, err := c.Fake.Invokes(action, &v1alpha1.MPIJob{})
	if obj != nil {
		return obj.(*v1alpha1.MPIJob), err
	}
	return nil, err
}
// List records a list action and returns the MPIJobs from the fake's result
// whose labels match the label selector in opts. When no label selector can
// be extracted, every item is returned. The result is nil when the fake
// returned no object.
func (c *FakeMPIJobs) List(opts v1.ListOptions) (result *v1alpha1.MPIJobList, err error) {
	action := testing.NewListAction(mpijobsResource, mpijobsKind, c.ns, opts)
	obj, err := c.Fake.Invokes(action, &v1alpha1.MPIJobList{})
	if obj == nil {
		return nil, err
	}

	selector, _, _ := testing.ExtractFromListOptions(opts)
	if selector == nil {
		selector = labels.Everything()
	}

	// Keep the list metadata and copy over only the matching items.
	all := obj.(*v1alpha1.MPIJobList)
	filtered := &v1alpha1.MPIJobList{ListMeta: all.ListMeta}
	for i := range all.Items {
		if selector.Matches(labels.Set(all.Items[i].Labels)) {
			filtered.Items = append(filtered.Items, all.Items[i])
		}
	}
	return filtered, err
}
// Watch records a watch action for MPIJobs in the client's namespace and
// returns the watch.Interface produced by the fake.
func (c *FakeMPIJobs) Watch(opts v1.ListOptions) (watch.Interface, error) {
	action := testing.NewWatchAction(mpijobsResource, c.ns, opts)
	return c.Fake.InvokesWatch(action)
}
// Create records a create action for the given MPIJob and returns the object
// the fake produced, along with any error. The result is nil when the fake
// returned no object.
func (c *FakeMPIJobs) Create(mPIJob *v1alpha1.MPIJob) (result *v1alpha1.MPIJob, err error) {
	action := testing.NewCreateAction(mpijobsResource, c.ns, mPIJob)
	obj, err := c.Fake.Invokes(action, &v1alpha1.MPIJob{})
	if obj != nil {
		return obj.(*v1alpha1.MPIJob), err
	}
	return nil, err
}
// Update records an update action for the given MPIJob and returns the object
// the fake produced, along with any error. The result is nil when the fake
// returned no object.
func (c *FakeMPIJobs) Update(mPIJob *v1alpha1.MPIJob) (result *v1alpha1.MPIJob, err error) {
	action := testing.NewUpdateAction(mpijobsResource, c.ns, mPIJob)
	obj, err := c.Fake.Invokes(action, &v1alpha1.MPIJob{})
	if obj != nil {
		return obj.(*v1alpha1.MPIJob), err
	}
	return nil, err
}
// UpdateStatus records an update action on the "status" subresource of the
// given MPIJob and returns the object the fake produced, along with any
// error. The result is nil when the fake returned no object.
//
// It was generated because the type contains a Status member; add a
// +genclient:noStatus comment above the type to avoid generating it.
func (c *FakeMPIJobs) UpdateStatus(mPIJob *v1alpha1.MPIJob) (*v1alpha1.MPIJob, error) {
	action := testing.NewUpdateSubresourceAction(mpijobsResource, "status", c.ns, mPIJob)
	obj, err := c.Fake.Invokes(action, &v1alpha1.MPIJob{})
	if obj != nil {
		return obj.(*v1alpha1.MPIJob), err
	}
	return nil, err
}
// Delete records a delete action for the named MPIJob and returns the error,
// if any, reported by the fake. The options argument is not used.
func (c *FakeMPIJobs) Delete(name string, options *v1.DeleteOptions) error {
	action := testing.NewDeleteAction(mpijobsResource, c.ns, name)
	_, err := c.Fake.Invokes(action, &v1alpha1.MPIJob{})
	return err
}
// DeleteCollection records a delete-collection action built from listOptions
// and returns the error, if any, reported by the fake. The options argument
// is not used.
func (c *FakeMPIJobs) DeleteCollection(options *v1.DeleteOptions, listOptions v1.ListOptions) error {
	_, err := c.Fake.Invokes(
		testing.NewDeleteCollectionAction(mpijobsResource, c.ns, listOptions),
		&v1alpha1.MPIJobList{},
	)
	return err
}
// Patch records a patch action for the named MPIJob (and any subresources)
// and returns the object the fake produced, along with any error. The result
// is nil when the fake returned no object. The patch type pt is not passed
// on to the recorded action.
func (c *FakeMPIJobs) Patch(name string, pt types.PatchType, data []byte, subresources ...string) (result *v1alpha1.MPIJob, err error) {
	action := testing.NewPatchSubresourceAction(mpijobsResource, c.ns, name, data, subresources...)
	obj, err := c.Fake.Invokes(action, &v1alpha1.MPIJob{})
	if obj != nil {
		return obj.(*v1alpha1.MPIJob), err
	}
	return nil, err
}

View File

@ -0,0 +1,19 @@
// Copyright 2018 The Kubeflow Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Code generated by client-gen. DO NOT EDIT.
package v1alpha1
// MPIJobExpansion is an empty interface; it declares no methods here.
type MPIJobExpansion interface{}

View File

@ -0,0 +1,88 @@
// Copyright 2018 The Kubeflow Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Code generated by client-gen. DO NOT EDIT.
package v1alpha1
import (
v1alpha1 "github.com/kubeflow/mpi-operator/pkg/apis/kubeflow/v1alpha1"
"github.com/kubeflow/mpi-operator/pkg/client/clientset/versioned/scheme"
serializer "k8s.io/apimachinery/pkg/runtime/serializer"
rest "k8s.io/client-go/rest"
)
// KubeflowV1alpha1Interface is the client interface for the kubeflow
// v1alpha1 API: access to the underlying REST client plus the MPIJobs getter.
type KubeflowV1alpha1Interface interface {
RESTClient() rest.Interface
MPIJobsGetter
}
// KubeflowV1alpha1Client is used to interact with features provided by the kubeflow.org group.
type KubeflowV1alpha1Client struct {
// restClient is the REST client returned by RESTClient().
restClient rest.Interface
}
// MPIJobs returns the MPIJobInterface built by newMPIJobs for this client
// and the given namespace.
func (c *KubeflowV1alpha1Client) MPIJobs(namespace string) MPIJobInterface {
return newMPIJobs(c, namespace)
}
// NewForConfig builds a KubeflowV1alpha1Client from the given config. The
// config is copied first, so the defaults applied by setConfigDefaults do
// not leak into the caller's value.
func NewForConfig(c *rest.Config) (*KubeflowV1alpha1Client, error) {
	cfg := *c
	err := setConfigDefaults(&cfg)
	if err != nil {
		return nil, err
	}

	restClient, err := rest.RESTClientFor(&cfg)
	if err != nil {
		return nil, err
	}
	return &KubeflowV1alpha1Client{restClient: restClient}, nil
}
// NewForConfigOrDie is NewForConfig for callers that cannot recover from a
// bad config: it returns the client on success and panics with the error
// otherwise.
func NewForConfigOrDie(c *rest.Config) *KubeflowV1alpha1Client {
	kubeflowClient, err := NewForConfig(c)
	if err == nil {
		return kubeflowClient
	}
	panic(err)
}
// New wraps an existing rest.Interface in a KubeflowV1alpha1Client. No
// defaults are applied; the REST client is used exactly as given.
func New(c rest.Interface) *KubeflowV1alpha1Client {
	return &KubeflowV1alpha1Client{restClient: c}
}
// setConfigDefaults fills in the fields this client needs on config: the
// group/version of the v1alpha1 API, the "/apis" path, and a serializer built
// from the clientset scheme's codecs. The user agent is only set when the
// caller left it empty. It always returns nil.
func setConfigDefaults(config *rest.Config) error {
	groupVersion := v1alpha1.SchemeGroupVersion
	config.GroupVersion = &groupVersion
	config.APIPath = "/apis"
	config.NegotiatedSerializer = serializer.DirectCodecFactory{CodecFactory: scheme.Codecs}

	if len(config.UserAgent) == 0 {
		config.UserAgent = rest.DefaultKubernetesUserAgent()
	}
	return nil
}
// RESTClient returns the rest.Interface this client uses to talk to the API
// server. It is safe to call on a nil receiver, in which case it returns nil.
func (c *KubeflowV1alpha1Client) RESTClient() rest.Interface {
	if c != nil {
		return c.restClient
	}
	return nil
}

View File

@ -0,0 +1,172 @@
// Copyright 2018 The Kubeflow Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Code generated by client-gen. DO NOT EDIT.
package v1alpha1
import (
v1alpha1 "github.com/kubeflow/mpi-operator/pkg/apis/kubeflow/v1alpha1"
scheme "github.com/kubeflow/mpi-operator/pkg/client/clientset/versioned/scheme"
v1 "k8s.io/apimachinery/pkg/apis/meta/v1"
types "k8s.io/apimachinery/pkg/types"
watch "k8s.io/apimachinery/pkg/watch"
rest "k8s.io/client-go/rest"
)
// MPIJobsGetter has a method to return a MPIJobInterface.
// A group's client should implement this interface.
type MPIJobsGetter interface {
// MPIJobs returns a client for MPIJob resources in the given namespace.
MPIJobs(namespace string) MPIJobInterface
}
// MPIJobInterface has methods to work with MPIJob resources.
type MPIJobInterface interface {
// Create POSTs the given MPIJob and returns the decoded response.
Create(*v1alpha1.MPIJob) (*v1alpha1.MPIJob, error)
// Update PUTs the given MPIJob, addressed by its Name, and returns the decoded response.
Update(*v1alpha1.MPIJob) (*v1alpha1.MPIJob, error)
// UpdateStatus PUTs the given MPIJob to its "status" subresource.
UpdateStatus(*v1alpha1.MPIJob) (*v1alpha1.MPIJob, error)
// Delete removes the named MPIJob, sending options as the request body.
Delete(name string, options *v1.DeleteOptions) error
// DeleteCollection issues a DELETE on the collection, selected by listOptions.
DeleteCollection(options *v1.DeleteOptions, listOptions v1.ListOptions) error
// Get fetches the named MPIJob.
Get(name string, options v1.GetOptions) (*v1alpha1.MPIJob, error)
// List fetches the MPIJobs selected by opts.
List(opts v1.ListOptions) (*v1alpha1.MPIJobList, error)
// Watch opens a watch on the MPIJobs selected by opts.
Watch(opts v1.ListOptions) (watch.Interface, error)
// Patch applies data as a patch of type pt to the named MPIJob, optionally on a subresource.
Patch(name string, pt types.PatchType, data []byte, subresources ...string) (result *v1alpha1.MPIJob, err error)
MPIJobExpansion
}
// mPIJobs implements MPIJobInterface
type mPIJobs struct {
// client issues every request made by this type's methods.
client rest.Interface
// ns is the namespace set on every request.
ns string
}
// newMPIJobs returns an mPIJobs that sends requests through c's REST client
// and targets the given namespace.
func newMPIJobs(c *KubeflowV1alpha1Client, namespace string) *mPIJobs {
	jobs := new(mPIJobs)
	jobs.client = c.RESTClient()
	jobs.ns = namespace
	return jobs
}
// Get fetches the MPIJob with the given name from this client's namespace.
// It returns the decoded object together with any error from the request or
// from decoding; the result is non-nil even when err is set.
func (c *mPIJobs) Get(name string, options v1.GetOptions) (result *v1alpha1.MPIJob, err error) {
	request := c.client.Get().
		Namespace(c.ns).
		Resource("mpijobs").
		Name(name).
		VersionedParams(&options, scheme.ParameterCodec)

	result = &v1alpha1.MPIJob{}
	err = request.Do().Into(result)
	return result, err
}
// List fetches the MPIJobs in this client's namespace that match opts and
// decodes them into an MPIJobList. The result is non-nil even when err is set.
func (c *mPIJobs) List(opts v1.ListOptions) (result *v1alpha1.MPIJobList, err error) {
	request := c.client.Get().
		Namespace(c.ns).
		Resource("mpijobs").
		VersionedParams(&opts, scheme.ParameterCodec)

	result = &v1alpha1.MPIJobList{}
	err = request.Do().Into(result)
	return result, err
}
// Watch opens a watch on the MPIJobs in this client's namespace that match
// opts. The Watch flag is forced to true on the local copy of opts before the
// request is built.
func (c *mPIJobs) Watch(opts v1.ListOptions) (watch.Interface, error) {
	opts.Watch = true
	request := c.client.Get().
		Namespace(c.ns).
		Resource("mpijobs").
		VersionedParams(&opts, scheme.ParameterCodec)
	return request.Watch()
}
// Create POSTs mPIJob to this client's namespace and returns the object
// decoded from the server's response, along with any error. The result is
// non-nil even when err is set.
func (c *mPIJobs) Create(mPIJob *v1alpha1.MPIJob) (result *v1alpha1.MPIJob, err error) {
	request := c.client.Post().
		Namespace(c.ns).
		Resource("mpijobs").
		Body(mPIJob)

	result = &v1alpha1.MPIJob{}
	err = request.Do().Into(result)
	return result, err
}
// Update PUTs mPIJob, addressed by mPIJob.Name, and returns the object
// decoded from the server's response, along with any error. mPIJob must be
// non-nil because its Name is read to build the request.
func (c *mPIJobs) Update(mPIJob *v1alpha1.MPIJob) (result *v1alpha1.MPIJob, err error) {
	result = &v1alpha1.MPIJob{}
	request := c.client.Put().
		Namespace(c.ns).
		Resource("mpijobs").
		Name(mPIJob.Name).
		Body(mPIJob)

	err = request.Do().Into(result)
	return result, err
}
// UpdateStatus PUTs mPIJob to the "status" subresource of the object named
// mPIJob.Name and returns the decoded response.
// It was generated because the type contains a Status member; add a
// +genclient:noStatus comment above the type to avoid generating it.
func (c *mPIJobs) UpdateStatus(mPIJob *v1alpha1.MPIJob) (result *v1alpha1.MPIJob, err error) {
	result = &v1alpha1.MPIJob{}
	request := c.client.Put().
		Namespace(c.ns).
		Resource("mpijobs").
		Name(mPIJob.Name).
		SubResource("status").
		Body(mPIJob)

	err = request.Do().Into(result)
	return result, err
}
// Delete removes the MPIJob with the given name from this client's
// namespace, sending options as the request body. It returns the request's
// error, if any.
func (c *mPIJobs) Delete(name string, options *v1.DeleteOptions) error {
	response := c.client.Delete().
		Namespace(c.ns).
		Resource("mpijobs").
		Name(name).
		Body(options).
		Do()
	return response.Error()
}
// DeleteCollection issues a DELETE against the MPIJob collection in this
// client's namespace. listOptions is encoded as query parameters and options
// is sent as the request body.
func (c *mPIJobs) DeleteCollection(options *v1.DeleteOptions, listOptions v1.ListOptions) error {
	response := c.client.Delete().
		Namespace(c.ns).
		Resource("mpijobs").
		VersionedParams(&listOptions, scheme.ParameterCodec).
		Body(options).
		Do()
	return response.Error()
}
// Patch sends data as a patch of type pt to the named MPIJob (or to the given
// subresource path of it) and returns the object decoded from the response.
func (c *mPIJobs) Patch(name string, pt types.PatchType, data []byte, subresources ...string) (result *v1alpha1.MPIJob, err error) {
	request := c.client.Patch(pt).
		Namespace(c.ns).
		Resource("mpijobs").
		SubResource(subresources...).
		Name(name).
		Body(data)

	result = &v1alpha1.MPIJob{}
	err = request.Do().Into(result)
	return result, err
}

View File

@ -0,0 +1,178 @@
// Copyright 2018 The Kubeflow Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Code generated by informer-gen. DO NOT EDIT.
package externalversions
import (
reflect "reflect"
sync "sync"
time "time"
versioned "github.com/kubeflow/mpi-operator/pkg/client/clientset/versioned"
internalinterfaces "github.com/kubeflow/mpi-operator/pkg/client/informers/externalversions/internalinterfaces"
kubeflow "github.com/kubeflow/mpi-operator/pkg/client/informers/externalversions/kubeflow"
v1 "k8s.io/apimachinery/pkg/apis/meta/v1"
runtime "k8s.io/apimachinery/pkg/runtime"
schema "k8s.io/apimachinery/pkg/runtime/schema"
cache "k8s.io/client-go/tools/cache"
)
// SharedInformerOption defines the functional option type for SharedInformerFactory.
// Each option receives the factory under construction and returns the factory
// to keep using; NewSharedInformerFactoryWithOptions applies them in order.
type SharedInformerOption func(*sharedInformerFactory) *sharedInformerFactory
// sharedInformerFactory is the concrete factory built by
// NewSharedInformerFactoryWithOptions. It keeps one informer per object type
// and remembers which of them have been started.
type sharedInformerFactory struct {
// client is handed to informer constructors by InformerFor.
client versioned.Interface
// namespace and tweakListOptions are forwarded to kubeflow.New by Kubeflow().
namespace string
tweakListOptions internalinterfaces.TweakListOptionsFunc
// lock is held by Start, WaitForCacheSync and InformerFor while they access
// the maps below.
lock sync.Mutex
// defaultResync is used by InformerFor when customResync has no entry for
// the requested type.
defaultResync time.Duration
customResync map[reflect.Type]time.Duration
// informers holds the informer created for each object type.
informers map[reflect.Type]cache.SharedIndexInformer
// startedInformers is used for tracking which informers have been started.
// This allows Start() to be called multiple times safely.
startedInformers map[reflect.Type]bool
}
// WithCustomResyncConfig returns an option that records a resync period per
// informer type. Each key of resyncConfig is reduced to its reflect.Type, so
// only the dynamic type of the key matters, not its contents.
func WithCustomResyncConfig(resyncConfig map[v1.Object]time.Duration) SharedInformerOption {
	return func(f *sharedInformerFactory) *sharedInformerFactory {
		for obj, period := range resyncConfig {
			objType := reflect.TypeOf(obj)
			f.customResync[objType] = period
		}
		return f
	}
}
// WithTweakListOptions returns an option that stores tweakListOptions on the
// factory, from where it is passed on to the informers the factory creates.
func WithTweakListOptions(tweakListOptions internalinterfaces.TweakListOptionsFunc) SharedInformerOption {
	return func(f *sharedInformerFactory) *sharedInformerFactory {
		f.tweakListOptions = tweakListOptions
		return f
	}
}
// WithNamespace returns an option that sets the namespace the factory passes
// to the informers it creates.
func WithNamespace(namespace string) SharedInformerOption {
	return func(f *sharedInformerFactory) *sharedInformerFactory {
		f.namespace = namespace
		return f
	}
}
// NewSharedInformerFactory builds a factory with no options applied, which
// leaves the namespace at v1.NamespaceAll and installs no list-option tweak.
func NewSharedInformerFactory(client versioned.Interface, defaultResync time.Duration) SharedInformerFactory {
	factory := NewSharedInformerFactoryWithOptions(client, defaultResync)
	return factory
}
// NewFilteredSharedInformerFactory builds a factory restricted to namespace
// and with tweakListOptions installed; it is equivalent to calling
// NewSharedInformerFactoryWithOptions with WithNamespace and
// WithTweakListOptions, in that order.
//
// Deprecated: Please use NewSharedInformerFactoryWithOptions instead
func NewFilteredSharedInformerFactory(client versioned.Interface, defaultResync time.Duration, namespace string, tweakListOptions internalinterfaces.TweakListOptionsFunc) SharedInformerFactory {
	opts := []SharedInformerOption{
		WithNamespace(namespace),
		WithTweakListOptions(tweakListOptions),
	}
	return NewSharedInformerFactoryWithOptions(client, defaultResync, opts...)
}
// NewSharedInformerFactoryWithOptions builds a factory that starts out
// covering all namespaces with empty informer maps, then applies the given
// options in order. Each option's return value replaces the factory.
func NewSharedInformerFactoryWithOptions(client versioned.Interface, defaultResync time.Duration, options ...SharedInformerOption) SharedInformerFactory {
	f := &sharedInformerFactory{
		client:           client,
		namespace:        v1.NamespaceAll,
		defaultResync:    defaultResync,
		informers:        make(map[reflect.Type]cache.SharedIndexInformer),
		startedInformers: make(map[reflect.Type]bool),
		customResync:     make(map[reflect.Type]time.Duration),
	}

	for i := range options {
		f = options[i](f)
	}
	return f
}
// Start launches, each in its own goroutine, every registered informer that
// has not been started yet, and marks it as started. Calling Start again only
// affects informers registered since the previous call.
func (f *sharedInformerFactory) Start(stopCh <-chan struct{}) {
	f.lock.Lock()
	defer f.lock.Unlock()

	for typ, inf := range f.informers {
		if f.startedInformers[typ] {
			continue
		}
		go inf.Run(stopCh)
		f.startedInformers[typ] = true
	}
}
// WaitForCacheSync waits until the caches of all started informers have
// synced. The set of started informers is snapshotted under the lock; the
// waiting itself happens without holding it. The returned map reports, per
// informer type, the result of cache.WaitForCacheSync for that informer.
func (f *sharedInformerFactory) WaitForCacheSync(stopCh <-chan struct{}) map[reflect.Type]bool {
	snapshotStarted := func() map[reflect.Type]cache.SharedIndexInformer {
		f.lock.Lock()
		defer f.lock.Unlock()

		started := map[reflect.Type]cache.SharedIndexInformer{}
		for typ, inf := range f.informers {
			if !f.startedInformers[typ] {
				continue
			}
			started[typ] = inf
		}
		return started
	}

	synced := map[reflect.Type]bool{}
	for typ, inf := range snapshotStarted() {
		synced[typ] = cache.WaitForCacheSync(stopCh, inf.HasSynced)
	}
	return synced
}
// InformerFor returns the SharedIndexInformer registered for obj's type,
// creating it with newFunc on first use. A newly created informer gets the
// custom resync period configured for its type, or the factory default when
// none is configured.
func (f *sharedInformerFactory) InformerFor(obj runtime.Object, newFunc internalinterfaces.NewInformerFunc) cache.SharedIndexInformer {
	f.lock.Lock()
	defer f.lock.Unlock()

	key := reflect.TypeOf(obj)
	if existing, found := f.informers[key]; found {
		return existing
	}

	period := f.defaultResync
	if custom, found := f.customResync[key]; found {
		period = custom
	}

	created := newFunc(f.client, period)
	f.informers[key] = created
	return created
}
// SharedInformerFactory provides shared informers for resources in all known
// API group versions.
type SharedInformerFactory interface {
internalinterfaces.SharedInformerFactory
// ForResource returns a GenericInformer for the given resource, or an error
// when no informer is known for it.
ForResource(resource schema.GroupVersionResource) (GenericInformer, error)
// WaitForCacheSync blocks until started informers have synced and reports
// the outcome per informer type.
WaitForCacheSync(stopCh <-chan struct{}) map[reflect.Type]bool
// Kubeflow gives access to the informers of the kubeflow group.
Kubeflow() kubeflow.Interface
}
// Kubeflow returns the kubeflow group interface backed by this factory,
// carrying over the factory's namespace and list-option tweak.
func (f *sharedInformerFactory) Kubeflow() kubeflow.Interface {
	groupInformers := kubeflow.New(f, f.namespace, f.tweakListOptions)
	return groupInformers
}

View File

@ -0,0 +1,60 @@
// Copyright 2018 The Kubeflow Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Code generated by informer-gen. DO NOT EDIT.
package externalversions
import (
"fmt"
v1alpha1 "github.com/kubeflow/mpi-operator/pkg/apis/kubeflow/v1alpha1"
schema "k8s.io/apimachinery/pkg/runtime/schema"
cache "k8s.io/client-go/tools/cache"
)
// GenericInformer is type of SharedIndexInformer which will locate and delegate to other
// sharedInformers based on type
type GenericInformer interface {
// Informer returns the underlying SharedIndexInformer.
Informer() cache.SharedIndexInformer
// Lister returns a GenericLister reading from the informer's indexer.
Lister() cache.GenericLister
}
// genericInformer implements GenericInformer by pairing a shared informer
// with the group/resource it serves.
type genericInformer struct {
// informer is returned by Informer() and supplies the indexer for Lister().
informer cache.SharedIndexInformer
// resource is passed to cache.NewGenericLister by Lister().
resource schema.GroupResource
}
// Informer returns the SharedIndexInformer wrapped by this generic informer.
func (f *genericInformer) Informer() cache.SharedIndexInformer {
	wrapped := f.informer
	return wrapped
}
// Lister returns a GenericLister for this informer's resource, reading from
// the informer's indexer. A new lister is constructed on every call.
func (f *genericInformer) Lister() cache.GenericLister {
	indexer := f.Informer().GetIndexer()
	return cache.NewGenericLister(indexer, f.resource)
}
// ForResource returns a GenericInformer for the given group/version/resource.
// Only the v1alpha1 "mpijobs" resource is recognized; any other resource
// yields an error.
// TODO extend this to unknown resources with a client pool
func (f *sharedInformerFactory) ForResource(resource schema.GroupVersionResource) (GenericInformer, error) {
	// Group=kubeflow.org, Version=v1alpha1
	if resource == v1alpha1.SchemeGroupVersion.WithResource("mpijobs") {
		groupResource := resource.GroupResource()
		mpiJobs := f.Kubeflow().V1alpha1().MPIJobs().Informer()
		return &genericInformer{resource: groupResource, informer: mpiJobs}, nil
	}
	return nil, fmt.Errorf("no informer found for %v", resource)
}

View File

@ -0,0 +1,36 @@
// Copyright 2018 The Kubeflow Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Code generated by informer-gen. DO NOT EDIT.
package internalinterfaces
import (
time "time"
versioned "github.com/kubeflow/mpi-operator/pkg/client/clientset/versioned"
v1 "k8s.io/apimachinery/pkg/apis/meta/v1"
runtime "k8s.io/apimachinery/pkg/runtime"
cache "k8s.io/client-go/tools/cache"
)
// NewInformerFunc constructs a SharedIndexInformer from a clientset and a
// resync period.
type NewInformerFunc func(versioned.Interface, time.Duration) cache.SharedIndexInformer
// SharedInformerFactory is a small interface that allows adding an informer
// without an import cycle.
type SharedInformerFactory interface {
// Start runs the factory's informers until stopCh is closed.
Start(stopCh <-chan struct{})
// InformerFor returns the informer for obj's type, using newFunc to build it
// when needed.
InformerFor(obj runtime.Object, newFunc NewInformerFunc) cache.SharedIndexInformer
}
// TweakListOptionsFunc mutates, in place, the ListOptions it is given.
type TweakListOptionsFunc func(*v1.ListOptions)

View File

@ -0,0 +1,44 @@
// Copyright 2018 The Kubeflow Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Code generated by informer-gen. DO NOT EDIT.
package kubeflow
import (
internalinterfaces "github.com/kubeflow/mpi-operator/pkg/client/informers/externalversions/internalinterfaces"
v1alpha1 "github.com/kubeflow/mpi-operator/pkg/client/informers/externalversions/kubeflow/v1alpha1"
)
// Interface provides access to each of this group's versions.
// It is implemented by the unexported group type and obtained through New.
type Interface interface {
// V1alpha1 provides access to shared informers for resources in V1alpha1.
V1alpha1() v1alpha1.Interface
}
// group implements Interface. It only stores the arguments given to New and
// forwards them to v1alpha1.New.
type group struct {
factory internalinterfaces.SharedInformerFactory
namespace string
tweakListOptions internalinterfaces.TweakListOptionsFunc
}
// New returns an Interface for the kubeflow group that remembers the given
// factory, namespace and list-option tweak for its per-version accessors.
func New(f internalinterfaces.SharedInformerFactory, namespace string, tweakListOptions internalinterfaces.TweakListOptionsFunc) Interface {
	g := new(group)
	g.factory = f
	g.namespace = namespace
	g.tweakListOptions = tweakListOptions
	return g
}
// V1alpha1 returns a freshly constructed v1alpha1.Interface that shares this
// group's factory, namespace and list-option tweak.
func (g *group) V1alpha1() v1alpha1.Interface {
	versionInformers := v1alpha1.New(g.factory, g.namespace, g.tweakListOptions)
	return versionInformers
}

View File

@ -0,0 +1,43 @@
// Copyright 2018 The Kubeflow Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Code generated by informer-gen. DO NOT EDIT.
package v1alpha1
import (
internalinterfaces "github.com/kubeflow/mpi-operator/pkg/client/informers/externalversions/internalinterfaces"
)
// Interface provides access to all the informers in this group version.
// It is implemented by the unexported version type and obtained through New.
type Interface interface {
// MPIJobs returns a MPIJobInformer.
MPIJobs() MPIJobInformer
}
// version implements Interface. It only stores the arguments given to New and
// copies them into every informer accessor it hands out.
type version struct {
factory internalinterfaces.SharedInformerFactory
namespace string
tweakListOptions internalinterfaces.TweakListOptionsFunc
}
// New returns an Interface for this group version that remembers the given
// factory, namespace and list-option tweak for its informer accessors.
func New(f internalinterfaces.SharedInformerFactory, namespace string, tweakListOptions internalinterfaces.TweakListOptionsFunc) Interface {
	v := new(version)
	v.factory = f
	v.namespace = namespace
	v.tweakListOptions = tweakListOptions
	return v
}
// MPIJobs returns a new MPIJobInformer accessor configured with this
// version's factory, namespace and list-option tweak.
func (v *version) MPIJobs() MPIJobInformer {
	accessor := new(mPIJobInformer)
	accessor.factory = v.factory
	accessor.namespace = v.namespace
	accessor.tweakListOptions = v.tweakListOptions
	return accessor
}

View File

@ -0,0 +1,87 @@
// Copyright 2018 The Kubeflow Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Code generated by informer-gen. DO NOT EDIT.
package v1alpha1
import (
time "time"
kubeflow_v1alpha1 "github.com/kubeflow/mpi-operator/pkg/apis/kubeflow/v1alpha1"
versioned "github.com/kubeflow/mpi-operator/pkg/client/clientset/versioned"
internalinterfaces "github.com/kubeflow/mpi-operator/pkg/client/informers/externalversions/internalinterfaces"
v1alpha1 "github.com/kubeflow/mpi-operator/pkg/client/listers/kubeflow/v1alpha1"
v1 "k8s.io/apimachinery/pkg/apis/meta/v1"
runtime "k8s.io/apimachinery/pkg/runtime"
watch "k8s.io/apimachinery/pkg/watch"
cache "k8s.io/client-go/tools/cache"
)
// MPIJobInformer provides access to a shared informer and lister for
// MPIJobs.
type MPIJobInformer interface {
// Informer returns the shared informer for MPIJob objects.
Informer() cache.SharedIndexInformer
// Lister returns a lister reading from that informer's indexer.
Lister() v1alpha1.MPIJobLister
}
// mPIJobInformer implements MPIJobInformer on top of a shared informer
// factory.
type mPIJobInformer struct {
// factory owns the shared informer; Informer() asks it via InformerFor.
factory internalinterfaces.SharedInformerFactory
// tweakListOptions and namespace are passed to NewFilteredMPIJobInformer by
// defaultInformer.
tweakListOptions internalinterfaces.TweakListOptionsFunc
namespace string
}
// NewMPIJobInformer constructs a standalone informer for MPIJob objects with
// no list-option tweak.
// Always prefer using an informer factory to get a shared informer instead of
// getting an independent one. This reduces memory footprint and number of
// connections to the server.
func NewMPIJobInformer(client versioned.Interface, namespace string, resyncPeriod time.Duration, indexers cache.Indexers) cache.SharedIndexInformer {
	var noTweak internalinterfaces.TweakListOptionsFunc
	return NewFilteredMPIJobInformer(client, namespace, resyncPeriod, indexers, noTweak)
}
// NewFilteredMPIJobInformer constructs a standalone informer for MPIJob
// objects in namespace. When tweakListOptions is non-nil it is applied to the
// options of every list and watch call before the request is made.
// Always prefer using an informer factory to get a shared informer instead of
// getting an independent one. This reduces memory footprint and number of
// connections to the server.
func NewFilteredMPIJobInformer(client versioned.Interface, namespace string, resyncPeriod time.Duration, indexers cache.Indexers, tweakListOptions internalinterfaces.TweakListOptionsFunc) cache.SharedIndexInformer {
	listMPIJobs := func(options v1.ListOptions) (runtime.Object, error) {
		if tweakListOptions != nil {
			tweakListOptions(&options)
		}
		return client.KubeflowV1alpha1().MPIJobs(namespace).List(options)
	}
	watchMPIJobs := func(options v1.ListOptions) (watch.Interface, error) {
		if tweakListOptions != nil {
			tweakListOptions(&options)
		}
		return client.KubeflowV1alpha1().MPIJobs(namespace).Watch(options)
	}

	source := &cache.ListWatch{ListFunc: listMPIJobs, WatchFunc: watchMPIJobs}
	return cache.NewSharedIndexInformer(source, &kubeflow_v1alpha1.MPIJob{}, resyncPeriod, indexers)
}
// defaultInformer is the constructor handed to the factory: it builds a
// filtered MPIJob informer for this accessor's namespace, indexed by
// namespace, with this accessor's list-option tweak.
func (f *mPIJobInformer) defaultInformer(client versioned.Interface, resyncPeriod time.Duration) cache.SharedIndexInformer {
	byNamespace := cache.Indexers{cache.NamespaceIndex: cache.MetaNamespaceIndexFunc}
	return NewFilteredMPIJobInformer(client, f.namespace, resyncPeriod, byNamespace, f.tweakListOptions)
}
// Informer returns the factory's shared informer for MPIJob objects, letting
// the factory create it through defaultInformer if necessary.
func (f *mPIJobInformer) Informer() cache.SharedIndexInformer {
	shared := f.factory.InformerFor(&kubeflow_v1alpha1.MPIJob{}, f.defaultInformer)
	return shared
}
// Lister returns an MPIJobLister reading from the shared informer's indexer.
// A new lister is constructed on every call.
func (f *mPIJobInformer) Lister() v1alpha1.MPIJobLister {
	indexer := f.Informer().GetIndexer()
	return v1alpha1.NewMPIJobLister(indexer)
}

View File

@ -0,0 +1,25 @@
// Copyright 2018 The Kubeflow Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Code generated by lister-gen. DO NOT EDIT.
package v1alpha1
// MPIJobListerExpansion allows custom methods to be added to
// MPIJobLister. It declares no methods and is embedded in MPIJobLister.
type MPIJobListerExpansion interface{}
// MPIJobNamespaceListerExpansion allows custom methods to be added to
// MPIJobNamespaceLister. It declares no methods and is embedded in
// MPIJobNamespaceLister.
type MPIJobNamespaceListerExpansion interface{}

View File

@ -0,0 +1,92 @@
// Copyright 2018 The Kubeflow Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Code generated by lister-gen. DO NOT EDIT.
package v1alpha1
import (
v1alpha1 "github.com/kubeflow/mpi-operator/pkg/apis/kubeflow/v1alpha1"
"k8s.io/apimachinery/pkg/api/errors"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/client-go/tools/cache"
)
// MPIJobLister helps list MPIJobs.
// It reads from an indexer rather than issuing API requests.
type MPIJobLister interface {
// List lists all MPIJobs in the indexer.
List(selector labels.Selector) (ret []*v1alpha1.MPIJob, err error)
// MPIJobs returns an object that can list and get MPIJobs.
MPIJobs(namespace string) MPIJobNamespaceLister
MPIJobListerExpansion
}
// mPIJobLister implements the MPIJobLister interface.
type mPIJobLister struct {
// indexer is the store that List reads and that MPIJobs passes on to the
// namespace-scoped lister.
indexer cache.Indexer
}
// NewMPIJobLister returns an MPIJobLister that reads from the given indexer.
func NewMPIJobLister(indexer cache.Indexer) MPIJobLister {
	lister := new(mPIJobLister)
	lister.indexer = indexer
	return lister
}
// List returns every MPIJob in the indexer that matches selector. Items are
// type-asserted to *v1alpha1.MPIJob, so the indexer must only hold that type.
func (s *mPIJobLister) List(selector labels.Selector) (ret []*v1alpha1.MPIJob, err error) {
	collect := func(item interface{}) {
		ret = append(ret, item.(*v1alpha1.MPIJob))
	}
	err = cache.ListAll(s.indexer, selector, collect)
	return ret, err
}
// MPIJobs returns a lister restricted to the given namespace that shares this
// lister's indexer.
func (s *mPIJobLister) MPIJobs(namespace string) MPIJobNamespaceLister {
	var scoped mPIJobNamespaceLister
	scoped.indexer = s.indexer
	scoped.namespace = namespace
	return scoped
}
// MPIJobNamespaceLister helps list and get MPIJobs.
// All lookups are limited to the namespace the lister was created for.
type MPIJobNamespaceLister interface {
// List lists all MPIJobs in the indexer for a given namespace.
List(selector labels.Selector) (ret []*v1alpha1.MPIJob, err error)
// Get retrieves the MPIJob from the indexer for a given namespace and name.
Get(name string) (*v1alpha1.MPIJob, error)
MPIJobNamespaceListerExpansion
}
// mPIJobNamespaceLister implements the MPIJobNamespaceLister
// interface.
type mPIJobNamespaceLister struct {
// indexer is the store that List and Get read from.
indexer cache.Indexer
// namespace scopes List and forms the "<namespace>/<name>" key used by Get.
namespace string
}
// List returns every MPIJob in this lister's namespace that matches selector.
// Items are type-asserted to *v1alpha1.MPIJob, so the indexer must only hold
// that type.
func (s mPIJobNamespaceLister) List(selector labels.Selector) (ret []*v1alpha1.MPIJob, err error) {
	collect := func(item interface{}) {
		ret = append(ret, item.(*v1alpha1.MPIJob))
	}
	err = cache.ListAllByNamespace(s.indexer, s.namespace, selector, collect)
	return ret, err
}
// Get looks up the MPIJob stored under "<namespace>/<name>" in the indexer.
// It returns the indexer's error if the lookup fails, and a NotFound error for
// the "mpijob" resource when no object is stored under that key.
func (s mPIJobNamespaceLister) Get(name string) (*v1alpha1.MPIJob, error) {
	key := s.namespace + "/" + name
	obj, exists, err := s.indexer.GetByKey(key)
	switch {
	case err != nil:
		return nil, err
	case !exists:
		return nil, errors.NewNotFound(v1alpha1.Resource("mpijob"), name)
	}
	return obj.(*v1alpha1.MPIJob), nil
}

View File

@ -0,0 +1,997 @@
// Copyright 2018 The Kubeflow Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package controllers
import (
"bytes"
"fmt"
"time"
"github.com/golang/glog"
appsv1 "k8s.io/api/apps/v1"
batchv1 "k8s.io/api/batch/v1"
corev1 "k8s.io/api/core/v1"
rbacv1 "k8s.io/api/rbac/v1"
"k8s.io/apimachinery/pkg/api/errors"
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/util/runtime"
"k8s.io/apimachinery/pkg/util/wait"
appsinformers "k8s.io/client-go/informers/apps/v1"
batchinformers "k8s.io/client-go/informers/batch/v1"
coreinformers "k8s.io/client-go/informers/core/v1"
rbacinformers "k8s.io/client-go/informers/rbac/v1"
"k8s.io/client-go/kubernetes"
"k8s.io/client-go/kubernetes/scheme"
typedcorev1 "k8s.io/client-go/kubernetes/typed/core/v1"
appslisters "k8s.io/client-go/listers/apps/v1"
batchlisters "k8s.io/client-go/listers/batch/v1"
corelisters "k8s.io/client-go/listers/core/v1"
rbaclisters "k8s.io/client-go/listers/rbac/v1"
"k8s.io/client-go/tools/cache"
"k8s.io/client-go/tools/record"
"k8s.io/client-go/util/workqueue"
kubeflow "github.com/kubeflow/mpi-operator/pkg/apis/kubeflow/v1alpha1"
clientset "github.com/kubeflow/mpi-operator/pkg/client/clientset/versioned"
kubeflowScheme "github.com/kubeflow/mpi-operator/pkg/client/clientset/versioned/scheme"
informers "github.com/kubeflow/mpi-operator/pkg/client/informers/externalversions/kubeflow/v1alpha1"
listers "github.com/kubeflow/mpi-operator/pkg/client/listers/kubeflow/v1alpha1"
)
// Names, suffixes and paths used by the controller. Most call sites are
// outside this view, so the notes below stick to what is visible here.
const (
// controllerAgentName is the Component of the EventSource given to the event
// recorder in NewMPIJobController.
controllerAgentName = "mpi-job-controller"
// NOTE(review): the suffix, volume-name and mount-path constants below
// presumably name the resources and mounts the controller creates per MPIJob
// — confirm against their uses further down the file.
configSuffix = "-config"
configVolumeName = "mpi-job-config"
configMountPath = "/etc/mpi"
kubexecScriptName = "kubexec.sh"
hostfileName = "hostfile"
kubectlDeliveryName = "kubectl-delivery"
kubectlTargetDirEnv = "TARGET_DIR"
kubectlVolumeName = "mpi-job-kubectl"
kubectlMountPath = "/opt/kube"
launcherSuffix = "-launcher"
workerSuffix = "-worker"
gpuResourceName = "nvidia.com/gpu"
)
// Event reasons and messages reported by the controller.
const (
// SuccessSynced is used as part of the Event 'reason' when an MPIJob is
// synced.
SuccessSynced = "Synced"
// ErrResourceExists is used as part of the Event 'reason' when an MPIJob
// fails to sync due to dependent resources of the same name already
// existing.
ErrResourceExists = "ErrResourceExists"
// MessageResourceExists is the message used for Events when a resource
// fails to sync due to dependent resources already existing.
// It is a format string with a single %q verb.
MessageResourceExists = "Resource %q already exists and is not managed by MPIJob"
// MessageResourceSynced is the message used for an Event fired when an
// MPIJob is synced successfully.
MessageResourceSynced = "MPIJob synced successfully"
)
// MPIJobController is the controller implementation for MPIJob resources.
type MPIJobController struct {
// kubeClient is a standard kubernetes clientset.
kubeClient kubernetes.Interface
// kubeflowClient is a clientset for our own API group.
kubeflowClient clientset.Interface
// Each lister/synced pair below is taken from one informer in
// NewMPIJobController: the lister is the informer's Lister() and the synced
// func is its Informer().HasSynced.
configMapLister corelisters.ConfigMapLister
configMapSynced cache.InformerSynced
serviceAccountLister corelisters.ServiceAccountLister
serviceAccountSynced cache.InformerSynced
roleLister rbaclisters.RoleLister
roleSynced cache.InformerSynced
roleBindingLister rbaclisters.RoleBindingLister
roleBindingSynced cache.InformerSynced
statefulSetLister appslisters.StatefulSetLister
statefulSetSynced cache.InformerSynced
jobLister batchlisters.JobLister
jobSynced cache.InformerSynced
mpiJobLister listers.MPIJobLister
mpiJobSynced cache.InformerSynced
// queue is a rate limited work queue. This is used to queue work to be
// processed instead of performing it as soon as a change happens. This
// means we can ensure we only process a fixed amount of resources at a
// time, and makes it easy to ensure we are never processing the same item
// simultaneously in two different workers.
queue workqueue.RateLimitingInterface
// recorder is an event recorder for recording Event resources to the
// Kubernetes API.
recorder record.EventRecorder
// The maximum number of GPUs per node.
gpusPerNode int
// The container image used to deliver the kubectl binary.
kubectlDeliveryImage string
}
// NewMPIJobController returns a new MPIJob controller.
//
// It registers the MPIJob types with the default client-go scheme, builds an
// event recorder that logs through glog and records events via kubeClient,
// stores the lister and HasSynced function of every informer on the
// controller, and installs the event handlers that feed the work queue:
//
//   - MPIJob add/update events enqueue the MPIJob itself.
//   - Add/update/delete events of dependent resources (ConfigMap,
//     ServiceAccount, Role, RoleBinding, StatefulSet, Job) are routed through
//     handleObject, which enqueues the owning MPIJob.
//
// gpusPerNode and kubectlDeliveryImage are stored unchanged on the controller.
func NewMPIJobController(
	kubeClient kubernetes.Interface,
	kubeflowClient clientset.Interface,
	configMapInformer coreinformers.ConfigMapInformer,
	serviceAccountInformer coreinformers.ServiceAccountInformer,
	roleInformer rbacinformers.RoleInformer,
	roleBindingInformer rbacinformers.RoleBindingInformer,
	statefulSetInformer appsinformers.StatefulSetInformer,
	jobInformer batchinformers.JobInformer,
	mpiJobInformer informers.MPIJobInformer,
	gpusPerNode int,
	kubectlDeliveryImage string) *MPIJobController {

	// Add mpi-job-controller types to the default Kubernetes Scheme so Events
	// can be logged for mpi-job-controller types.
	kubeflowScheme.AddToScheme(scheme.Scheme)

	// Create event broadcaster.
	glog.V(4).Info("Creating event broadcaster")
	eventBroadcaster := record.NewBroadcaster()
	eventBroadcaster.StartLogging(glog.Infof)
	eventBroadcaster.StartRecordingToSink(&typedcorev1.EventSinkImpl{Interface: kubeClient.CoreV1().Events("")})
	recorder := eventBroadcaster.NewRecorder(scheme.Scheme, corev1.EventSource{Component: controllerAgentName})

	controller := &MPIJobController{
		kubeClient:           kubeClient,
		kubeflowClient:       kubeflowClient,
		configMapLister:      configMapInformer.Lister(),
		configMapSynced:      configMapInformer.Informer().HasSynced,
		serviceAccountLister: serviceAccountInformer.Lister(),
		serviceAccountSynced: serviceAccountInformer.Informer().HasSynced,
		roleLister:           roleInformer.Lister(),
		roleSynced:           roleInformer.Informer().HasSynced,
		roleBindingLister:    roleBindingInformer.Lister(),
		roleBindingSynced:    roleBindingInformer.Informer().HasSynced,
		statefulSetLister:    statefulSetInformer.Lister(),
		statefulSetSynced:    statefulSetInformer.Informer().HasSynced,
		jobLister:            jobInformer.Lister(),
		jobSynced:            jobInformer.Informer().HasSynced,
		mpiJobLister:         mpiJobInformer.Lister(),
		mpiJobSynced:         mpiJobInformer.Informer().HasSynced,
		queue:                workqueue.NewNamedRateLimitingQueue(workqueue.DefaultControllerRateLimiter(), "MPIJobs"),
		recorder:             recorder,
		gpusPerNode:          gpusPerNode,
		kubectlDeliveryImage: kubectlDeliveryImage,
	}

	glog.Info("Setting up event handlers")
	// Set up an event handler for when MPIJob resources change.
	mpiJobInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{
		AddFunc: controller.enqueueMPIJob,
		UpdateFunc: func(_, updated interface{}) {
			controller.enqueueMPIJob(updated)
		},
	})

	// Set up an event handler for when dependent resources change. This
	// handler will lookup the owner of the given resource, and if it is
	// owned by an MPIJob resource will enqueue that MPIJob resource for
	// processing. This way, we don't need to implement custom logic for
	// handling dependent resources. More info on this pattern:
	// https://github.com/kubernetes/community/blob/8cafef897a22026d42f5e5bb3f104febe7e29830/contributors/devel/controllers.md
	//
	// Every dependent type is handled the same way, so a single set of
	// handler funcs is shared by all dependent informers instead of being
	// repeated once per type.
	dependentHandlers := cache.ResourceEventHandlerFuncs{
		AddFunc: controller.handleObject,
		UpdateFunc: func(oldObj, newObj interface{}) {
			oldMeta, oldOK := oldObj.(metav1.Object)
			newMeta, newOK := newObj.(metav1.Object)
			if oldOK && newOK && oldMeta.GetResourceVersion() == newMeta.GetResourceVersion() {
				// Periodic re-sync will send update events for all known
				// objects. Two different versions of the same object will
				// always have different RVs.
				return
			}
			controller.handleObject(newObj)
		},
		DeleteFunc: controller.handleObject,
	}
	configMapInformer.Informer().AddEventHandler(dependentHandlers)
	serviceAccountInformer.Informer().AddEventHandler(dependentHandlers)
	roleInformer.Informer().AddEventHandler(dependentHandlers)
	roleBindingInformer.Informer().AddEventHandler(dependentHandlers)
	statefulSetInformer.Informer().AddEventHandler(dependentHandlers)
	jobInformer.Informer().AddEventHandler(dependentHandlers)

	return controller
}
// Run waits for all informer caches to report synced, launches threadiness
// worker goroutines that drain the work queue, and then blocks until stopCh
// is closed. The work queue is shut down (deferred) when Run returns.
//
// Run does not start the informers itself. It returns an error when the
// caches fail to sync, and nil once stopCh has been closed.
func (c *MPIJobController) Run(threadiness int, stopCh <-chan struct{}) error {
	defer runtime.HandleCrash()
	defer c.queue.ShutDown()

	glog.Info("Starting MPIJob controller")

	// Workers must not run against partially filled caches.
	glog.Info("Waiting for informer caches to sync")
	synced := cache.WaitForCacheSync(stopCh,
		c.configMapSynced,
		c.serviceAccountSynced,
		c.roleSynced,
		c.roleBindingSynced,
		c.statefulSetSynced,
		c.jobSynced,
		c.mpiJobSynced)
	if !synced {
		return fmt.Errorf("failed to wait for caches to sync")
	}

	// Each worker is restarted one second after it returns, until stopCh is
	// closed.
	glog.Info("Starting workers")
	for started := 0; started < threadiness; started++ {
		go wait.Until(c.runWorker, time.Second, stopCh)
	}
	glog.Info("Started workers")

	<-stopCh
	glog.Info("Shutting down workers")
	return nil
}
// runWorker processes work queue items one after another and returns as soon
// as processNextWorkItem reports that no further items should be processed.
func (c *MPIJobController) runWorker() {
	for {
		if !c.processNextWorkItem() {
			return
		}
	}
}
// processNextWorkItem reads a single work item off the work queue and
// attempts to process it by calling syncHandler.
//
// It returns false only when the queue has been shut down; in every other
// case, including a failed sync, it returns true so the worker keeps going.
// An item whose sync fails is put back on the queue with rate limiting so it
// is retried after a back-off period.
func (c *MPIJobController) processNextWorkItem() bool {
	obj, shutdown := c.queue.Get()
	if shutdown {
		return false
	}

	// We wrap this block in a func so we can defer c.queue.Done.
	err := func(obj interface{}) error {
		// We call Done here so the work queue knows we have finished
		// processing this item. We also must remember to call Forget if we
		// do not want this work item being re-queued.
		defer c.queue.Done(obj)

		// We expect strings to come off the work queue. These are of the
		// form namespace/name. We do this as the delayed nature of the
		// work queue means the items in the informer cache may actually be
		// more up to date than when the item was initially put onto the
		// work queue.
		key, ok := obj.(string)
		if !ok {
			// As the item in the work queue is actually invalid, we call
			// Forget here else we'd go into a loop of attempting to
			// process a work item that is invalid.
			c.queue.Forget(obj)
			runtime.HandleError(fmt.Errorf("expected string in workqueue but got %#v", obj))
			return nil
		}

		// Run the syncHandler, passing it the namespace/name string of the
		// MPIJob resource to be synced.
		if err := c.syncHandler(key); err != nil {
			// Put the item back on the work queue so that transient errors
			// are retried after the rate limiter's back-off. Forget is
			// deliberately not called, so the back-off keeps growing.
			c.queue.AddRateLimited(key)
			return fmt.Errorf("error syncing '%s': %s", key, err.Error())
		}

		// Finally, if no error occurs we Forget this item so it does not
		// get queued again until another change happens.
		c.queue.Forget(obj)
		glog.Infof("Successfully synced '%s'", key)
		return nil
	}(obj)
	if err != nil {
		runtime.HandleError(err)
	}
	return true
}
// syncHandler reconciles the MPIJob identified by key (namespace/name).
//
// While the launcher Job has not finished it makes sure the ConfigMap and the
// launcher ServiceAccount, Role and RoleBinding exist, then reconciles the
// worker StatefulSet. Once the workers are ready (or none are needed) and no
// launcher exists yet, the launcher Job is created. Finally the MPIJob status
// is updated and a "synced" event is recorded.
//
// A malformed key or an MPIJob that no longer exists is reported through
// runtime.HandleError and yields nil; every other failure is returned.
func (c *MPIJobController) syncHandler(key string) error {
	// Convert the namespace/name string into a distinct namespace and name.
	namespace, name, err := cache.SplitMetaNamespaceKey(key)
	if err != nil {
		runtime.HandleError(fmt.Errorf("invalid resource key: %s", key))
		return nil
	}

	// Look the MPIJob up in the informer cache; it may be gone by now.
	mpiJob, err := c.mpiJobLister.MPIJobs(namespace).Get(name)
	switch {
	case errors.IsNotFound(err):
		runtime.HandleError(fmt.Errorf("mpi job '%s' in work queue no longer exists", key))
		return nil
	case err != nil:
		return err
	}

	launcher, err := c.getLauncherJob(mpiJob)
	if err != nil {
		return err
	}

	// The job is finished once the launcher either succeeded or failed.
	finished := false
	if launcher != nil {
		finished = launcher.Status.Succeeded == 1 || launcher.Status.Failed == 1
	}

	totalGPUs := getTotalGPUs(mpiJob)
	workerReplicas := c.getWorkerReplicas(totalGPUs, finished)

	// No single pod gets more GPUs than one node provides.
	gpusPerPod := c.gpusPerNode
	if totalGPUs <= c.gpusPerNode {
		gpusPerPod = totalGPUs
	}

	if !finished {
		// Each getter creates its resource when it is missing.
		config, err := c.getConfigMap(mpiJob, workerReplicas, gpusPerPod)
		if err != nil || config == nil {
			return err
		}
		serviceAccount, err := c.getLauncherServiceAccount(mpiJob)
		if err != nil || serviceAccount == nil {
			return err
		}
		role, err := c.getLauncherRole(mpiJob, workerReplicas)
		if err != nil || role == nil {
			return err
		}
		roleBinding, err := c.getLauncherRoleBinding(mpiJob)
		if err != nil || roleBinding == nil {
			return err
		}
	}

	worker, err := c.getWorkerStatefulSet(mpiJob, workerReplicas)
	if err != nil {
		return err
	}

	// Start the launcher only after all desired workers report ready.
	workersReady := workerReplicas == 0 || int(worker.Status.ReadyReplicas) == workerReplicas
	if launcher == nil && workersReady {
		// The launcher is capped to one node's worth of GPUs, the same cap
		// that was computed above.
		launcher, err = c.kubeClient.BatchV1().Jobs(namespace).Create(newLauncher(mpiJob, gpusPerPod, c.kubectlDeliveryImage))
		if err != nil {
			return err
		}
	}

	// Reflect the current state of the world in the MPIJob status.
	if err := c.updateMPIJobStatus(mpiJob, launcher, worker); err != nil {
		return err
	}

	c.recorder.Event(mpiJob, corev1.EventTypeNormal, SuccessSynced, MessageResourceSynced)
	return nil
}
// getLauncherJob gets the launcher Job controlled by this MPIJob.
//
// It returns (nil, nil) when no launcher Job exists yet. When a Job with the
// launcher name exists but is not controlled by mpiJob, a warning event is
// recorded and an error is returned. Any other lister error is returned
// unchanged. The returned Job is nil whenever the error is non-nil.
func (c *MPIJobController) getLauncherJob(mpiJob *kubeflow.MPIJob) (*batchv1.Job, error) {
	launcher, err := c.jobLister.Jobs(mpiJob.Namespace).Get(mpiJob.Name + launcherSuffix)
	if errors.IsNotFound(err) {
		return nil, nil
	}
	if err != nil {
		// If an error occurs during Get, we'll requeue the item so we can
		// attempt processing again later. This could have been caused by a
		// temporary network failure, or any other transient reason.
		return nil, err
	}

	// If the launcher is not controlled by this MPIJob resource, we should log
	// a warning to the event recorder and return.
	if !metav1.IsControlledBy(launcher, mpiJob) {
		msg := fmt.Sprintf(MessageResourceExists, launcher.Name)
		c.recorder.Event(mpiJob, corev1.EventTypeWarning, ErrResourceExists, msg)
		// msg is data, not a format string, so it is passed as an argument.
		return nil, fmt.Errorf("%s", msg)
	}

	return launcher, nil
}
// getTotalGPUs returns the number of GPUs requested in the MPIJob spec, or 1
// when the spec leaves the GPU count unset.
func getTotalGPUs(mpiJob *kubeflow.MPIJob) int {
	if requested := mpiJob.Spec.GPUs; requested != nil {
		return int(*requested)
	}
	return 1
}
// getWorkerReplicas returns the desired number of worker replicas: zero when
// totalGPUs fits on a single node or when the job is done, otherwise
// totalGPUs/gpusPerNode (integer division) minus one.
func (c *MPIJobController) getWorkerReplicas(totalGPUs int, done bool) int {
	if totalGPUs <= c.gpusPerNode {
		return 0
	}

	// The launcher also does work, so the # worker replicas needed is deducted by 1.
	replicas := totalGPUs/c.gpusPerNode - 1
	if done {
		return 0
	}
	return replicas
}
// getConfigMap gets the ConfigMap controlled by this MPIJob, creating it from
// newConfigMap(mpiJob, workerReplicas, gpusPerWorker) when the lister reports
// it as not found.
//
// It returns an error when the Get/Create fails, or when a ConfigMap with the
// expected name exists but is not controlled by mpiJob (a warning event is
// recorded in that case). The returned ConfigMap is nil whenever the error is
// non-nil.
func (c *MPIJobController) getConfigMap(mpiJob *kubeflow.MPIJob, workerReplicas int, gpusPerWorker int) (*corev1.ConfigMap, error) {
	cm, err := c.configMapLister.ConfigMaps(mpiJob.Namespace).Get(mpiJob.Name + configSuffix)
	// If the ConfigMap doesn't exist, we'll create it.
	if errors.IsNotFound(err) {
		cm, err = c.kubeClient.CoreV1().ConfigMaps(mpiJob.Namespace).Create(newConfigMap(mpiJob, workerReplicas, gpusPerWorker))
	}
	// If an error occurs during Get/Create, we'll requeue the item so we
	// can attempt processing again later. This could have been caused by a
	// temporary network failure, or any other transient reason.
	if err != nil {
		return nil, err
	}

	// If the ConfigMap is not controlled by this MPIJob resource, we
	// should log a warning to the event recorder and return.
	if !metav1.IsControlledBy(cm, mpiJob) {
		msg := fmt.Sprintf(MessageResourceExists, cm.Name)
		c.recorder.Event(mpiJob, corev1.EventTypeWarning, ErrResourceExists, msg)
		// msg is data, not a format string, so it is passed as an argument.
		return nil, fmt.Errorf("%s", msg)
	}

	return cm, nil
}
// getLauncherServiceAccount gets the launcher ServiceAccount controlled by
// this MPIJob, creating it from newLauncherServiceAccount(mpiJob) when the
// lister reports it as not found.
//
// It returns an error when the Get/Create fails, or when a ServiceAccount with
// the launcher name exists but is not controlled by mpiJob (a warning event is
// recorded in that case). The returned ServiceAccount is nil whenever the
// error is non-nil.
func (c *MPIJobController) getLauncherServiceAccount(mpiJob *kubeflow.MPIJob) (*corev1.ServiceAccount, error) {
	sa, err := c.serviceAccountLister.ServiceAccounts(mpiJob.Namespace).Get(mpiJob.Name + launcherSuffix)
	// If the ServiceAccount doesn't exist, we'll create it.
	if errors.IsNotFound(err) {
		sa, err = c.kubeClient.CoreV1().ServiceAccounts(mpiJob.Namespace).Create(newLauncherServiceAccount(mpiJob))
	}
	// If an error occurs during Get/Create, we'll requeue the item so we
	// can attempt processing again later. This could have been caused by a
	// temporary network failure, or any other transient reason.
	if err != nil {
		return nil, err
	}

	// If the launcher ServiceAccount is not controlled by this MPIJob resource, we
	// should log a warning to the event recorder and return.
	if !metav1.IsControlledBy(sa, mpiJob) {
		msg := fmt.Sprintf(MessageResourceExists, sa.Name)
		c.recorder.Event(mpiJob, corev1.EventTypeWarning, ErrResourceExists, msg)
		// msg is data, not a format string, so it is passed as an argument.
		return nil, fmt.Errorf("%s", msg)
	}

	return sa, nil
}
// getLauncherRole gets the launcher Role controlled by this MPIJob, creating
// it from newLauncherRole(mpiJob, workerReplicas) when the lister reports it
// as not found.
//
// It returns an error when the Get/Create fails, or when a Role with the
// launcher name exists but is not controlled by mpiJob (a warning event is
// recorded in that case). The returned Role is nil whenever the error is
// non-nil.
func (c *MPIJobController) getLauncherRole(mpiJob *kubeflow.MPIJob, workerReplicas int) (*rbacv1.Role, error) {
	role, err := c.roleLister.Roles(mpiJob.Namespace).Get(mpiJob.Name + launcherSuffix)
	// If the Role doesn't exist, we'll create it.
	if errors.IsNotFound(err) {
		role, err = c.kubeClient.RbacV1().Roles(mpiJob.Namespace).Create(newLauncherRole(mpiJob, workerReplicas))
	}
	// If an error occurs during Get/Create, we'll requeue the item so we
	// can attempt processing again later. This could have been caused by a
	// temporary network failure, or any other transient reason.
	if err != nil {
		return nil, err
	}

	// If the launcher Role is not controlled by this MPIJob resource, we
	// should log a warning to the event recorder and return.
	if !metav1.IsControlledBy(role, mpiJob) {
		msg := fmt.Sprintf(MessageResourceExists, role.Name)
		c.recorder.Event(mpiJob, corev1.EventTypeWarning, ErrResourceExists, msg)
		// msg is data, not a format string, so it is passed as an argument.
		return nil, fmt.Errorf("%s", msg)
	}

	return role, nil
}
// getLauncherRoleBinding gets the launcher RoleBinding controlled by this
// MPIJob, creating it from newLauncherRoleBinding(mpiJob) when the lister
// reports it as not found.
//
// It returns an error when the Get/Create fails, or when a RoleBinding with
// the launcher name exists but is not controlled by mpiJob (a warning event is
// recorded in that case). The returned RoleBinding is nil whenever the error
// is non-nil.
func (c *MPIJobController) getLauncherRoleBinding(mpiJob *kubeflow.MPIJob) (*rbacv1.RoleBinding, error) {
	rb, err := c.roleBindingLister.RoleBindings(mpiJob.Namespace).Get(mpiJob.Name + launcherSuffix)
	// If the RoleBinding doesn't exist, we'll create it.
	if errors.IsNotFound(err) {
		rb, err = c.kubeClient.RbacV1().RoleBindings(mpiJob.Namespace).Create(newLauncherRoleBinding(mpiJob))
	}
	// If an error occurs during Get/Create, we'll requeue the item so we
	// can attempt processing again later. This could have been caused by a
	// temporary network failure, or any other transient reason.
	if err != nil {
		return nil, err
	}

	// If the launcher RoleBinding is not controlled by this MPIJob resource, we
	// should log a warning to the event recorder and return.
	if !metav1.IsControlledBy(rb, mpiJob) {
		msg := fmt.Sprintf(MessageResourceExists, rb.Name)
		c.recorder.Event(mpiJob, corev1.EventTypeWarning, ErrResourceExists, msg)
		// msg is data, not a format string, so it is passed as an argument.
		return nil, fmt.Errorf("%s", msg)
	}

	return rb, nil
}
// getWorkerStatefulSet gets the worker StatefulSet controlled by this MPIJob
// and brings its replica count in line with workerReplicas.
//
//   - When the StatefulSet is not found and workerReplicas > 0, it is created.
//   - When it is not found and workerReplicas == 0, (nil, nil) is returned.
//   - When it exists but is not controlled by mpiJob, a warning event is
//     recorded and an error is returned.
//   - When its spec replica count differs from workerReplicas, it is updated
//     with a freshly built worker spec.
//
// The returned StatefulSet is nil whenever the error is non-nil.
func (c *MPIJobController) getWorkerStatefulSet(mpiJob *kubeflow.MPIJob, workerReplicas int) (*appsv1.StatefulSet, error) {
	worker, err := c.statefulSetLister.StatefulSets(mpiJob.Namespace).Get(mpiJob.Name + workerSuffix)
	// If the StatefulSet doesn't exist, we'll create it.
	if errors.IsNotFound(err) && workerReplicas > 0 {
		worker, err = c.kubeClient.AppsV1().StatefulSets(mpiJob.Namespace).Create(newWorker(mpiJob, int32(workerReplicas), c.gpusPerNode))
	}
	// If an error occurs during Get/Create, we'll requeue the item so we
	// can attempt processing again later. This could have been caused by a
	// temporary network failure, or any other transient reason.
	// A remaining not-found error is tolerated here.
	if err != nil && !errors.IsNotFound(err) {
		return nil, err
	}

	// If the worker is not controlled by this MPIJob resource, we should log
	// a warning to the event recorder and return.
	if worker != nil && !metav1.IsControlledBy(worker, mpiJob) {
		msg := fmt.Sprintf(MessageResourceExists, worker.Name)
		c.recorder.Event(mpiJob, corev1.EventTypeWarning, ErrResourceExists, msg)
		// msg is data, not a format string, so it is passed as an argument.
		return nil, fmt.Errorf("%s", msg)
	}

	// If the worker is out of date, update the worker.
	if worker != nil && int(*worker.Spec.Replicas) != workerReplicas {
		worker, err = c.kubeClient.AppsV1().StatefulSets(mpiJob.Namespace).Update(newWorker(mpiJob, int32(workerReplicas), c.gpusPerNode))
		// If an error occurs during Update, we'll requeue the item so we can
		// attempt processing again later. This could have been caused by a
		// temporary network failure, or any other transient reason.
		if err != nil {
			return nil, err
		}
	}

	return worker, nil
}
// updateMPIJobStatus writes the observed launcher and worker state into the
// status of a copy of mpiJob and sends that copy to the API server.
//
// The launcher status becomes Active, Succeeded or Failed depending on which
// launcher counter is positive (checked in that order); it is left untouched
// when launcher is nil or all counters are zero. WorkerReplicas is set to the
// worker's ready replica count when worker is non-nil.
func (c *MPIJobController) updateMPIJobStatus(mpiJob *kubeflow.MPIJob, launcher *batchv1.Job, worker *appsv1.StatefulSet) error {
	// Objects coming from the informer store are a shared, read-only cache,
	// so all modifications are made on a deep copy.
	updated := mpiJob.DeepCopy()

	if launcher != nil {
		switch {
		case launcher.Status.Active > 0:
			updated.Status.LauncherStatus = kubeflow.LauncherActive
		case launcher.Status.Succeeded > 0:
			updated.Status.LauncherStatus = kubeflow.LauncherSucceeded
		case launcher.Status.Failed > 0:
			updated.Status.LauncherStatus = kubeflow.LauncherFailed
		}
	}

	if worker != nil {
		updated.Status.WorkerReplicas = worker.Status.ReadyReplicas
	}

	// Until #38113 is merged, we must use Update instead of UpdateStatus to
	// update the Status block of the MPIJob resource. UpdateStatus will not
	// allow changes to the Spec of the resource, which is ideal for ensuring
	// nothing other than resource status has been updated.
	_, err := c.kubeflowClient.KubeflowV1alpha1().MPIJobs(mpiJob.Namespace).Update(updated)
	return err
}
// enqueueMPIJob derives the namespace/name key of the given MPIJob and adds
// it to the work queue with rate limiting. If the key cannot be derived, the
// error is reported and nothing is enqueued. This method should *not* be
// passed resources of any type other than MPIJob.
func (c *MPIJobController) enqueueMPIJob(obj interface{}) {
	key, err := cache.MetaNamespaceKeyFunc(obj)
	if err != nil {
		runtime.HandleError(err)
		return
	}
	c.queue.AddRateLimited(key)
}
// handleObject accepts any resource implementing metav1.Object (or a
// DeletedFinalStateUnknown tombstone wrapping one) and enqueues the MPIJob
// that controls it.
//
// Objects without a controller reference, objects whose controller is not of
// kind "MPIJob", and objects whose owning MPIJob cannot be fetched from the
// lister are skipped.
func (c *MPIJobController) handleObject(obj interface{}) {
	object, isObject := obj.(metav1.Object)
	if !isObject {
		// Not an object itself, so it has to be a tombstone wrapping one.
		tombstone, isTombstone := obj.(cache.DeletedFinalStateUnknown)
		if !isTombstone {
			runtime.HandleError(fmt.Errorf("error decoding object, invalid type"))
			return
		}
		recovered, ok := tombstone.Obj.(metav1.Object)
		if !ok {
			runtime.HandleError(fmt.Errorf("error decoding object tombstone, invalid type"))
			return
		}
		object = recovered
		glog.V(4).Infof("Recovered deleted object '%s' from tombstone", object.GetName())
	}
	glog.V(4).Infof("Processing object: %s", object.GetName())

	// Only objects controlled by an MPIJob are of interest.
	ownerRef := metav1.GetControllerOf(object)
	if ownerRef == nil || ownerRef.Kind != "MPIJob" {
		return
	}

	mpiJob, err := c.mpiJobLister.MPIJobs(object.GetNamespace()).Get(ownerRef.Name)
	if err != nil {
		glog.V(4).Infof("ignoring orphaned object '%s' of mpi job '%s'", object.GetSelfLink(), ownerRef.Name)
		return
	}

	c.enqueueMPIJob(mpiJob)
}
// newConfigMap builds the ConfigMap for an MPIJob. It carries two entries:
//
//   - the hostfile: a "localhost" line followed by one line per worker replica
//     (named <job name><workerSuffix>-<index>), each with slots and max_slots
//     set to gpusPerWorker;
//   - the kubexec script, which runs a command in a pod via the kubectl binary
//     found under kubectlMountPath.
//
// The ConfigMap's controller reference points at mpiJob so handleObject can
// discover the MPIJob resource that 'owns' it.
func newConfigMap(mpiJob *kubeflow.MPIJob, workerReplicas int, gpusPerWorker int) *corev1.ConfigMap {
	kubexecScript := fmt.Sprintf(`#!/bin/sh
set -x
POD_NAME=$1
shift
%s/kubectl exec ${POD_NAME} -- /bin/sh -c "$*"
`, kubectlMountPath)

	var hostfile bytes.Buffer
	fmt.Fprintf(&hostfile, "localhost slots=%d max_slots=%d\n", gpusPerWorker, gpusPerWorker)
	for replica := 0; replica < workerReplicas; replica++ {
		fmt.Fprintf(&hostfile, "%s%s-%d slots=%d max_slots=%d\n", mpiJob.Name, workerSuffix, replica, gpusPerWorker, gpusPerWorker)
	}

	meta := metav1.ObjectMeta{
		Name:      mpiJob.Name + configSuffix,
		Namespace: mpiJob.Namespace,
		OwnerReferences: []metav1.OwnerReference{
			*metav1.NewControllerRef(mpiJob, kubeflow.SchemeGroupVersionKind),
		},
	}
	data := map[string]string{
		hostfileName:      hostfile.String(),
		kubexecScriptName: kubexecScript,
	}
	return &corev1.ConfigMap{ObjectMeta: meta, Data: data}
}
// newLauncherServiceAccount builds the launcher ServiceAccount for an MPIJob.
// It is named <job name><launcherSuffix>, labelled with app=<job name>, and
// carries a controller reference to mpiJob so handleObject can discover the
// MPIJob resource that 'owns' it.
func newLauncherServiceAccount(mpiJob *kubeflow.MPIJob) *corev1.ServiceAccount {
	owner := *metav1.NewControllerRef(mpiJob, kubeflow.SchemeGroupVersionKind)
	meta := metav1.ObjectMeta{
		Name:            mpiJob.Name + launcherSuffix,
		Namespace:       mpiJob.Namespace,
		Labels:          map[string]string{"app": mpiJob.Name},
		OwnerReferences: []metav1.OwnerReference{owner},
	}
	return &corev1.ServiceAccount{ObjectMeta: meta}
}
// newLauncherRole builds the launcher Role for an MPIJob. The Role allows
// "get" on pods and "create" on pods/exec, with ResourceNames set to the
// worker pod names <job name><workerSuffix>-<index> for every index below
// workerReplicas. It carries a controller reference to mpiJob so handleObject
// can discover the MPIJob resource that 'owns' it.
//
// NOTE(review): with workerReplicas == 0 the ResourceNames list is empty;
// confirm how RBAC interprets an empty list and that the resulting scope is
// the intended one.
func newLauncherRole(mpiJob *kubeflow.MPIJob, workerReplicas int) *rbacv1.Role {
	// Left nil (not an empty slice) when there are no workers.
	var workerPods []string
	for replica := 0; replica < workerReplicas; replica++ {
		workerPods = append(workerPods, fmt.Sprintf("%s%s-%d", mpiJob.Name, workerSuffix, replica))
	}

	getPods := rbacv1.PolicyRule{
		Verbs:         []string{"get"},
		APIGroups:     []string{""},
		Resources:     []string{"pods"},
		ResourceNames: workerPods,
	}
	execInPods := rbacv1.PolicyRule{
		Verbs:         []string{"create"},
		APIGroups:     []string{""},
		Resources:     []string{"pods/exec"},
		ResourceNames: workerPods,
	}

	owner := *metav1.NewControllerRef(mpiJob, kubeflow.SchemeGroupVersionKind)
	return &rbacv1.Role{
		ObjectMeta: metav1.ObjectMeta{
			Name:            mpiJob.Name + launcherSuffix,
			Namespace:       mpiJob.Namespace,
			Labels:          map[string]string{"app": mpiJob.Name},
			OwnerReferences: []metav1.OwnerReference{owner},
		},
		Rules: []rbacv1.PolicyRule{getPods, execInPods},
	}
}
// newLauncherRoleBinding builds the launcher RoleBinding for an MPIJob. It
// binds the Role named <job name><launcherSuffix> to the ServiceAccount of the
// same name in the MPIJob's namespace, and carries a controller reference to
// mpiJob so handleObject can discover the MPIJob resource that 'owns' it.
func newLauncherRoleBinding(mpiJob *kubeflow.MPIJob) *rbacv1.RoleBinding {
	// The binding, the Role and the ServiceAccount all share this name.
	name := mpiJob.Name + launcherSuffix

	subject := rbacv1.Subject{
		Kind:      rbacv1.ServiceAccountKind,
		Name:      name,
		Namespace: mpiJob.Namespace,
	}
	roleRef := rbacv1.RoleRef{
		APIGroup: rbacv1.GroupName,
		Kind:     "Role",
		Name:     name,
	}
	owner := *metav1.NewControllerRef(mpiJob, kubeflow.SchemeGroupVersionKind)

	return &rbacv1.RoleBinding{
		ObjectMeta: metav1.ObjectMeta{
			Name:            name,
			Namespace:       mpiJob.Namespace,
			Labels:          map[string]string{"app": mpiJob.Name},
			OwnerReferences: []metav1.OwnerReference{owner},
		},
		Subjects: []rbacv1.Subject{subject},
		RoleRef:  roleRef,
	}
}
// newWorker builds the worker StatefulSet for an MPIJob from a copy of the
// MPIJob's pod template. In the first container of that copy:
//
//   - the command is replaced with "sleep 365d";
//   - the gpuResourceName limit is set to gpus;
//   - the config volume is mounted at configMountPath.
//
// The config volume exposes only the kubexec script (mode 0555) from the
// MPIJob's ConfigMap. The StatefulSet uses parallel pod management, runs
// desiredReplicas replicas and carries a controller reference to mpiJob so
// handleObject can discover the MPIJob resource that 'owns' it.
func newWorker(mpiJob *kubeflow.MPIJob, desiredReplicas int32, gpus int) *appsv1.StatefulSet {
	workerName := mpiJob.Name + workerSuffix
	labels := map[string]string{
		"app": workerName,
	}

	// Work on a copy so the MPIJob's own template is never modified.
	template := mpiJob.Spec.Template.DeepCopy()
	template.Labels = labels

	main := &template.Spec.Containers[0]
	main.Command = []string{"sleep"}
	main.Args = []string{"365d"}
	if main.Resources.Limits == nil {
		main.Resources.Limits = make(corev1.ResourceList)
	}
	main.Resources.Limits[gpuResourceName] = *resource.NewQuantity(int64(gpus), resource.DecimalExponent)

	// We need the kubexec.sh script here because Open MPI checks for the path
	// in every rank.
	main.VolumeMounts = append(main.VolumeMounts, corev1.VolumeMount{
		Name:      configVolumeName,
		MountPath: configMountPath,
	})

	scriptMode := int32(0555)
	configSource := &corev1.ConfigMapVolumeSource{
		LocalObjectReference: corev1.LocalObjectReference{
			Name: mpiJob.Name + configSuffix,
		},
		Items: []corev1.KeyToPath{
			{
				Key:  kubexecScriptName,
				Path: kubexecScriptName,
				Mode: &scriptMode,
			},
		},
	}
	template.Spec.Volumes = append(template.Spec.Volumes, corev1.Volume{
		Name:         configVolumeName,
		VolumeSource: corev1.VolumeSource{ConfigMap: configSource},
	})

	return &appsv1.StatefulSet{
		ObjectMeta: metav1.ObjectMeta{
			Name:      workerName,
			Namespace: mpiJob.Namespace,
			Labels:    labels,
			OwnerReferences: []metav1.OwnerReference{
				*metav1.NewControllerRef(mpiJob, kubeflow.SchemeGroupVersionKind),
			},
		},
		Spec: appsv1.StatefulSetSpec{
			PodManagementPolicy: appsv1.ParallelPodManagement,
			Replicas:            &desiredReplicas,
			Selector:            &metav1.LabelSelector{MatchLabels: labels},
			ServiceName:         workerName,
			Template:            *template,
		},
	}
}
// newLauncher builds the launcher Job for an MPIJob from a copy of the
// MPIJob's pod template. The copy is changed as follows:
//
//   - the pod runs under the launcher ServiceAccount and restarts on failure;
//   - an init container running kubectlDeliveryImage is appended, with the
//     kubectl volume mounted at kubectlMountPath and that path passed in the
//     kubectlTargetDirEnv variable;
//   - the first container gets the OMPI_MCA_plm_rsh_agent and
//     OMPI_MCA_orte_default_hostfile variables pointing at the kubexec script
//     and hostfile under configMountPath, a gpuResourceName limit of gpus, and
//     mounts for the kubectl and config volumes;
//   - the kubectl volume is an EmptyDir; the config volume exposes the kubexec
//     script (mode 0555) and the hostfile (mode 0444) from the ConfigMap.
//
// The Job carries a controller reference to mpiJob so handleObject can
// discover the MPIJob resource that 'owns' it.
func newLauncher(mpiJob *kubeflow.MPIJob, gpus int, kubectlDeliveryImage string) *batchv1.Job {
	name := mpiJob.Name + launcherSuffix
	labels := map[string]string{
		"app": name,
	}

	// Work on a copy so the MPIJob's own template is never modified.
	template := mpiJob.Spec.Template.DeepCopy()
	template.Labels = labels
	template.Spec.ServiceAccountName = name

	kubectlMount := corev1.VolumeMount{
		Name:      kubectlVolumeName,
		MountPath: kubectlMountPath,
	}
	configMount := corev1.VolumeMount{
		Name:      configVolumeName,
		MountPath: configMountPath,
	}

	// The init container places the kubectl binary on the shared volume.
	template.Spec.InitContainers = append(template.Spec.InitContainers, corev1.Container{
		Name:  kubectlDeliveryName,
		Image: kubectlDeliveryImage,
		Env: []corev1.EnvVar{
			{
				Name:  kubectlTargetDirEnv,
				Value: kubectlMountPath,
			},
		},
		VolumeMounts: []corev1.VolumeMount{kubectlMount},
	})

	main := &template.Spec.Containers[0]
	main.Env = append(main.Env,
		corev1.EnvVar{
			Name:  "OMPI_MCA_plm_rsh_agent",
			Value: fmt.Sprintf("%s/%s", configMountPath, kubexecScriptName),
		},
		corev1.EnvVar{
			Name:  "OMPI_MCA_orte_default_hostfile",
			Value: fmt.Sprintf("%s/%s", configMountPath, hostfileName),
		})
	if main.Resources.Limits == nil {
		main.Resources.Limits = make(corev1.ResourceList)
	}
	main.Resources.Limits[gpuResourceName] = *resource.NewQuantity(int64(gpus), resource.DecimalExponent)
	main.VolumeMounts = append(main.VolumeMounts, kubectlMount, configMount)

	template.Spec.RestartPolicy = corev1.RestartPolicyOnFailure

	scriptsMode := int32(0555)
	hostfileMode := int32(0444)
	kubectlVolume := corev1.Volume{
		Name: kubectlVolumeName,
		VolumeSource: corev1.VolumeSource{
			EmptyDir: &corev1.EmptyDirVolumeSource{},
		},
	}
	configVolume := corev1.Volume{
		Name: configVolumeName,
		VolumeSource: corev1.VolumeSource{
			ConfigMap: &corev1.ConfigMapVolumeSource{
				LocalObjectReference: corev1.LocalObjectReference{
					Name: mpiJob.Name + configSuffix,
				},
				Items: []corev1.KeyToPath{
					{
						Key:  kubexecScriptName,
						Path: kubexecScriptName,
						Mode: &scriptsMode,
					},
					{
						Key:  hostfileName,
						Path: hostfileName,
						Mode: &hostfileMode,
					},
				},
			},
		},
	}
	template.Spec.Volumes = append(template.Spec.Volumes, kubectlVolume, configVolume)

	return &batchv1.Job{
		ObjectMeta: metav1.ObjectMeta{
			Name:      name,
			Namespace: mpiJob.Namespace,
			Labels:    labels,
			OwnerReferences: []metav1.OwnerReference{
				*metav1.NewControllerRef(mpiJob, kubeflow.SchemeGroupVersionKind),
			},
		},
		Spec: batchv1.JobSpec{
			Template: *template,
		},
	}
}

View File

@ -0,0 +1,648 @@
// Copyright 2018 The Kubeflow Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package controllers
import (
"reflect"
"testing"
"time"
appsv1 "k8s.io/api/apps/v1"
batchv1 "k8s.io/api/batch/v1"
corev1 "k8s.io/api/core/v1"
rbacv1 "k8s.io/api/rbac/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/runtime/schema"
"k8s.io/apimachinery/pkg/util/diff"
kubeinformers "k8s.io/client-go/informers"
k8sfake "k8s.io/client-go/kubernetes/fake"
core "k8s.io/client-go/testing"
"k8s.io/client-go/tools/cache"
"k8s.io/client-go/tools/record"
kubeflow "github.com/kubeflow/mpi-operator/pkg/apis/kubeflow/v1alpha1"
"github.com/kubeflow/mpi-operator/pkg/client/clientset/versioned/fake"
informers "github.com/kubeflow/mpi-operator/pkg/client/informers/externalversions"
)
var (
	// alwaysReady is a stand-in "has synced" function that always reports
	// true.
	alwaysReady = func() bool {
		return true
	}

	// noResyncPeriodFunc returns a zero duration; it supplies the resync
	// period for the informer factories built in the tests.
	noResyncPeriodFunc = func() time.Duration {
		return time.Duration(0)
	}
)
// fixture carries the state of one controller test: the fake clientsets, the
// objects used to seed the informer stores and fake clientsets, and the
// actions the test expects.
type fixture struct {
// t is the test the fixture belongs to.
t *testing.T
// client is the fake clientset for the MPIJob API group and kubeClient the
// fake Kubernetes clientset; both are (re)built by newController.
client *fake.Clientset
kubeClient *k8sfake.Clientset
// Objects to put in the store.
configMapLister []*corev1.ConfigMap
serviceAccountLister []*corev1.ServiceAccount
roleLister []*rbacv1.Role
roleBindingLister []*rbacv1.RoleBinding
statefulSetLister []*appsv1.StatefulSet
jobLister []*batchv1.Job
mpiJobLister []*kubeflow.MPIJob
// Actions expected to happen on the client.
kubeActions []core.Action
actions []core.Action
// Objects from here are pre-loaded into NewSimpleFake.
// kubeObjects seed the Kubernetes fake clientset, objects the MPIJob one.
kubeObjects []runtime.Object
objects []runtime.Object
}
// newFixture returns a fixture bound to t whose pre-load object lists are
// empty (non-nil) slices.
func newFixture(t *testing.T) *fixture {
	return &fixture{
		t:           t,
		objects:     []runtime.Object{},
		kubeObjects: []runtime.Object{},
	}
}
// newMPIJob returns an MPIJob called name in the default namespace. Its spec
// requests gpus GPUs (nil leaves the count unset) and its pod template holds
// a single container named "foo" running image "bar".
func newMPIJob(name string, gpus *int32) *kubeflow.MPIJob {
	template := corev1.PodTemplateSpec{
		Spec: corev1.PodSpec{
			Containers: []corev1.Container{
				{
					Name:  "foo",
					Image: "bar",
				},
			},
		},
	}

	job := &kubeflow.MPIJob{}
	job.TypeMeta = metav1.TypeMeta{APIVersion: kubeflow.SchemeGroupVersion.String()}
	job.ObjectMeta = metav1.ObjectMeta{
		Name:      name,
		Namespace: metav1.NamespaceDefault,
	}
	job.Spec = kubeflow.MPIJobSpec{
		GPUs:     gpus,
		Template: template,
	}
	return job
}
// newController creates the fake clientsets from f.objects and f.kubeObjects,
// builds an MPIJobController on top of informers backed by those clientsets,
// forces every "synced" check to succeed, installs a fake event recorder, and
// copies the fixture's lister slices into the corresponding informer indexers.
//
// It returns the controller plus the MPIJob and Kubernetes informer factories
// so that the caller can start them.
func (f *fixture) newController() (*MPIJobController, informers.SharedInformerFactory, kubeinformers.SharedInformerFactory) {
	f.client = fake.NewSimpleClientset(f.objects...)
	f.kubeClient = k8sfake.NewSimpleClientset(f.kubeObjects...)

	mpiFactory := informers.NewSharedInformerFactory(f.client, noResyncPeriodFunc())
	kubeFactory := kubeinformers.NewSharedInformerFactory(f.kubeClient, noResyncPeriodFunc())

	configMaps := kubeFactory.Core().V1().ConfigMaps()
	serviceAccounts := kubeFactory.Core().V1().ServiceAccounts()
	roles := kubeFactory.Rbac().V1().Roles()
	roleBindings := kubeFactory.Rbac().V1().RoleBindings()
	statefulSets := kubeFactory.Apps().V1().StatefulSets()
	jobs := kubeFactory.Batch().V1().Jobs()
	mpiJobs := mpiFactory.Kubeflow().V1alpha1().MPIJobs()

	// NOTE(review): the literal 8 is presumably the GPUs-per-node setting and
	// the string the kubectl delivery image; confirm against NewMPIJobController.
	controller := NewMPIJobController(
		f.kubeClient,
		f.client,
		configMaps,
		serviceAccounts,
		roles,
		roleBindings,
		statefulSets,
		jobs,
		mpiJobs,
		8,
		"kubectl-delivery")

	// Tests never wait for real cache syncs.
	controller.configMapSynced = alwaysReady
	controller.serviceAccountSynced = alwaysReady
	controller.roleSynced = alwaysReady
	controller.roleBindingSynced = alwaysReady
	controller.statefulSetSynced = alwaysReady
	controller.jobSynced = alwaysReady
	controller.mpiJobSynced = alwaysReady
	controller.recorder = &record.FakeRecorder{}

	// Seed the informer stores directly with the objects the test registered.
	for _, obj := range f.configMapLister {
		configMaps.Informer().GetIndexer().Add(obj)
	}
	for _, obj := range f.serviceAccountLister {
		serviceAccounts.Informer().GetIndexer().Add(obj)
	}
	for _, obj := range f.roleLister {
		roles.Informer().GetIndexer().Add(obj)
	}
	for _, obj := range f.roleBindingLister {
		roleBindings.Informer().GetIndexer().Add(obj)
	}
	for _, obj := range f.statefulSetLister {
		statefulSets.Informer().GetIndexer().Add(obj)
	}
	for _, obj := range f.jobLister {
		jobs.Informer().GetIndexer().Add(obj)
	}
	for _, obj := range f.mpiJobLister {
		mpiJobs.Informer().GetIndexer().Add(obj)
	}

	return controller, mpiFactory, kubeFactory
}
// run syncs the given MPIJob key with informers started and requires the sync
// to succeed.
func (f *fixture) run(mpiJobName string) {
	const (
		startInformers = true
		expectError    = false
	)
	f.runController(mpiJobName, startInformers, expectError)
}
// runExpectError syncs the given MPIJob key with informers started and
// requires the sync to return an error.
func (f *fixture) runExpectError(mpiJobName string) {
	const (
		startInformers = true
		expectError    = true
	)
	f.runController(mpiJobName, startInformers, expectError)
}
// runController builds a controller from the fixture, optionally starts both
// informer factories, runs one sync for mpiJobName, and then verifies:
//
//   - the sync returned an error exactly when expectError is set;
//   - the non-informer actions recorded by the MPIJob client match f.actions;
//   - the non-informer actions recorded by the Kubernetes client match
//     f.kubeActions.
//
// All mismatches are reported on f.t.
func (f *fixture) runController(mpiJobName string, startInformers bool, expectError bool) {
	controller, mpiFactory, kubeFactory := f.newController()
	if startInformers {
		stop := make(chan struct{})
		// Informers keep running until the end of this function.
		defer close(stop)
		mpiFactory.Start(stop)
		kubeFactory.Start(stop)
	}

	syncErr := controller.syncHandler(mpiJobName)
	switch {
	case syncErr != nil && !expectError:
		f.t.Errorf("error syncing mpi job: %v", syncErr)
	case syncErr == nil && expectError:
		f.t.Error("expected error syncing mpi job, got nil")
	}

	// verify compares recorded actions against the expected ones pairwise and
	// reports any surplus on either side.
	verify := func(want, got []core.Action) {
		for idx, action := range got {
			if idx >= len(want) {
				f.t.Errorf("%d unexpected actions: %+v", len(got)-len(want), got[idx:])
				break
			}
			checkAction(want[idx], action, f.t)
		}
		if len(want) > len(got) {
			f.t.Errorf("%d additional expected actions:%+v", len(want)-len(got), want[len(got):])
		}
	}

	verify(f.actions, filterInformerActions(f.client.Actions()))
	verify(f.kubeActions, filterInformerActions(f.kubeClient.Actions()))
}
// checkAction verifies that the actual action matches the expected one. The
// checks run in order and stop at the first failing stage:
//
//  1. verb, resource and subresource must agree;
//  2. both actions must have the same concrete Go type;
//  3. for create and update actions the attached objects must be deeply equal,
//     and for patch actions the patch payloads must be deeply equal.
//
// Every mismatch is reported through t.Errorf; nothing is returned.
func checkAction(expected, actual core.Action, t *testing.T) {
	if !(expected.Matches(actual.GetVerb(), actual.GetResource().Resource) && actual.GetSubresource() == expected.GetSubresource()) {
		t.Errorf("Expected\n\t%#v\ngot\n\t%#v", expected, actual)
		return
	}

	if reflect.TypeOf(actual) != reflect.TypeOf(expected) {
		// %T prints the dynamic type. The lowercase %t is the boolean verb and
		// would render these values as "%!t(...)" noise.
		t.Errorf("Action has wrong type. Expected: %T. Got: %T", expected, actual)
		return
	}

	// The concrete types are identical at this point, so the type assertions
	// on expected below cannot fail; their ok results are deliberately dropped.
	switch a := actual.(type) {
	case core.CreateAction:
		e, _ := expected.(core.CreateAction)
		expObject := e.GetObject()
		object := a.GetObject()

		if !reflect.DeepEqual(expObject, object) {
			t.Errorf("Action %s %s has wrong object\nDiff:\n %s",
				a.GetVerb(), a.GetResource().Resource, diff.ObjectGoPrintDiff(expObject, object))
		}
	case core.UpdateAction:
		e, _ := expected.(core.UpdateAction)
		expObject := e.GetObject()
		object := a.GetObject()

		if !reflect.DeepEqual(expObject, object) {
			t.Errorf("Action %s %s has wrong object\nDiff:\n %s",
				a.GetVerb(), a.GetResource().Resource, diff.ObjectGoPrintDiff(expObject, object))
		}
	case core.PatchAction:
		e, _ := expected.(core.PatchAction)
		expPatch := e.GetPatch()
		patch := a.GetPatch()

		// Compare the expected patch against the actual one. Comparing
		// expPatch with itself is always true and would hide wrong patches.
		if !reflect.DeepEqual(expPatch, patch) {
			t.Errorf("Action %s %s has wrong patch\nDiff:\n %s",
				a.GetVerb(), a.GetResource().Resource, diff.ObjectGoPrintDiff(expPatch, patch))
		}
	}
}
// filterInformerActions drops the list and watch actions that informers issue
// for the resources used in these tests. Such actions do not change resource
// state, so removing them lowers the noise level when comparing actions. Only
// actions with an empty namespace are dropped; everything else is returned in
// its original order.
func filterInformerActions(actions []core.Action) []core.Action {
	informerResources := []string{
		"configmaps",
		"serviceaccounts",
		"roles",
		"rolebindings",
		"statefulsets",
		"pods",
		"jobs",
		"mpijobs",
	}

	// isInformerAction reports whether a is a cluster-wide list or watch on one
	// of the resources above.
	isInformerAction := func(a core.Action) bool {
		if a.GetNamespace() != "" {
			return false
		}
		for _, resource := range informerResources {
			if a.Matches("list", resource) || a.Matches("watch", resource) {
				return true
			}
		}
		return false
	}

	var kept []core.Action
	for _, a := range actions {
		if !isInformerAction(a) {
			kept = append(kept, a)
		}
	}
	return kept
}
// expectCreateConfigMapAction appends to f.kubeActions the expectation that d
// is created as a "configmaps" resource in d's namespace.
func (f *fixture) expectCreateConfigMapAction(d *corev1.ConfigMap) {
	resource := schema.GroupVersionResource{Resource: "configmaps"}
	expected := core.NewCreateAction(resource, d.Namespace, d)
	f.kubeActions = append(f.kubeActions, expected)
}
// expectUpdateConfigMapAction appends to f.kubeActions the expectation that d
// is updated as a "configmaps" resource in d's namespace.
func (f *fixture) expectUpdateConfigMapAction(d *corev1.ConfigMap) {
	resource := schema.GroupVersionResource{Resource: "configmaps"}
	expected := core.NewUpdateAction(resource, d.Namespace, d)
	f.kubeActions = append(f.kubeActions, expected)
}
// expectCreateServiceAccountAction appends to f.kubeActions the expectation
// that d is created as a "serviceaccounts" resource in d's namespace.
func (f *fixture) expectCreateServiceAccountAction(d *corev1.ServiceAccount) {
	resource := schema.GroupVersionResource{Resource: "serviceaccounts"}
	expected := core.NewCreateAction(resource, d.Namespace, d)
	f.kubeActions = append(f.kubeActions, expected)
}
// expectUpdateServiceAccountAction appends to f.kubeActions the expectation
// that d is updated as a "serviceaccounts" resource in d's namespace.
func (f *fixture) expectUpdateServiceAccountAction(d *corev1.ServiceAccount) {
	resource := schema.GroupVersionResource{Resource: "serviceaccounts"}
	expected := core.NewUpdateAction(resource, d.Namespace, d)
	f.kubeActions = append(f.kubeActions, expected)
}
// expectCreateRoleAction appends to f.kubeActions the expectation that d is
// created as a "roles" resource in d's namespace.
func (f *fixture) expectCreateRoleAction(d *rbacv1.Role) {
	resource := schema.GroupVersionResource{Resource: "roles"}
	expected := core.NewCreateAction(resource, d.Namespace, d)
	f.kubeActions = append(f.kubeActions, expected)
}
// expectUpdateRoleAction appends to f.kubeActions the expectation that d is
// updated as a "roles" resource in d's namespace.
func (f *fixture) expectUpdateRoleAction(d *rbacv1.Role) {
	resource := schema.GroupVersionResource{Resource: "roles"}
	expected := core.NewUpdateAction(resource, d.Namespace, d)
	f.kubeActions = append(f.kubeActions, expected)
}
// expectCreateRoleBindingAction appends to f.kubeActions the expectation that
// d is created as a "rolebindings" resource in d's namespace.
func (f *fixture) expectCreateRoleBindingAction(d *rbacv1.RoleBinding) {
	resource := schema.GroupVersionResource{Resource: "rolebindings"}
	expected := core.NewCreateAction(resource, d.Namespace, d)
	f.kubeActions = append(f.kubeActions, expected)
}
// expectUpdateRoleBindingAction appends to f.kubeActions the expectation that
// d is updated as a "rolebindings" resource in d's namespace.
func (f *fixture) expectUpdateRoleBindingAction(d *rbacv1.RoleBinding) {
	resource := schema.GroupVersionResource{Resource: "rolebindings"}
	expected := core.NewUpdateAction(resource, d.Namespace, d)
	f.kubeActions = append(f.kubeActions, expected)
}
// expectCreateStatefulSetAction appends to f.kubeActions the expectation that
// d is created as a "statefulsets" resource in d's namespace.
func (f *fixture) expectCreateStatefulSetAction(d *appsv1.StatefulSet) {
	resource := schema.GroupVersionResource{Resource: "statefulsets"}
	expected := core.NewCreateAction(resource, d.Namespace, d)
	f.kubeActions = append(f.kubeActions, expected)
}
// expectUpdateStatefulSetAction appends to f.kubeActions the expectation that
// d is updated as a "statefulsets" resource in d's namespace.
func (f *fixture) expectUpdateStatefulSetAction(d *appsv1.StatefulSet) {
	resource := schema.GroupVersionResource{Resource: "statefulsets"}
	expected := core.NewUpdateAction(resource, d.Namespace, d)
	f.kubeActions = append(f.kubeActions, expected)
}
// expectCreateJobAction appends to f.kubeActions the expectation that d is
// created as a "jobs" resource in d's namespace.
func (f *fixture) expectCreateJobAction(d *batchv1.Job) {
	resource := schema.GroupVersionResource{Resource: "jobs"}
	expected := core.NewCreateAction(resource, d.Namespace, d)
	f.kubeActions = append(f.kubeActions, expected)
}
// expectUpdateJobAction appends to f.kubeActions the expectation that d is
// updated as a "jobs" resource in d's namespace.
func (f *fixture) expectUpdateJobAction(d *batchv1.Job) {
	resource := schema.GroupVersionResource{Resource: "jobs"}
	expected := core.NewUpdateAction(resource, d.Namespace, d)
	f.kubeActions = append(f.kubeActions, expected)
}
// expectUpdateMPIJobStatusAction appends to f.actions the expectation that
// mpiJob is updated as an "mpijobs" resource in its namespace through the
// MPIJob client.
func (f *fixture) expectUpdateMPIJobStatusAction(mpiJob *kubeflow.MPIJob) {
	// TODO: Until #38113 is merged, we can't use Subresource, so the expected
	// action is a plain update rather than one with Subresource = "status".
	resource := schema.GroupVersionResource{Resource: "mpijobs"}
	expected := core.NewUpdateAction(resource, mpiJob.Namespace, mpiJob)
	f.actions = append(f.actions, expected)
}
// setUpMPIJob registers mpiJob as pre-existing: it becomes a seed object of
// the fake MPIJob clientset and an entry of the MPIJob informer store.
func (f *fixture) setUpMPIJob(mpiJob *kubeflow.MPIJob) {
	f.objects = append(f.objects, mpiJob)
	f.mpiJobLister = append(f.mpiJobLister, mpiJob)
}
// setUpLauncher registers the launcher Job as pre-existing: it becomes a seed
// object of the fake Kubernetes clientset and an entry of the Job informer
// store.
func (f *fixture) setUpLauncher(launcher *batchv1.Job) {
	f.kubeObjects = append(f.kubeObjects, launcher)
	f.jobLister = append(f.jobLister, launcher)
}
// setUpWorker registers the worker StatefulSet as pre-existing: it becomes a
// seed object of the fake Kubernetes clientset and an entry of the StatefulSet
// informer store.
func (f *fixture) setUpWorker(worker *appsv1.StatefulSet) {
	f.kubeObjects = append(f.kubeObjects, worker)
	f.statefulSetLister = append(f.statefulSetLister, worker)
}
// setUpConfigMap registers configMap as pre-existing: it becomes a seed object
// of the fake Kubernetes clientset and an entry of the ConfigMap informer
// store.
func (f *fixture) setUpConfigMap(configMap *corev1.ConfigMap) {
	f.kubeObjects = append(f.kubeObjects, configMap)
	f.configMapLister = append(f.configMapLister, configMap)
}
// setUpServiceAccount registers serviceAccount as pre-existing: it becomes a
// seed object of the fake Kubernetes clientset and an entry of the
// ServiceAccount informer store.
func (f *fixture) setUpServiceAccount(serviceAccount *corev1.ServiceAccount) {
	f.kubeObjects = append(f.kubeObjects, serviceAccount)
	f.serviceAccountLister = append(f.serviceAccountLister, serviceAccount)
}
// setUpRole registers role as pre-existing: it becomes a seed object of the
// fake Kubernetes clientset and an entry of the Role informer store.
func (f *fixture) setUpRole(role *rbacv1.Role) {
	f.kubeObjects = append(f.kubeObjects, role)
	f.roleLister = append(f.roleLister, role)
}
// setUpRoleBinding registers roleBinding as pre-existing: it becomes a seed
// object of the fake Kubernetes clientset and an entry of the RoleBinding
// informer store.
func (f *fixture) setUpRoleBinding(roleBinding *rbacv1.RoleBinding) {
	f.kubeObjects = append(f.kubeObjects, roleBinding)
	f.roleBindingLister = append(f.roleBindingLister, roleBinding)
}
// setUpRbac registers, as pre-existing objects, the launcher ServiceAccount,
// Role (built for workerReplicas workers) and RoleBinding derived from mpiJob.
func (f *fixture) setUpRbac(mpiJob *kubeflow.MPIJob, workerReplicas int) {
	f.setUpServiceAccount(newLauncherServiceAccount(mpiJob))
	f.setUpRole(newLauncherRole(mpiJob, workerReplicas))
	f.setUpRoleBinding(newLauncherRoleBinding(mpiJob))
}
// getKey returns the cache key of mpiJob as computed by
// cache.DeletionHandlingMetaNamespaceKeyFunc. If the key cannot be computed,
// the failure is reported on t and the empty string is returned.
func getKey(mpiJob *kubeflow.MPIJob, t *testing.T) string {
	key, err := cache.DeletionHandlingMetaNamespaceKeyFunc(mpiJob)
	if err == nil {
		return key
	}
	t.Errorf("Unexpected error getting key for mpi job %v: %v", mpiJob.Name, err)
	return ""
}
// TestDoNothingWithInvalidKey syncs the key "foo/bar/baz" on an empty fixture
// and expects no error and no client actions.
func TestDoNothingWithInvalidKey(t *testing.T) {
	const invalidKey = "foo/bar/baz"
	newFixture(t).run(invalidKey)
}
// TestDoNothingWithNonexistentMPIJob syncs the key of an MPIJob that was never
// registered with the fixture and expects no error and no client actions.
func TestDoNothingWithNonexistentMPIJob(t *testing.T) {
	fx := newFixture(t)
	// The job exists only to derive a well-formed key.
	missingKey := getKey(newMPIJob("test", int32Ptr(64)), t)
	fx.run(missingKey)
}
// TestLauncherNotControlledByUs pre-loads a launcher Job whose owner references
// have been cleared and expects the sync to return an error.
func TestLauncherNotControlledByUs(t *testing.T) {
	fx := newFixture(t)

	job := newMPIJob("test", int32Ptr(64))
	fx.setUpMPIJob(job)

	orphanLauncher := newLauncher(job, 64, "kubectl-delivery")
	orphanLauncher.OwnerReferences = nil
	fx.setUpLauncher(orphanLauncher)

	fx.runExpectError(getKey(job, t))
}
// TestLauncherSucceeded pre-loads a launcher Job with one succeeded pod and
// expects a single MPIJob update setting LauncherStatus to LauncherSucceeded.
func TestLauncherSucceeded(t *testing.T) {
	fx := newFixture(t)

	job := newMPIJob("test", int32Ptr(64))
	fx.setUpMPIJob(job)

	succeededLauncher := newLauncher(job, 64, "kubectl-delivery")
	succeededLauncher.Status.Succeeded = 1
	fx.setUpLauncher(succeededLauncher)

	wantJob := job.DeepCopy()
	wantJob.Status.LauncherStatus = kubeflow.LauncherSucceeded
	fx.expectUpdateMPIJobStatusAction(wantJob)

	fx.run(getKey(job, t))
}
// TestLauncherFailed pre-loads a launcher Job with one failed pod and expects
// a single MPIJob update setting LauncherStatus to LauncherFailed.
func TestLauncherFailed(t *testing.T) {
	fx := newFixture(t)

	job := newMPIJob("test", int32Ptr(64))
	fx.setUpMPIJob(job)

	failedLauncher := newLauncher(job, 64, "kubectl-delivery")
	failedLauncher.Status.Failed = 1
	fx.setUpLauncher(failedLauncher)

	wantJob := job.DeepCopy()
	wantJob.Status.LauncherStatus = kubeflow.LauncherFailed
	fx.expectUpdateMPIJobStatusAction(wantJob)

	fx.run(getKey(job, t))
}
// TestLauncherDoesNotExist starts with only the MPIJob present and expects the
// controller to create, in this order, the ConfigMap, ServiceAccount, Role,
// RoleBinding and worker StatefulSet, and then to update the MPIJob with
// WorkerReplicas left at 0.
func TestLauncherDoesNotExist(t *testing.T) {
	fx := newFixture(t)

	job := newMPIJob("test", int32Ptr(64))
	fx.setUpMPIJob(job)

	// Expected creations on the Kubernetes client, in order.
	fx.expectCreateConfigMapAction(newConfigMap(job, 7, 8))
	fx.expectCreateServiceAccountAction(newLauncherServiceAccount(job))
	fx.expectCreateRoleAction(newLauncherRole(job, 7))
	fx.expectCreateRoleBindingAction(newLauncherRoleBinding(job))
	fx.expectCreateStatefulSetAction(newWorker(job, 7, 8))

	wantJob := job.DeepCopy()
	wantJob.Status.WorkerReplicas = 0
	fx.expectUpdateMPIJobStatusAction(wantJob)

	fx.run(getKey(job, t))
}
// TestConfigMapNotControlledByUs pre-loads a ConfigMap whose owner references
// have been cleared and expects the sync to return an error.
func TestConfigMapNotControlledByUs(t *testing.T) {
	fx := newFixture(t)

	job := newMPIJob("test", int32Ptr(64))
	fx.setUpMPIJob(job)

	orphanConfigMap := newConfigMap(job, 7, 8)
	orphanConfigMap.OwnerReferences = nil
	fx.setUpConfigMap(orphanConfigMap)

	fx.runExpectError(getKey(job, t))
}
// TestServiceAccountNotControlledByUs pre-loads a regular ConfigMap and a
// launcher ServiceAccount whose owner references have been cleared, and
// expects the sync to return an error.
func TestServiceAccountNotControlledByUs(t *testing.T) {
	fx := newFixture(t)

	job := newMPIJob("test", int32Ptr(64))
	fx.setUpMPIJob(job)

	configMap := newConfigMap(job, 7, 8)
	fx.setUpConfigMap(configMap)

	orphanAccount := newLauncherServiceAccount(job)
	orphanAccount.OwnerReferences = nil
	fx.setUpServiceAccount(orphanAccount)

	fx.runExpectError(getKey(job, t))
}
// TestRoleNotControlledByUs pre-loads a regular ConfigMap and ServiceAccount
// plus a launcher Role whose owner references have been cleared, and expects
// the sync to return an error.
func TestRoleNotControlledByUs(t *testing.T) {
	fx := newFixture(t)

	job := newMPIJob("test", int32Ptr(64))
	fx.setUpMPIJob(job)

	configMap := newConfigMap(job, 7, 8)
	fx.setUpConfigMap(configMap)
	serviceAccount := newLauncherServiceAccount(job)
	fx.setUpServiceAccount(serviceAccount)

	orphanRole := newLauncherRole(job, 7)
	orphanRole.OwnerReferences = nil
	fx.setUpRole(orphanRole)

	fx.runExpectError(getKey(job, t))
}
// TestRoleBindingNotControlledByUs pre-loads a regular ConfigMap,
// ServiceAccount and Role plus a launcher RoleBinding whose owner references
// have been cleared, and expects the sync to return an error.
func TestRoleBindingNotControlledByUs(t *testing.T) {
	fx := newFixture(t)

	job := newMPIJob("test", int32Ptr(64))
	fx.setUpMPIJob(job)

	configMap := newConfigMap(job, 7, 8)
	fx.setUpConfigMap(configMap)
	serviceAccount := newLauncherServiceAccount(job)
	fx.setUpServiceAccount(serviceAccount)
	role := newLauncherRole(job, 7)
	fx.setUpRole(role)

	orphanBinding := newLauncherRoleBinding(job)
	orphanBinding.OwnerReferences = nil
	fx.setUpRoleBinding(orphanBinding)

	fx.runExpectError(getKey(job, t))
}
// TestShutdownWorker pre-loads a succeeded launcher Job and a worker
// StatefulSet built with newWorker(job, 7, 8). It expects the StatefulSet to
// be updated to newWorker(job, 0, 8) and the MPIJob to be updated with
// WorkerReplicas 0 and LauncherStatus LauncherSucceeded.
func TestShutdownWorker(t *testing.T) {
	fx := newFixture(t)

	job := newMPIJob("test", int32Ptr(64))
	fx.setUpMPIJob(job)

	succeededLauncher := newLauncher(job, 64, "kubectl-delivery")
	succeededLauncher.Status.Succeeded = 1
	fx.setUpLauncher(succeededLauncher)

	fx.setUpWorker(newWorker(job, 7, 8))

	fx.expectUpdateStatefulSetAction(newWorker(job, 0, 8))

	wantJob := job.DeepCopy()
	wantJob.Status.WorkerReplicas = 0
	wantJob.Status.LauncherStatus = kubeflow.LauncherSucceeded
	fx.expectUpdateMPIJobStatusAction(wantJob)

	fx.run(getKey(job, t))
}
// TestWorkerNotControlledByUs pre-loads a regular ConfigMap and RBAC objects
// plus a worker StatefulSet whose owner references have been cleared, and
// expects the sync to return an error.
func TestWorkerNotControlledByUs(t *testing.T) {
	fx := newFixture(t)

	job := newMPIJob("test", int32Ptr(64))
	fx.setUpMPIJob(job)

	configMap := newConfigMap(job, 7, 8)
	fx.setUpConfigMap(configMap)
	fx.setUpRbac(job, 7)

	orphanWorker := newWorker(job, 7, 8)
	orphanWorker.OwnerReferences = nil
	fx.setUpWorker(orphanWorker)

	fx.runExpectError(getKey(job, t))
}
// TestWorkerNotNeeded uses an MPIJob with 8 GPUs whose ConfigMap and RBAC
// objects (built for 0 workers) already exist. It expects the launcher Job to
// be created and the MPIJob to be updated unchanged.
func TestWorkerNotNeeded(t *testing.T) {
	fx := newFixture(t)

	job := newMPIJob("test", int32Ptr(8))
	fx.setUpMPIJob(job)

	configMap := newConfigMap(job, 0, 8)
	fx.setUpConfigMap(configMap)
	fx.setUpRbac(job, 0)

	fx.expectCreateJobAction(newLauncher(job, 8, "kubectl-delivery"))
	// The expected MPIJob is the original object itself, not a modified copy.
	fx.expectUpdateMPIJobStatusAction(job)

	fx.run(getKey(job, t))
}
// TestLauncherActive pre-loads the ConfigMap, RBAC objects and a launcher Job
// with one active pod, and expects a single MPIJob update setting
// LauncherStatus to LauncherActive.
func TestLauncherActive(t *testing.T) {
	fx := newFixture(t)

	job := newMPIJob("test", int32Ptr(8))
	fx.setUpMPIJob(job)

	configMap := newConfigMap(job, 0, 8)
	fx.setUpConfigMap(configMap)
	fx.setUpRbac(job, 0)

	activeLauncher := newLauncher(job, 64, "kubectl-delivery")
	activeLauncher.Status.Active = 1
	fx.setUpLauncher(activeLauncher)

	wantJob := job.DeepCopy()
	wantJob.Status.LauncherStatus = kubeflow.LauncherActive
	fx.expectUpdateMPIJobStatusAction(wantJob)

	fx.run(getKey(job, t))
}
// TestWorkerReady uses an MPIJob with 16 GPUs whose ConfigMap, RBAC objects
// and worker StatefulSet (one replica, reported ready) already exist. It
// expects the launcher Job to be created and the MPIJob to be updated with
// WorkerReplicas set to 1.
func TestWorkerReady(t *testing.T) {
	fx := newFixture(t)

	job := newMPIJob("test", int32Ptr(16))
	fx.setUpMPIJob(job)

	configMap := newConfigMap(job, 1, 8)
	fx.setUpConfigMap(configMap)
	fx.setUpRbac(job, 1)

	readyWorker := newWorker(job, 1, 8)
	readyWorker.Status.ReadyReplicas = 1
	fx.setUpWorker(readyWorker)

	fx.expectCreateJobAction(newLauncher(job, 8, "kubectl-delivery"))

	wantJob := job.DeepCopy()
	wantJob.Status.WorkerReplicas = 1
	fx.expectUpdateMPIJobStatusAction(wantJob)

	fx.run(getKey(job, t))
}
// int32Ptr returns a pointer to a freshly allocated int32 holding i.
func int32Ptr(i int32) *int32 {
	p := new(int32)
	*p = i
	return p
}