mirror of https://github.com/kubeflow/arena.git
Compare commits
507 Commits
v0.2.0-rc.
...
master
Author | SHA1 | Date |
---|---|---|
|
f8ee31410c | |
|
ec5255280c | |
|
d1f7be63ab | |
|
a190ca253b | |
|
695c2c67f0 | |
|
75ec421d62 | |
|
25d7b1109e | |
|
d2d5f77a97 | |
|
c4ccb4ca7e | |
|
aa33dc51b7 | |
|
9e84dad37a | |
|
c9d5653de3 | |
|
4618e321ab | |
|
ca7bf97da4 | |
|
1c633d76ff | |
|
3693f59663 | |
|
fa2fad7d6e | |
|
8f4a602ce6 | |
|
ad85546c23 | |
|
babcb76f91 | |
|
ba7a09ace6 | |
|
545f86bfe9 | |
|
568e3845f5 | |
|
8b84559944 | |
|
ee2384b911 | |
|
2fbb3d7ed4 | |
|
19b5133e6e | |
|
8d413b5861 | |
|
2f6e202bbf | |
|
f3d52fa73a | |
|
ece85b8ce3 | |
|
d497232013 | |
|
9407f9b1a0 | |
|
d9bf195879 | |
|
19abf194bb | |
|
1f9350d78c | |
|
23e9731b52 | |
|
d6b177b93d | |
|
0ca2670770 | |
|
7d7f75ad2d | |
|
4b21f7299b | |
|
36a59bba67 | |
|
ccdbf44815 | |
|
36b17b4175 | |
|
1058d48063 | |
|
ce9c5f3bff | |
|
970afbd209 | |
|
f1bb3bcdbb | |
|
b814410627 | |
|
38218aa3a0 | |
|
13fa5c8dc8 | |
|
f098f1af85 | |
|
b0e411cab5 | |
|
5e18210479 | |
|
13df29407c | |
|
0a701eb03d | |
|
0482946a0c | |
|
0d4b513d65 | |
|
e8b9fcd10d | |
|
190c18e840 | |
|
dc0929f32f | |
|
74ade74d3e | |
|
316e33c999 | |
|
fc47e460e1 | |
|
1cba9b99dc | |
|
866ec44648 | |
|
ac164b85bf | |
|
d61a784a13 | |
|
74fd3f2ad3 | |
|
a765b1c5a0 | |
|
0838d54757 | |
|
ca735b6152 | |
|
969ad681a3 | |
|
29b2d6d2c5 | |
|
22a3df5023 | |
|
68b71f9006 | |
|
70278ce8f7 | |
|
8e008a4916 | |
|
46a795e3db | |
|
76ca05975e | |
|
dce03cc700 | |
|
7885f46081 | |
|
8d6c23d14c | |
|
bd1b0da049 | |
|
e15cb18aeb | |
|
82fd0ba7e5 | |
|
a1b7285e1d | |
|
522a0c610f | |
|
b8af066a2f | |
|
42b8fcae2e | |
|
45c8e1b150 | |
|
fdcfd18a98 | |
|
41fb18b640 | |
|
bf49baae30 | |
|
bd159b2d0f | |
|
7c10b6756c | |
|
0d95df6f1e | |
|
11b771b417 | |
|
223e534b91 | |
|
7197b5cb40 | |
|
b2c5686543 | |
|
dfd3268cc6 | |
|
513894a1f0 | |
|
064927ef5c | |
|
a9ed5f6eaf | |
|
b2380e60dc | |
|
bf53ba33ea | |
|
305005ebdf | |
|
b70297a03a | |
|
ded5780b29 | |
|
c1f39aba1f | |
|
94fc66024f | |
|
c3e73610b0 | |
|
e279bad1cf | |
|
3409e5b1e4 | |
|
3afe470d8d | |
|
f11dae2a6f | |
|
a80b33508f | |
|
6c2373d32e | |
|
b500f9eda2 | |
|
98a43dc6d9 | |
|
881780fb08 | |
|
9064896a91 | |
|
c9dbc8f968 | |
|
5748fe4136 | |
|
33181529ab | |
|
5e8b6ddbff | |
|
a3a348c00a | |
|
7acbb8c408 | |
|
19c9090bd7 | |
|
48eed0fe82 | |
|
3926187d64 | |
|
95d4bbeb94 | |
|
dbf740f8cb | |
|
64808b67e6 | |
|
37d8ab4d50 | |
|
a031bae968 | |
|
f31e1b0be0 | |
|
5034f390d2 | |
|
43b60eddb7 | |
|
1398c8f307 | |
|
acac0fbb25 | |
|
451030cfcb | |
|
adb43b8d74 | |
|
fed8afc602 | |
|
dd69d9c1af | |
|
768218e8f5 | |
|
d1e62ffa3a | |
|
c114755222 | |
|
12f205ef89 | |
|
5ac396c7ab | |
|
8b05634bea | |
|
b7f0ecf50e | |
|
57093a20fb | |
|
70f4a13547 | |
|
d648a2a8cf | |
|
0a7501c542 | |
|
e4631c492d | |
|
6fd3d0e022 | |
|
f27a6780ce | |
|
ed2aea2f86 | |
|
a707f81ef6 | |
|
23b4fe9090 | |
|
3e7e915c16 | |
|
8739eb536c | |
|
875d0022b5 | |
|
1449e75f92 | |
|
10e1e629af | |
|
ff24a10944 | |
|
8db2d49353 | |
|
cdf1bb3102 | |
|
67a9150c56 | |
|
0df51d7492 | |
|
7f31c6b209 | |
|
ce87d1095d | |
|
c4d37efa2b | |
|
a577b6d6ce | |
|
261cf3a362 | |
|
4dc39d6b52 | |
|
a7e6a0fc19 | |
|
4afe00e05a | |
|
46093aec39 | |
|
bf33adad6d | |
|
650d2ef0f8 | |
|
14fa45c995 | |
|
2029700bd8 | |
|
de8cb950de | |
|
3fe9ae4026 | |
|
81a8bf85c9 | |
|
4b5c18cab9 | |
|
2669f364ee | |
|
a45f3a5fcf | |
|
a6a8f3003d | |
|
ce4a78dc91 | |
|
516d8cbe7b | |
|
47c4420e84 | |
|
51151af1c3 | |
|
16c2746bfd | |
|
37745b5610 | |
|
016da2a495 | |
|
c167d3ea08 | |
|
cd1f02eb57 | |
|
908501acea | |
|
29298ca25a | |
|
d51fe2eecb | |
|
b58010a509 | |
|
eaf1e7851d | |
|
b3c2c7f9f3 | |
|
0c2d171290 | |
|
09a57151f2 | |
|
c3948e250d | |
|
f2780f4cea | |
|
925cac7e19 | |
|
d3e59d1703 | |
|
f7e889e3f6 | |
|
5eb3b9ca7c | |
|
85e40e0451 | |
|
eece0452f5 | |
|
840f678201 | |
|
e195c230d1 | |
|
4c677350a9 | |
|
eed3aeb499 | |
|
ef1ea85a59 | |
|
3c0c15ee98 | |
|
c3da54dbc4 | |
|
e11d8f7715 | |
|
ac04ff5947 | |
|
60d1fd4fc6 | |
|
5c113cae4f | |
|
9114bccc93 | |
|
99e2649e45 | |
|
28663a84d1 | |
|
df37e209c5 | |
|
774f02adcb | |
|
44b6bdca06 | |
|
5af7539d48 | |
|
06ee271376 | |
|
cb27b5df22 | |
|
0fc1ef29b1 | |
|
6cbdaa3afb | |
|
2b5da64346 | |
|
9e70decd6c | |
|
7243c9bce6 | |
|
4145068c94 | |
|
65072b4f6a | |
|
3ea26bbeb0 | |
|
91114cc834 | |
|
fcda9bb6f5 | |
|
add6ac606f | |
|
d4c97527ac | |
|
447b534163 | |
|
05d75f1f05 | |
|
28c8263823 | |
|
015ff4a3a5 | |
|
62415ec062 | |
|
af3504b070 | |
|
95356164c6 | |
|
8cbeadb3b2 | |
|
37bfe9bb32 | |
|
4f1c62cc94 | |
|
48c950ae04 | |
|
9066e985ea | |
|
df2c3ea9de | |
|
60756c65ca | |
|
32d391771e | |
|
d0ad02d39d | |
|
5a47651b12 | |
|
eb2933b4b0 | |
|
b2cd6c5287 | |
|
4ce2d1b7d7 | |
|
5e8ec652b9 | |
|
a14ad59363 | |
|
daded93e9e | |
|
65f646f860 | |
|
3a8be3dc5c | |
|
d06b92df6b | |
|
1d00b1fcb7 | |
|
3786dfa757 | |
|
194352f5bc | |
|
42d6d76d3e | |
|
f506738418 | |
|
ce06540d5d | |
|
b67e40c456 | |
|
ed6de0d5c7 | |
|
f73ef6250e | |
|
892217e74c | |
|
c24e3d535b | |
|
70486e5e7d | |
|
e51b97eb2b | |
|
4ba23b122f | |
|
dd265dae42 | |
|
a2bec8c2e6 | |
|
cb249e1285 | |
|
7ab7410ff0 | |
|
86cb696826 | |
|
5bc27110bf | |
|
00ed936b5b | |
|
efa4f6d040 | |
|
e2789d70d1 | |
|
11d09241f4 | |
|
3d6c09a8df | |
|
b328b87ddb | |
|
ea7c4ea672 | |
|
71ec536fcd | |
|
eaf5106bd0 | |
|
264b96a3fd | |
|
77c4d32450 | |
|
f8aea8c690 | |
|
641ba829b3 | |
|
586faff1be | |
|
a3f3694fae | |
|
3453b57f32 | |
|
3324674757 | |
|
4fb97ce04a | |
|
2fbb6080b8 | |
|
bdce431b90 | |
|
ed79092ffd | |
|
07c1439ce3 | |
|
48129d11f9 | |
|
54f3d37879 | |
|
f4d5df00d0 | |
|
c3f42edf10 | |
|
20e9fe4efa | |
|
4dca70ddc9 | |
|
9bba0d8c58 | |
|
bf8e065b68 | |
|
86deea17c6 | |
|
61fc2c159c | |
|
c0fd8e46ba | |
|
c3eb2cf573 | |
|
a1c50041e1 | |
|
ba37c8a984 | |
|
cebb2cee7a | |
|
551d2d058d | |
|
435b517e16 | |
|
f68991bc50 | |
|
4b02fa9607 | |
|
ed0d1ab840 | |
|
0195afcacf | |
|
d28009bd98 | |
|
8227e03805 | |
|
ad6688db30 | |
|
c183c9bdd4 | |
|
aa7450f787 | |
|
5bf35204de | |
|
b6f754f716 | |
|
5f5b9de7ed | |
|
c1899105ff | |
|
535193746b | |
|
2480fe92de | |
|
bf26dee3c9 | |
|
50c26e9fbe | |
|
2720e6f5b0 | |
|
7af62c8e6c | |
|
4ed2d203f2 | |
|
ca3934b452 | |
|
d1eda4e5a2 | |
|
b605824639 | |
|
d0e39fc8c1 | |
|
2fbc4b892e | |
|
545dd90f47 | |
|
95a674686b | |
|
3fc7c97ead | |
|
fcf0f8b387 | |
|
3559f56b57 | |
|
386e37ba9f | |
|
c6f5800d09 | |
|
8027003fea | |
|
336c06d1c7 | |
|
95c9b6bdf4 | |
|
b2404c4a75 | |
|
9825b10f58 | |
|
a43477d13c | |
|
dac8d1cf91 | |
|
7eea1ab194 | |
|
0c7961c616 | |
|
6e2f9973ce | |
|
5b0b6f4f79 | |
|
692077ce91 | |
|
039149693e | |
|
340100e9d0 | |
|
5282382ef4 | |
|
4ac5fe6e15 | |
|
fac5593c11 | |
|
2f7e48aa52 | |
|
c3e0582ab5 | |
|
b7576c75e4 | |
|
50813f7ea8 | |
|
10f90525a8 | |
|
d12e011acb | |
|
a9e5644d45 | |
|
5b32b154a1 | |
|
1290baf51b | |
|
57ce0cc091 | |
|
65e9b24b09 | |
|
cc19ff2198 | |
|
267298d715 | |
|
3427848463 | |
|
aec9ffa7d7 | |
|
2f76b96c15 | |
|
f246c57316 | |
|
bf5cb7fa93 | |
|
d0ee8aeec4 | |
|
3732c5819a | |
|
e689b57561 | |
|
64667df022 | |
|
34c0cb4155 | |
|
819d2e74ae | |
|
d8ea539f38 | |
|
f1f6f4e694 | |
|
b2d3c24274 | |
|
196a19acd7 | |
|
e22162d6f9 | |
|
70d16402e1 | |
|
68662f2d90 | |
|
5342b52858 | |
|
82e082d925 | |
|
534d0e7c75 | |
|
829b0e9667 | |
|
99ebcf4aba | |
|
6b19e11828 | |
|
afb90617de | |
|
d9acc267bf | |
|
31a024b160 | |
|
332fcdecce | |
|
fe3c306a06 | |
|
dfc870630f | |
|
20a4df9281 | |
|
f4a5f948d5 | |
|
0ab7a177cc | |
|
e58dc6939d | |
|
f80d61572b | |
|
d9cb0d93e8 | |
|
b347794b6e | |
|
00dc37021c | |
|
913d988383 | |
|
b96e1acba0 | |
|
2ae9c351e2 | |
|
3d739256a5 | |
|
7fd5508690 | |
|
c2681a2e4e | |
|
601719c85c | |
|
37707f2d66 | |
|
d2e87badf1 | |
|
9091f50d55 | |
|
3276cb638e | |
|
f9779b3e33 | |
|
84bdf45dd0 | |
|
3a4d69a207 | |
|
3a80faaf4a | |
|
ddf2022fcb | |
|
f78afccc69 | |
|
62589bb765 | |
|
d697c15906 | |
|
04d428b42a | |
|
61251bc84c | |
|
3fb80f545e | |
|
bf3e6b3a51 | |
|
9ed0577a08 | |
|
78d4b46268 | |
|
ac1711951b | |
|
f9b56156d2 | |
|
32118f387e | |
|
ec9f6fe402 | |
|
85b3b7c9a7 | |
|
bc7fdc7128 | |
|
dd1ececf1c | |
|
9df4b69afc | |
|
32af2980de | |
|
da92b370bf | |
|
7bda176745 | |
|
0a6b45b4e3 | |
|
63904d0714 | |
|
d033fdad3e | |
|
6e23fc8003 | |
|
8b114b2165 | |
|
dc2076c3db | |
|
23454bdf41 | |
|
f28f6c9ad4 | |
|
fde161aa39 | |
|
38a7ecd33f | |
|
516291c56a | |
|
06df7e4e64 | |
|
fa1e91f32a | |
|
0b66622e82 | |
|
de71ada09e | |
|
a9ccc75600 | |
|
ff9e461b3d | |
|
a4e4bb3da9 | |
|
2c312a926b | |
|
f1e16550e8 | |
|
9981253290 | |
|
0e97093b98 | |
|
b5392a4cdd | |
|
0ab0362fab | |
|
44b0c1de82 | |
|
9b1b4d5340 | |
|
6919c18003 | |
|
f6e9b53d5b | |
|
43fb071e61 | |
|
d1a357ea17 | |
|
7af0024eb3 | |
|
235f6788fb | |
|
a172520b99 | |
|
627be3b641 | |
|
871c8ecec0 | |
|
02391feb17 |
|
@ -1,25 +0,0 @@
|
|||
# Golang CircleCI 2.0 configuration file
|
||||
#
|
||||
# Check https://circleci.com/docs/2.0/language-go/ for more details
|
||||
version: 2
|
||||
jobs:
|
||||
build:
|
||||
docker:
|
||||
- image: circleci/golang:1.10
|
||||
working_directory: /go/src/github.com/kubeflow/arena
|
||||
steps:
|
||||
- checkout
|
||||
- setup_remote_docker:
|
||||
docker_layer_caching: true
|
||||
- run:
|
||||
name: run tests
|
||||
command: |
|
||||
test -z $(go fmt ./...)
|
||||
go vet ./...
|
||||
go test -race -v ./...
|
||||
- run: docker build -t acs/arena:$CIRCLE_BUILD_NUM -f Dockerfile.install .
|
||||
- run:
|
||||
name: codecov
|
||||
command: |
|
||||
go test -race -coverprofile=coverage.txt -covermode=atomic ./...
|
||||
bash <(curl -s https://codecov.io/bash)
|
|
@ -0,0 +1,18 @@
|
|||
bin/
|
||||
docs/
|
||||
jupyter/
|
||||
samples/
|
||||
sdk/
|
||||
.gitignore
|
||||
.readthedocs.yaml
|
||||
Dockerfile*
|
||||
LICENSE
|
||||
OWNERS
|
||||
README.md
|
||||
README_cn.md
|
||||
ROADMAP.md
|
||||
ROADMAP_cn.md
|
||||
cover.out
|
||||
demo.jpg
|
||||
mkdocs.yml
|
||||
prow_config.yaml
|
|
@ -0,0 +1,48 @@
|
|||
name: Bug Report
|
||||
description: Tell us about a problem you are experiencing with Arena
|
||||
labels: ["kind/bug", "lifecycle/needs-triage"]
|
||||
body:
|
||||
- type: markdown
|
||||
attributes:
|
||||
value: |
|
||||
Thanks for taking the time to fill out this Arena bug report!
|
||||
- type: textarea
|
||||
id: problem
|
||||
attributes:
|
||||
label: What happened?
|
||||
description: |
|
||||
Please provide as much info as possible.
|
||||
Not doing so may result in your bug not being addressed in a timely manner.
|
||||
validations:
|
||||
required: true
|
||||
- type: textarea
|
||||
id: expected
|
||||
attributes:
|
||||
label: What did you expect to happen?
|
||||
validations:
|
||||
required: true
|
||||
- type: textarea
|
||||
id: environment
|
||||
attributes:
|
||||
label: Environment
|
||||
value: |
|
||||
Kubernetes version:
|
||||
|
||||
```bash
|
||||
$ kubectl version
|
||||
|
||||
```
|
||||
|
||||
Arena version:
|
||||
|
||||
```bash
|
||||
$ arena version
|
||||
|
||||
```
|
||||
validations:
|
||||
required: true
|
||||
- type: input
|
||||
id: votes
|
||||
attributes:
|
||||
label: Impacted by this bug?
|
||||
value: Give it a 👍 We prioritize the issues with most 👍
|
|
@ -0,0 +1,6 @@
|
|||
blank_issues_enabled: true
|
||||
|
||||
contact_links:
|
||||
- name: Arena Documentation
|
||||
url: https://arena-docs.readthedocs.io/en/stable
|
||||
about: Much help can be found in the docs
|
|
@ -0,0 +1,28 @@
|
|||
name: Feature Request
|
||||
description: Suggest an idea for Arena
|
||||
labels: ["kind/feature", "lifecycle/needs-triage"]
|
||||
body:
|
||||
- type: markdown
|
||||
attributes:
|
||||
value: |
|
||||
Thanks for taking the time to fill out this Arena feature request!
|
||||
- type: textarea
|
||||
id: feature
|
||||
attributes:
|
||||
label: What would you like to be added?
|
||||
description: |
|
||||
A clear and concise description of what you want to add to Arena.
|
||||
Please consider writing an Arena enhancement proposal if it is a large feature request.
|
||||
validations:
|
||||
required: true
|
||||
- type: textarea
|
||||
id: rationale
|
||||
attributes:
|
||||
label: Why is this needed?
|
||||
validations:
|
||||
required: true
|
||||
- type: input
|
||||
id: votes
|
||||
attributes:
|
||||
label: Love this feature?
|
||||
value: Give it a 👍 We prioritize the features with most 👍
|
|
@ -0,0 +1,27 @@
|
|||
name: Question
|
||||
description: Ask a question about Arena
|
||||
labels: ["kind/question", "lifecycle/needs-triage"]
|
||||
body:
|
||||
- type: markdown
|
||||
attributes:
|
||||
value: |
|
||||
Thanks for taking the time to fill out this question!
|
||||
- type: textarea
|
||||
id: feature
|
||||
attributes:
|
||||
label: What question do you want to ask?
|
||||
description: |
|
||||
A clear and concise description of what you want to ask about Arena.
|
||||
validations:
|
||||
required: true
|
||||
- type: textarea
|
||||
id: rationale
|
||||
attributes:
|
||||
label: Any additional context?
|
||||
validations:
|
||||
required: false
|
||||
- type: input
|
||||
id: votes
|
||||
attributes:
|
||||
label: Have the same question?
|
||||
value: Give it a 👍 We prioritize the questions with most 👍
|
|
@ -0,0 +1,29 @@
|
|||
<!-- Thanks for sending a pull request! Here are some tips for you:
|
||||
1. If this is your first time, check our contributor guidelines: https://www.kubeflow.org/docs/about/contributing
|
||||
2. To learn more about Arena, check the developer guide:
|
||||
https://arena-docs.readthedocs.io/en/latest/
|
||||
3. If you want *faster* PR reviews, check how: https://git.k8s.io/community/contributors/guide/pull-requests.md#best-practices-for-faster-reviews
|
||||
-->
|
||||
|
||||
## Purpose of this PR
|
||||
|
||||
<!-- Provide a clear and concise description of the changes. Explain the motivation behind these changes and link to relevant issues or discussions. -->
|
||||
|
||||
**Proposed changes:**
|
||||
|
||||
- <Change 1>
|
||||
- <Change 2>
|
||||
- <Change 3>
|
||||
|
||||
## Change Category
|
||||
|
||||
<!-- Indicate the type of change by marking the applicable boxes. -->
|
||||
|
||||
- [ ] Bugfix (non-breaking change which fixes an issue)
|
||||
- [ ] Feature (non-breaking change which adds functionality)
|
||||
- [ ] Breaking change (fix or feature that could affect existing functionality)
|
||||
- [ ] Documentation update
|
||||
|
||||
### Rationale
|
||||
|
||||
<!-- Provide reasoning for the changes if not already covered in the description above. -->
|
|
@ -0,0 +1,26 @@
|
|||
version: 2
|
||||
updates:
|
||||
- package-ecosystem: gomod
|
||||
directory: /
|
||||
schedule:
|
||||
interval: daily
|
||||
|
||||
- package-ecosystem: maven
|
||||
directory: /
|
||||
schedule:
|
||||
interval: daily
|
||||
|
||||
- package-ecosystem: pip
|
||||
directory: /
|
||||
schedule:
|
||||
interval: daily
|
||||
|
||||
- package-ecosystem: docker
|
||||
directory: /
|
||||
schedule:
|
||||
interval: daily
|
||||
|
||||
- package-ecosystem: github-actions
|
||||
directory: /
|
||||
schedule:
|
||||
interval: daily
|
|
@ -0,0 +1,5 @@
|
|||
# For https://mlbot.net, a GitHub bot that labels issues using Kubeflow
|
||||
label-alias:
|
||||
bug: kind/bug
|
||||
feature_request: kind/feature
|
||||
question: kind/question
|
|
@ -0,0 +1,69 @@
|
|||
name: Check Release
|
||||
|
||||
on:
|
||||
pull_request:
|
||||
branches:
|
||||
- master
|
||||
paths:
|
||||
- VERSION
|
||||
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.ref }}
|
||||
cancel-in-progress: true
|
||||
|
||||
env:
|
||||
SEMVER_PATTERN: '^([0-9]+)\.([0-9]+)\.([0-9]+)(-rc\.([0-9]+))?$'
|
||||
ARENA_ARTIFACTS_CHART: arena-artifacts
|
||||
|
||||
jobs:
|
||||
check:
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
steps:
|
||||
- name: Checkout source code
|
||||
uses: actions/checkout@v5
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
- name: Configure Git
|
||||
run: |
|
||||
git config user.name "$GITHUB_ACTOR"
|
||||
git config user.email "$GITHUB_ACTOR@users.noreply.github.com"
|
||||
|
||||
- name: Check whether version matches semver pattern
|
||||
run: |
|
||||
VERSION=$(cat VERSION)
|
||||
if [[ ${VERSION} =~ ${{ env.SEMVER_PATTERN }} ]]; then
|
||||
echo "Version '${VERSION}' matches semver pattern."
|
||||
else
|
||||
echo "Version '${VERSION}' does not match semver pattern."
|
||||
exit 1
|
||||
fi
|
||||
echo "VERSION=${VERSION}" >> $GITHUB_ENV
|
||||
|
||||
- name: Check arena artifacts chart version and appVersion
|
||||
run: |
|
||||
CHART_VERSION=$(cat ${{ env.ARENA_ARTIFACTS_CHART }}/Chart.yaml | grep -e '^version:' | awk '{print $2}')
|
||||
CHART_APP_VERSION=$(cat ${{ env.ARENA_ARTIFACTS_CHART }}/Chart.yaml | grep -e '^appVersion:' | awk '{print $2}')
|
||||
if [[ ${CHART_VERSION} == ${VERSION} ]]; then
|
||||
echo "Chart version '${CHART_VERSION}' matches version '${VERSION}'."
|
||||
else
|
||||
echo "Chart version '${CHART_VERSION}' does not match version '${VERSION}'."
|
||||
exit 1
|
||||
fi
|
||||
if [[ ${CHART_APP_VERSION} == ${VERSION} ]]; then
|
||||
echo "Chart appVersion '${CHART_APP_VERSION}' matches version '${VERSION}'."
|
||||
else
|
||||
echo "Chart appVersion '${CHART_APP_VERSION}' does not match version '${VERSION}'."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
- name: Check if tag exists
|
||||
run: |
|
||||
git fetch --tags
|
||||
if git tag -l | grep -q "^v${VERSION}$"; then
|
||||
echo "Tag 'v${VERSION}' already exists."
|
||||
exit 1
|
||||
else
|
||||
echo "Tag 'v${VERSION}' does not exist."
|
||||
fi
|
|
@ -0,0 +1,137 @@
|
|||
name: Integration Test
|
||||
|
||||
on:
|
||||
pull_request:
|
||||
branches:
|
||||
- master
|
||||
- release-*
|
||||
|
||||
push:
|
||||
branches:
|
||||
- master
|
||||
- release-*
|
||||
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.ref }}-${{ github.actor }}
|
||||
cancel-in-progress: true
|
||||
|
||||
jobs:
|
||||
build-arena:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Checkout source code
|
||||
uses: actions/checkout@v5
|
||||
|
||||
- name: Set up Go
|
||||
uses: actions/setup-go@v5
|
||||
with:
|
||||
go-version-file: go.mod
|
||||
|
||||
- name: Run go mod tidy
|
||||
run: |
|
||||
go mod tidy
|
||||
if ! git diff --quiet; then
|
||||
echo "Please run 'go mod tidy' to add missing and remove unused dependencies"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
- name: Run go mod vendor
|
||||
run: |
|
||||
go mod vendor
|
||||
if ! git diff --quiet; then
|
||||
echo "Please run 'go mod vendor' to make vendored copy of dependencies"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
- name: Run go fmt check
|
||||
run: |
|
||||
make go-fmt
|
||||
if ! git diff --quiet; then
|
||||
echo "Please run 'make go-fmt' to run go fmt against code"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
- name: Run go vet check
|
||||
run: |
|
||||
make go-vet
|
||||
if ! git diff --quiet; then
|
||||
echo "Please run 'make go-vet' to run go vet against code"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
- name: Run golangci-lint
|
||||
run: |
|
||||
make go-lint
|
||||
|
||||
- name: Run Go unit tests
|
||||
run: |
|
||||
make unit-test
|
||||
|
||||
- name: Run Helm unit tests
|
||||
run: |
|
||||
make helm-unittest
|
||||
|
||||
- name: Build arena binary
|
||||
run: |
|
||||
make arena
|
||||
|
||||
build-java-sdk:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Checkout source code
|
||||
uses: actions/checkout@v5
|
||||
|
||||
- uses: actions/setup-java@v5
|
||||
with:
|
||||
distribution: zulu
|
||||
java-version: 8
|
||||
|
||||
- name: Build Java SDK
|
||||
run: |
|
||||
make java-sdk
|
||||
|
||||
build-docs:
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
steps:
|
||||
- name: Checkout source code
|
||||
uses: actions/checkout@v5
|
||||
|
||||
- uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: 3.11
|
||||
|
||||
- name: Build docs
|
||||
run: |
|
||||
pip install -r docs/requirements.txt
|
||||
mkdocs build --strict
|
||||
|
||||
e2e-test:
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
steps:
|
||||
- name: Checkout source code
|
||||
uses: actions/checkout@v5
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
- name: Set up Go
|
||||
uses: actions/setup-go@v5
|
||||
with:
|
||||
go-version-file: go.mod
|
||||
|
||||
- name: Set up Kind cluster
|
||||
uses: helm/kind-action@v1
|
||||
with:
|
||||
node_image: kindest/node:v1.29.10
|
||||
config: arena-artifacts/ci/kind-config.yaml
|
||||
|
||||
- name: Install arena client
|
||||
run: |
|
||||
make arena-installer
|
||||
tar -zxf arena-installer-*.tar.gz
|
||||
arena-installer-*/install.sh --only-binary
|
||||
|
||||
- name: Run e2e tests
|
||||
run: |
|
||||
make e2e-test
|
|
@ -0,0 +1,242 @@
|
|||
name: Release
|
||||
|
||||
on:
|
||||
push:
|
||||
branches:
|
||||
- master
|
||||
paths:
|
||||
- VERSION
|
||||
|
||||
env:
|
||||
IMAGE_REGISTRY: ghcr.io
|
||||
IMAGE_REPOSITORY: ${{ github.repository }}
|
||||
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.ref }}
|
||||
cancel-in-progress: true
|
||||
|
||||
jobs:
|
||||
package-arena-installer:
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
os:
|
||||
- linux
|
||||
- darwin
|
||||
arch:
|
||||
- amd64
|
||||
- arm64
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v5
|
||||
|
||||
- name: Read version from VERSION file
|
||||
run: |
|
||||
VERSION=$(cat VERSION)
|
||||
echo "VERSION=${VERSION}" >> ${GITHUB_ENV}
|
||||
|
||||
- name: Get git commit id
|
||||
run: |
|
||||
COMMIT=$(git rev-parse --short HEAD)
|
||||
echo "COMMIT=${COMMIT}" >>${GITHUB_ENV}
|
||||
|
||||
- name: Build arena installer tarball
|
||||
run: |
|
||||
make arena-installer OS=${{ matrix.os }} ARCH=${{ matrix.arch }}
|
||||
|
||||
- uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: arena-installer-${{ env.VERSION }}-${{ matrix.os }}-${{ matrix.arch }}
|
||||
path: arena-installer-${{ env.VERSION }}-${{ matrix.os }}-${{ matrix.arch }}.tar.gz
|
||||
if-no-files-found: error
|
||||
overwrite: true
|
||||
|
||||
build-arena-image:
|
||||
name: Build Arena container image
|
||||
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
platform:
|
||||
- linux/amd64
|
||||
- linux/arm64
|
||||
|
||||
steps:
|
||||
- name: Prepare
|
||||
run: |
|
||||
platform=${{ matrix.platform }}
|
||||
echo "PLATFORM_PAIR=${platform//\//-}" >> $GITHUB_ENV
|
||||
|
||||
- name: Checkout source code
|
||||
uses: actions/checkout@v5
|
||||
|
||||
- name: Read version from VERSION file
|
||||
run: |
|
||||
VERSION=$(cat VERSION)
|
||||
echo "VERSION=${VERSION}" >> $GITHUB_ENV
|
||||
|
||||
- name: Docker meta
|
||||
id: meta
|
||||
uses: docker/metadata-action@v5
|
||||
with:
|
||||
images: ${{ env.IMAGE_REGISTRY }}/${{ env.IMAGE_REPOSITORY }}
|
||||
tags: |
|
||||
type=semver,pattern={{version}},value=${{ env.VERSION }}
|
||||
|
||||
- name: Set up QEMU
|
||||
uses: docker/setup-qemu-action@v3
|
||||
|
||||
- name: Set up Docker buildx
|
||||
uses: docker/setup-buildx-action@v3
|
||||
|
||||
- name: Login to container registry
|
||||
uses: docker/login-action@v3
|
||||
with:
|
||||
registry: ${{ env.IMAGE_REGISTRY }}
|
||||
username: ${{ github.actor }}
|
||||
password: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
||||
- name: Build and push by digest
|
||||
id: build
|
||||
uses: docker/build-push-action@v6
|
||||
with:
|
||||
platforms: ${{ matrix.platform }}
|
||||
labels: ${{ steps.meta.outputs.labels }}
|
||||
outputs: type=image,name=${{ env.IMAGE_REGISTRY }}/${{ env.IMAGE_REPOSITORY }},push-by-digest=true,name-canonical=true,push=true
|
||||
|
||||
- name: Export digest
|
||||
run: |
|
||||
mkdir -p /tmp/digests
|
||||
digest="${{ steps.build.outputs.digest }}"
|
||||
touch "/tmp/digests/${digest#sha256:}"
|
||||
|
||||
- name: Upload digest
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: digests-${{ env.PLATFORM_PAIR }}
|
||||
path: /tmp/digests/*
|
||||
if-no-files-found: error
|
||||
retention-days: 1
|
||||
|
||||
release-image:
|
||||
needs:
|
||||
- build-arena-image
|
||||
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
steps:
|
||||
- name: Checkout source code
|
||||
uses: actions/checkout@v5
|
||||
|
||||
- name: Read version from VERSION file
|
||||
run: |
|
||||
VERSION=$(cat VERSION)
|
||||
echo "VERSION=${VERSION}" >> $GITHUB_ENV
|
||||
|
||||
- name: Docker meta
|
||||
id: meta
|
||||
uses: docker/metadata-action@v5
|
||||
with:
|
||||
images: ${{ env.IMAGE_REGISTRY }}/${{ env.IMAGE_REPOSITORY }}
|
||||
tags: |
|
||||
type=semver,pattern={{version}},value=${{ env.VERSION }}
|
||||
|
||||
- name: Download digests
|
||||
uses: actions/download-artifact@v5
|
||||
with:
|
||||
path: /tmp/digests
|
||||
pattern: digests-*
|
||||
merge-multiple: true
|
||||
|
||||
- name: Set up Docker buildx
|
||||
uses: docker/setup-buildx-action@v3
|
||||
|
||||
- name: Login to container registry
|
||||
uses: docker/login-action@v3
|
||||
with:
|
||||
registry: ${{ env.IMAGE_REGISTRY }}
|
||||
username: ${{ github.actor }}
|
||||
password: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
||||
- name: Create manifest list and push
|
||||
working-directory: /tmp/digests
|
||||
run: |
|
||||
docker buildx imagetools create $(jq -cr '.tags | map("-t " + .) | join(" ")' <<< "$DOCKER_METADATA_OUTPUT_JSON") \
|
||||
$(printf '${{ env.IMAGE_REGISTRY }}/${{ env.IMAGE_REPOSITORY }}@sha256:%s ' *)
|
||||
|
||||
- name: Inspect image
|
||||
run: |
|
||||
docker buildx imagetools inspect ${{ env.IMAGE_REGISTRY }}/${{ env.IMAGE_REPOSITORY }}:${{ steps.meta.outputs.version }}
|
||||
|
||||
push_tag:
|
||||
needs:
|
||||
- package-arena-installer
|
||||
- release-image
|
||||
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
steps:
|
||||
- name: Checkout source code
|
||||
uses: actions/checkout@v5
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
- name: Configure Git
|
||||
run: |
|
||||
git config user.name "$GITHUB_ACTOR"
|
||||
git config user.email "$GITHUB_ACTOR@users.noreply.github.com"
|
||||
|
||||
- name: Read version from VERSION file
|
||||
run: |
|
||||
VERSION=$(cat VERSION)
|
||||
echo "VERSION=${VERSION}" >> ${GITHUB_ENV}
|
||||
|
||||
- name: Create and push tag
|
||||
run: |
|
||||
TAG="v${VERSION}"
|
||||
git tag -a ${TAG} -m "Release v${VERSION}"
|
||||
git push origin ${TAG}
|
||||
|
||||
draft_release:
|
||||
needs:
|
||||
- push_tag
|
||||
|
||||
permissions:
|
||||
contents: write
|
||||
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v5
|
||||
|
||||
- name: Configure Git
|
||||
run: |
|
||||
git config user.name "$GITHUB_ACTOR"
|
||||
git config user.email "$GITHUB_ACTOR@users.noreply.github.com"
|
||||
|
||||
- name: Read version from VERSION file
|
||||
run: |
|
||||
VERSION=$(cat VERSION)
|
||||
echo "VERSION=${VERSION}" >> ${GITHUB_ENV}
|
||||
|
||||
- name: Download arena installer tarballs
|
||||
uses: actions/download-artifact@v5
|
||||
with:
|
||||
pattern: arena-installer-${{ env.VERSION }}-{linux,darwin}-{amd64,arm64}
|
||||
|
||||
- name: Release
|
||||
uses: softprops/action-gh-release@v2
|
||||
with:
|
||||
token: ${{ secrets.GITHUB_TOKEN }}
|
||||
tag_name: v${{ env.VERSION }}
|
||||
prerelease: ${{ contains(env.VERSION, 'rc') }}
|
||||
target_commitish: ${{ github.sha }}
|
||||
draft: true
|
||||
files: |
|
||||
arena-installer-*/arena-installer-*.tar.gz
|
||||
fail_on_unmatched_files: true
|
|
@ -0,0 +1,43 @@
|
|||
# This workflow warns and then closes issues and PRs that have had no activity for a specified amount of time.
|
||||
#
|
||||
# You can adjust the behavior by modifying this file.
|
||||
# For more information, see:
|
||||
# https://github.com/actions/stale
|
||||
|
||||
name: Mark stale issues and pull requests
|
||||
|
||||
on:
|
||||
schedule:
|
||||
- cron: "0 0 * * 0"
|
||||
|
||||
jobs:
|
||||
stale:
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
issues: write
|
||||
pull-requests: write
|
||||
|
||||
steps:
|
||||
- uses: actions/stale@v9
|
||||
with:
|
||||
repo-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
days-before-stale: 360
|
||||
days-before-close: 180
|
||||
stale-issue-message: >
|
||||
This issue has been automatically marked as stale because it has not had
|
||||
recent activity. It will be closed if no further activity occurs. Thank you
|
||||
for your contributions.
|
||||
close-issue-message: >
|
||||
This issue has been automatically closed because it has not had recent
|
||||
activity. Please comment "/reopen" to reopen it.
|
||||
stale-issue-label: lifecycle/stale
|
||||
exempt-issue-labels: lifecycle/frozen
|
||||
stale-pr-message: >
|
||||
This pull request has been automatically marked as stale because it has not had
|
||||
recent activity. It will be closed if no further activity occurs. Thank you
|
||||
for your contributions.
|
||||
close-pr-message: >
|
||||
This pull request has been automatically closed because it has not had recent
|
||||
activity. Please comment "/reopen" to reopen it.
|
||||
stale-pr-label: lifecycle/stale
|
||||
exempt-pr-labels: lifecycle/frozen
|
|
@ -1,3 +1,25 @@
|
|||
bin/
|
||||
**/*.tgz
|
||||
**/.DS_Store
|
||||
Library
|
||||
public/
|
||||
site/
|
||||
tmp/
|
||||
sdk/arena-python-sdk/dist/
|
||||
sdk/arena-python-sdk/build/
|
||||
sdk/arena-python-sdk/arenasdk.egg-info/
|
||||
.hugo_build.lock
|
||||
.kube
|
||||
*.tgz
|
||||
*.tar.gz
|
||||
|
||||
# Python
|
||||
__pycache__/
|
||||
|
||||
# Go
|
||||
cover.out
|
||||
|
||||
# IDE files
|
||||
.idea/
|
||||
.vscode/
|
||||
|
||||
# MacOS
|
||||
.DS_Store
|
||||
|
|
|
@ -0,0 +1,76 @@
|
|||
version: "2"
|
||||
|
||||
run:
|
||||
# Timeout for total work, e.g. 30s, 5m, 5m30s.
|
||||
# If the value is lower or equal to 0, the timeout is disabled.
|
||||
# Default: 0 (disabled)
|
||||
timeout: 2m
|
||||
|
||||
linters:
|
||||
# Enable specific linters.
|
||||
# https://golangci-lint.run/usage/linters/#enabled-by-default
|
||||
enable:
|
||||
# Detects places where loop variables are copied.
|
||||
- copyloopvar
|
||||
# Checks for duplicate words in the source code.
|
||||
- dupword
|
||||
# Tool for detection of FIXME, TODO and other comment keywords.
|
||||
# - godox
|
||||
# Enforces consistent import aliases.
|
||||
- importas
|
||||
# Find code that shadows one of Go's predeclared identifiers.
|
||||
- predeclared
|
||||
# Check that struct tags are well aligned.
|
||||
- tagalign
|
||||
# Remove unnecessary type conversions.
|
||||
- unconvert
|
||||
# Checks Go code for unused constants, variables, functions and types.
|
||||
- unused
|
||||
# Disable specific linters.
|
||||
disable:
|
||||
# Errcheck is a program for checking for unchecked errors in Go code.
|
||||
- errcheck
|
||||
|
||||
settings:
|
||||
importas:
|
||||
# List of aliases
|
||||
alias:
|
||||
- pkg: k8s.io/api/admissionregistration/v1
|
||||
alias: admissionregistrationv1
|
||||
- pkg: k8s.io/api/apps/v1
|
||||
alias: appsv1
|
||||
- pkg: k8s.io/api/batch/v1
|
||||
alias: batchv1
|
||||
- pkg: k8s.io/api/core/v1
|
||||
alias: corev1
|
||||
- pkg: k8s.io/api/extensions/v1beta1
|
||||
alias: extensionsv1beta1
|
||||
- pkg: k8s.io/api/networking/v1
|
||||
alias: networkingv1
|
||||
- pkg: k8s.io/apimachinery/pkg/apis/meta/v1
|
||||
alias: metav1
|
||||
- pkg: sigs.k8s.io/controller-runtime
|
||||
alias: ctrl
|
||||
|
||||
exclusions:
|
||||
# Which file paths to exclude: they will be analyzed, but issues from them won't be reported.
|
||||
# "/" will be replaced by the current OS file path separator to properly work on Windows.
|
||||
# Default: []
|
||||
paths:
|
||||
- pkg/operators
|
||||
|
||||
issues:
|
||||
# Maximum issues count per one linter.
|
||||
# Set to 0 to disable.
|
||||
# Default: 50
|
||||
max-issues-per-linter: 50
|
||||
# Maximum count of issues with the same text.
|
||||
# Set to 0 to disable.
|
||||
# Default: 3
|
||||
max-same-issues: 10
|
||||
|
||||
formatters:
|
||||
enable:
|
||||
# Check import statements are formatted according to the 'goimports' command.
|
||||
- goimports
|
||||
|
|
@ -0,0 +1,23 @@
|
|||
# Read the Docs configuration file
|
||||
# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details
|
||||
|
||||
# Required
|
||||
version: 2
|
||||
|
||||
# Set the version of Python and other tools you might need
|
||||
build:
|
||||
os: ubuntu-22.04
|
||||
tools:
|
||||
python: "3.12"
|
||||
|
||||
mkdocs:
|
||||
configuration: mkdocs.yml
|
||||
|
||||
# Optionally build your docs in additional formats such as PDF
|
||||
formats:
|
||||
- pdf
|
||||
|
||||
# Optionally set the version of Python and requirements required to build your docs
|
||||
python:
|
||||
install:
|
||||
- requirements: docs/requirements.txt
|
14
.travis.yml
14
.travis.yml
|
@ -1,14 +0,0 @@
|
|||
language: go
|
||||
|
||||
go:
|
||||
- "1.10"
|
||||
|
||||
go_import_path: github.com/kubeflow/arena
|
||||
|
||||
# let us have speedy Docker-based Travis workers
|
||||
sudo: false
|
||||
|
||||
script:
|
||||
- go build -o bin/arena cmd/arena/*.go
|
||||
- go vet ./...
|
||||
- go test ./...
|
244
CHANGELOG.md
244
CHANGELOG.md
|
@ -1,22 +1,236 @@
|
|||
## [Release 0.1.0]
|
||||
# Changelog
|
||||
|
||||
### Added
|
||||
## [v0.15.1](https://github.com/kubeflow/arena/tree/v0.15.1) (2025-06-25)
|
||||
|
||||
- Add TFJob v1alpha2 for Solo/Distributed Training, and support binpack and spread mode
|
||||
- Add Download Source Code from Git for Training
|
||||
- Add Tensorboard
|
||||
- Add top node/job for checking GPU allocations in Kubernetes
|
||||
- Add MPIJob v1alpha1 for Solo/Distributed Training
|
||||
- Add gang scheduling support for TFJob
|
||||
- Add Data
|
||||
- Add RDMA support
|
||||
### Features
|
||||
|
||||
### Changed
|
||||
- Add support for configuring tolerations ([#1337](https://github.com/kubeflow/arena/pull/1337) by [@ChenYi015](https://github.com/ChenYi015))
|
||||
|
||||
### Removed
|
||||
### Misc
|
||||
|
||||
### Fixed
|
||||
- Remove kubernetes artifacts ([#1329](https://github.com/kubeflow/arena/pull/1329) by [@ChenYi015](https://github.com/ChenYi015))
|
||||
- [CI] Add CI workflow for releasing Arena images ([#1340](https://github.com/kubeflow/arena/pull/1340) by [@ChenYi015](https://github.com/ChenYi015))
|
||||
- Update uninstall bash script ([#1335](https://github.com/kubeflow/arena/pull/1335) by [@ChenYi015](https://github.com/ChenYi015))
|
||||
- Fix golangci-lint issues ([#1341](https://github.com/kubeflow/arena/pull/1341) by [@ChenYi015](https://github.com/ChenYi015))
|
||||
- Bump golang version from 1.22.7 to 1.23.10 ([#1345](https://github.com/kubeflow/arena/pull/1345) by [@ChenYi015](https://github.com/ChenYi015))
|
||||
- chore(deps): bump github.com/prometheus/common from 0.60.1 to 0.65.0 ([#1343](https://github.com/kubeflow/arena/pull/1343) by [@dependabot[bot]](https://github.com/apps/dependabot))
|
||||
- chore(deps): bump golang.org/x/crypto from 0.38.0 to 0.39.0 ([#1334](https://github.com/kubeflow/arena/pull/1334) by [@dependabot[bot]](https://github.com/apps/dependabot))
|
||||
|
||||
### Deprecated
|
||||
[Full Changelog](https://github.com/kubeflow/arena/compare/v0.15.0...v0.15.1)
|
||||
|
||||
- HorovodJob will be removed once MPIJob is production ready
|
||||
## [v0.15.0](https://github.com/kubeflow/arena/tree/v0.15.0) (2025-06-04)
|
||||
|
||||
### Features
|
||||
|
||||
- refactor: use helm lib instead of helm binary ([#1207](https://github.com/kubeflow/arena/pull/1207) by [@ChenYi015](https://github.com/ChenYi015))
|
||||
- feat: add new value for using localtime in cron-operator ([#1296](https://github.com/kubeflow/arena/pull/1296) by [@ChenYi015](https://github.com/ChenYi015))
|
||||
- Delete all services when the TFJob is terminated ([#1316](https://github.com/kubeflow/arena/pull/1316) by [@ChenYi015](https://github.com/ChenYi015))
|
||||
- Make number of replicas of cron-operator deployment configurable ([#1325](https://github.com/kubeflow/arena/pull/1325) by [@ChenYi015](https://github.com/ChenYi015))
|
||||
- Make number of replicas of tf-operator deployment configurable ([#1323](https://github.com/kubeflow/arena/pull/1323) by [@ChenYi015](https://github.com/ChenYi015))
|
||||
- Add custom device support for kserve and kserving. ([#1315](https://github.com/kubeflow/arena/pull/1315) by [@Leoyzen](https://github.com/Leoyzen))
|
||||
- Feat: support affinity policy for kserve and tfjob ([#1319](https://github.com/kubeflow/arena/pull/1319) by [@Syspretor](https://github.com/Syspretor))
|
||||
- Feat: support separate affinity policy configuration for PS and worke… ([#1331](https://github.com/kubeflow/arena/pull/1331) by [@Syspretor](https://github.com/Syspretor))
|
||||
|
||||
### Bug Fixes
|
||||
|
||||
- fix: job status displays incorrectly ([#1289](https://github.com/kubeflow/arena/pull/1289) by [@ChenYi015](https://github.com/ChenYi015))
|
||||
- fix: service account should use release namespace ([#1308](https://github.com/kubeflow/arena/pull/1308) by [@ChenYi015](https://github.com/ChenYi015))
|
||||
|
||||
### Misc
|
||||
|
||||
- Add basic e2e tests ([#1225](https://github.com/kubeflow/arena/pull/1225) by [@ChenYi015](https://github.com/ChenYi015))
|
||||
- Bump github.com/containerd/containerd from 1.7.23 to 1.7.27 ([#1290](https://github.com/kubeflow/arena/pull/1290) by [@dependabot[bot]](https://github.com/apps/dependabot))
|
||||
- Add stale bot to mark stale issues and PRs ([#1141](https://github.com/kubeflow/arena/pull/1141) by [@ChenYi015](https://github.com/ChenYi015))
|
||||
- Fix typos in multiple files ([#1304](https://github.com/kubeflow/arena/pull/1304) by [@co63oc](https://github.com/co63oc))
|
||||
- Fix typos in multiple files ([#1310](https://github.com/kubeflow/arena/pull/1310) by [@co63oc](https://github.com/co63oc))
|
||||
|
||||
[Full Changelog](https://github.com/kubeflow/arena/compare/v0.14.2...v0.15.0)
|
||||
|
||||
## [v0.14.2](https://github.com/kubeflow/arena/tree/v0.14.2) (2025-03-10)
|
||||
|
||||
### Misc
|
||||
|
||||
- Fix typos ([#1276](https://github.com/kubeflow/arena/pull/1276) by [@co63oc](https://github.com/co63oc))
|
||||
- Update pytorch operator image ([#1281](https://github.com/kubeflow/arena/pull/1281) by [@ChenYi015](https://github.com/ChenYi015))
|
||||
|
||||
[Full Changelog](https://github.com/kubeflow/arena/compare/v0.14.1...v0.14.2)
|
||||
|
||||
## [v0.14.1](https://github.com/kubeflow/arena/tree/v0.14.1) (2025-02-24)
|
||||
|
||||
### Bug Fixes
|
||||
|
||||
- fix: device value does not support k8s resource quantity ([#1267](https://github.com/kubeflow/arena/pull/1267) by [@ChenYi015](https://github.com/ChenYi015))
|
||||
- fix: pytorchjob does not support backoff limit ([#1272](https://github.com/kubeflow/arena/pull/1272) by [@ChenYi015](https://github.com/ChenYi015))
|
||||
- unset env NVIDIA_VISIBLE_DEVICES when gpushare is enabled ([#1273](https://github.com/kubeflow/arena/pull/1273) by [@ChenYi015](https://github.com/ChenYi015))
|
||||
|
||||
### Misc
|
||||
|
||||
- docs: fixed typo ([#1257](https://github.com/kubeflow/arena/pull/1257) by [@DBMxrco](https://github.com/DBMxrco))
|
||||
- Bump github.com/golang/glog from 1.2.3 to 1.2.4 ([#1263](https://github.com/kubeflow/arena/pull/1263) by [@dependabot[bot]](https://github.com/apps/dependabot))
|
||||
- fix: format of tensorflow standalone training docs is messed up ([#1265](https://github.com/kubeflow/arena/pull/1265) by [@ChenYi015](https://github.com/ChenYi015))
|
||||
|
||||
[Full Changelog](https://github.com/kubeflow/arena/compare/v0.14.0...v0.14.1)
|
||||
|
||||
## [v0.14.0](https://github.com/kubeflow/arena/tree/v0.14.0) (2025-02-12)
|
||||
|
||||
### Features
|
||||
|
||||
- rename parameter ([#1262](https://github.com/kubeflow/arena/pull/1262) by [@gujingit](https://github.com/gujingit))
|
||||
|
||||
### Misc
|
||||
|
||||
- Add changelog for v0.13.1 ([#1248](https://github.com/kubeflow/arena/pull/1248) by [@ChenYi015](https://github.com/ChenYi015))
|
||||
- Bump github.com/go-resty/resty/v2 from 2.16.0 to 2.16.5 ([#1254](https://github.com/kubeflow/arena/pull/1254) by [@dependabot[bot]](https://github.com/apps/dependabot))
|
||||
|
||||
[Full Changelog](https://github.com/kubeflow/arena/compare/v0.13.1...v0.14.0)
|
||||
|
||||
## [v0.13.1](https://github.com/kubeflow/arena/tree/v0.13.1) (2025-01-13)
|
||||
|
||||
### Misc
|
||||
|
||||
- feat: add linux/arm64 support for tf-operator image ([#1238](https://github.com/kubeflow/arena/pull/1238) by [@ChenYi015](https://github.com/ChenYi015))
|
||||
- feat: add linux/arm64 support for mpi-operator image ([#1239](https://github.com/kubeflow/arena/pull/1239) by [@ChenYi015](https://github.com/ChenYi015))
|
||||
- feat: add linux/arm64 support for cron-operator image ([#1240](https://github.com/kubeflow/arena/pull/1240) by [@ChenYi015](https://github.com/ChenYi015))
|
||||
- feat: add linux/arm64 support for et-operator image ([#1241](https://github.com/kubeflow/arena/pull/1241) by [@ChenYi015](https://github.com/ChenYi015))
|
||||
- Add PyTorch mnist example ([#1237](https://github.com/kubeflow/arena/pull/1237) by [@ChenYi015](https://github.com/ChenYi015))
|
||||
- Update the version of elastic-job-supervisor in arena-artifacts ([#1247](https://github.com/kubeflow/arena/pull/1247) by [@AlanFokCo](https://github.com/AlanFokCo))
|
||||
|
||||
[Full Changelog](https://github.com/kubeflow/arena/compare/v0.13.0...v0.13.1)
|
||||
|
||||
## [v0.13.0](https://github.com/kubeflow/arena/tree/v0.13.0) (2024-12-23)
|
||||
|
||||
### New Features
|
||||
|
||||
- feat: add support for torchrun ([#1228](https://github.com/kubeflow/arena/pull/1228) by [@ChenYi015](https://github.com/ChenYi015))
|
||||
- Update pytorch-operator image ([#1234](https://github.com/kubeflow/arena/pull/1234) by [@ChenYi015](https://github.com/ChenYi015))
|
||||
|
||||
### Bug Fix
|
||||
|
||||
- Avoid listing jobs and statefulsets when get pytorchjob ([#1229](https://github.com/kubeflow/arena/pull/1229) by [@ChenYi015](https://github.com/ChenYi015))
|
||||
|
||||
### Misc
|
||||
|
||||
- Update tfjob standalone training job doc ([#1222](https://github.com/kubeflow/arena/pull/1222) by [@ChenYi015](https://github.com/ChenYi015))
|
||||
- Remove archived docs ([#1208](https://github.com/kubeflow/arena/pull/1208) by [@ChenYi015](https://github.com/ChenYi015))
|
||||
- Add changelog for v0.12.1 ([#1224](https://github.com/kubeflow/arena/pull/1224) by [@ChenYi015](https://github.com/ChenYi015))
|
||||
- Bump golang.org/x/crypto from 0.29.0 to 0.31.0 ([#1231](https://github.com/kubeflow/arena/pull/1231) by [@dependabot[bot]](https://github.com/apps/dependabot))
|
||||
- Bump google.golang.org/protobuf from 1.35.1 to 1.36.0 ([#1227](https://github.com/kubeflow/arena/pull/1227) by [@dependabot[bot]](https://github.com/apps/dependabot))
|
||||
|
||||
[Full Changelog](https://github.com/kubeflow/arena/compare/v0.12.1...v0.13.0)
|
||||
|
||||
## [v0.12.1](https://github.com/kubeflow/arena/tree/v0.12.1) (2024-11-25)
|
||||
|
||||
### New Features
|
||||
|
||||
- Support MPI Job with generic devices ([#1209](https://github.com/kubeflow/arena/pull/1209) by [@cheyang](https://github.com/cheyang))
|
||||
|
||||
### Bug Fix
|
||||
|
||||
- Update tf-operator image to fix clean pod policy issues ([#1200](https://github.com/kubeflow/arena/pull/1200) by [@ChenYi015](https://github.com/ChenYi015))
|
||||
- Fix etjob rendering error when using local logging dir ([#1203](https://github.com/kubeflow/arena/pull/1203) by [@TrafalgarZZZ](https://github.com/TrafalgarZZZ))
|
||||
- Fix the functionality of generating kubeconfig (#1204) ([#1205](https://github.com/kubeflow/arena/pull/1205) by [@wqlparallel](https://github.com/wqlparallel))
|
||||
- Update cron operator image ([#1214](https://github.com/kubeflow/arena/pull/1214) by [@ChenYi015](https://github.com/ChenYi015))
|
||||
|
||||
### Misc
|
||||
|
||||
- Add changelog for v0.12.0 ([#1199](https://github.com/kubeflow/arena/pull/1199) by [@ChenYi015](https://github.com/ChenYi015))
|
||||
- Add go mod vendor check to integration test ([#1198](https://github.com/kubeflow/arena/pull/1198) by [@ChenYi015](https://github.com/ChenYi015))
|
||||
- bump github.com/go-resty/resty/v2 from 2.15.3 to 2.16.0 ([#1202](https://github.com/kubeflow/arena/pull/1202) by [@dependabot[bot]](https://github.com/apps/dependabot))
|
||||
- Publish releases only on master branch ([#1210](https://github.com/kubeflow/arena/pull/1210) by [@ChenYi015](https://github.com/ChenYi015))
|
||||
- Add docs for releasing arena ([#1201](https://github.com/kubeflow/arena/pull/1201) by [@ChenYi015](https://github.com/ChenYi015))
|
||||
- Bump golang.org/x/crypto from 0.28.0 to 0.29.0 ([#1206](https://github.com/kubeflow/arena/pull/1206) by [@dependabot[bot]](https://github.com/apps/dependabot))
|
||||
- Release v0.12.1 ([#1215](https://github.com/kubeflow/arena/pull/1215) by [@ChenYi015](https://github.com/ChenYi015))
|
||||
|
||||
[Full Changelog](https://github.com/kubeflow/arena/compare/29b2d6d2...v0.12.1)
|
||||
|
||||
## [v0.12.0](https://github.com/kubeflow/arena/tree/v0.12.0) (2024-11-11)
|
||||
|
||||
### New Features
|
||||
|
||||
- Feat: add support for distributed serving type ([#1187](https://github.com/kubeflow/arena/pull/1187) by [@linnlh](https://github.com/linnlh))
|
||||
- Support distributed serving with vendor update ([#1194](https://github.com/kubeflow/arena/pull/1194) by [@cheyang](https://github.com/cheyang))
|
||||
|
||||
### Misc
|
||||
|
||||
- Bump github.com/golang/glog from 1.2.2 to 1.2.3 ([#1189](https://github.com/kubeflow/arena/pull/1189) by [@dependabot[bot]](https://github.com/apps/dependabot))
|
||||
- Bump github.com/prometheus/common from 0.60.0 to 0.60.1 ([#1182](https://github.com/kubeflow/arena/pull/1182) by [@dependabot[bot]](https://github.com/apps/dependabot))
|
||||
- Bump mkdocs-material from 9.5.42 to 9.5.44 ([#1190](https://github.com/kubeflow/arena/pull/1190) by [@dependabot[bot]](https://github.com/apps/dependabot))
|
||||
- Release v0.12.0 ([#1197](https://github.com/kubeflow/arena/pull/1197) by [@ChenYi015](https://github.com/ChenYi015))
|
||||
|
||||
[Full Changelog](https://github.com/kubeflow/arena/compare/46a795e3...v0.12.0)
|
||||
|
||||
## [v0.11.0](https://github.com/kubeflow/arena/tree/v0.11.0) (2024-10-24)
|
||||
|
||||
### New Features
|
||||
|
||||
- Support ray job ([#1123](https://github.com/kubeflow/arena/pull/1123) by [@qile123](https://github.com/qile123))
|
||||
|
||||
### Misc
|
||||
|
||||
- Bump github.com/prometheus/client_golang from 1.20.4 to 1.20.5 ([#1176](https://github.com/kubeflow/arena/pull/1176) by [@dependabot[bot]](https://github.com/apps/dependabot))
|
||||
- Bump mkdocs-material from 9.5.40 to 9.5.42 ([#1179](https://github.com/kubeflow/arena/pull/1179) by [@dependabot[bot]](https://github.com/apps/dependabot))
|
||||
|
||||
[Full Changelog](https://github.com/kubeflow/arena/compare/e15cb18...v0.11.0)
|
||||
|
||||
## [v0.10.1](https://github.com/kubeflow/arena/tree/v0.10.1) (2024-10-14)
|
||||
|
||||
### Bug Fixes
|
||||
|
||||
- fix: keep arena installer after installing the binary ([#1164](https://github.com/kubeflow/arena/pull/1164) by [@ChenYi015](https://github.com/ChenYi015))
|
||||
- fix: unsupported success policy when success policy is not specified ([#1170](https://github.com/kubeflow/arena/pull/1170) by [@ChenYi015](https://github.com/ChenYi015))
|
||||
- fix: failed to sync cache due to status subresource missed in tfjob CRD ([#1173](https://github.com/kubeflow/arena/pull/1173) by [@ChenYi015](https://github.com/ChenYi015))
|
||||
|
||||
### Misc
|
||||
|
||||
- Bump github.com/prometheus/common from 0.59.1 to 0.60.0 ([#1160](https://github.com/kubeflow/arena/pull/1160) by [@dependabot[bot]](https://github.com/apps/dependabot))
|
||||
- Bump golang.org/x/crypto from 0.27.0 to 0.28.0 ([#1162](https://github.com/kubeflow/arena/pull/1162) by [@dependabot[bot]](https://github.com/apps/dependabot))
|
||||
- Migrate docker image to ACREE ([#1171](https://github.com/kubeflow/arena/pull/1171) by [@ChenYi015](https://github.com/ChenYi015))
|
||||
- Bump mkdocs-material from 9.5.38 to 9.5.40 ([#1166](https://github.com/kubeflow/arena/pull/1166) by [@dependabot[bot]](https://github.com/apps/dependabot))
|
||||
- Bump google.golang.org/protobuf from 1.34.2 to 1.35.1 ([#1163](https://github.com/kubeflow/arena/pull/1163) by [@dependabot[bot]](https://github.com/apps/dependabot))
|
||||
- Remove redundant run_arena.sh file ([#1172](https://github.com/kubeflow/arena/pull/1172) by [@ChenYi015](https://github.com/ChenYi015))
|
||||
|
||||
[Full Changelog](https://github.com/kubeflow/arena/compare/v0.10.0...v0.10.1)
|
||||
|
||||
## [v0.10.0](https://github.com/kubeflow/arena/tree/v0.10.0) (2024-09-29)
|
||||
|
||||
### New Features
|
||||
|
||||
- Support multiple type devices ([#1122](https://github.com/kubeflow/arena/pull/1122) by [@lizhiboo](https://github.com/lizhiboo))
|
||||
- Increase RSA key bit size from 1024 to 2048 ([#1130](https://github.com/kubeflow/arena/pull/1130) by [@ChenYi015](https://github.com/ChenYi015))
|
||||
- Add success policy to TF training job ([#1148](https://github.com/kubeflow/arena/pull/1148) by [@ChenYi015](https://github.com/ChenYi015))
|
||||
|
||||
### Bug Fixes
|
||||
|
||||
- Fix submitting spark training jobs and update docs ([#1112](https://github.com/kubeflow/arena/pull/1112) by [@ChenYi015](https://github.com/ChenYi015))
|
||||
- docs: fix broken links and add CI for checking document build status ([#1131](https://github.com/kubeflow/arena/pull/1131) by [@ChenYi015](https://github.com/ChenYi015))
|
||||
- [Bugfix] Make PytorchJob devices format to key=value ([#1155](https://github.com/kubeflow/arena/pull/1155) by [@AlanFokCo](https://github.com/AlanFokCo))
|
||||
|
||||
### SDK
|
||||
|
||||
- Bump arena Java SDK version to 1.0.8 ([#1124](https://github.com/kubeflow/arena/pull/1124) by [@ChenYi015](https://github.com/ChenYi015))
|
||||
|
||||
### Misc
|
||||
|
||||
- Remove docker dependency ([#1113](https://github.com/kubeflow/arena/pull/1113) by [@Syulin7](https://github.com/Syulin7))
|
||||
- Update Makefile and release workflow ([#1128](https://github.com/kubeflow/arena/pull/1128) by [@ChenYi015](https://github.com/ChenYi015))
|
||||
- chore: remove travis and circle CI ([#1129](https://github.com/kubeflow/arena/pull/1129) by [@ChenYi015](https://github.com/ChenYi015))
|
||||
- chore: add issue templates and update dependabot ([#1140](https://github.com/kubeflow/arena/pull/1140) by [@ChenYi015](https://github.com/ChenYi015))
|
||||
- Bump github.com/golang/glog from 1.1.2 to 1.2.2 ([#1139](https://github.com/kubeflow/arena/pull/1139) by [@dependabot[bot]](https://github.com/apps/dependabot))
|
||||
- Bump golang.org/x/crypto from 0.21.0 to 0.27.0 ([#1126](https://github.com/kubeflow/arena/pull/1126) by [@dependabot[bot]](https://github.com/apps/dependabot))
|
||||
- Bump github.com/spf13/cobra from 1.8.0 to 1.8.1 ([#1137](https://github.com/kubeflow/arena/pull/1137) by [@dependabot[bot]](https://github.com/apps/dependabot))
|
||||
- Bump github.com/go-resty/resty/v2 from 2.12.0 to 2.14.0 ([#1134](https://github.com/kubeflow/arena/pull/1134) by [@dependabot[bot]](https://github.com/apps/dependabot))
|
||||
- Bump github.com/kserve/kserve from 0.13.0 to 0.13.1 ([#1135](https://github.com/kubeflow/arena/pull/1135) by [@dependabot[bot]](https://github.com/apps/dependabot))
|
||||
- Bump github.com/prometheus/common from 0.45.0 to 0.59.1 ([#1138](https://github.com/kubeflow/arena/pull/1138) by [@dependabot[bot]](https://github.com/apps/dependabot))
|
||||
- Bump client-java from 10.0.1 to 11.0.1 ([#1132](https://github.com/kubeflow/arena/pull/1132) by [@ChenYi015](https://github.com/ChenYi015))
|
||||
- Bump github.com/prometheus/client_golang from 1.20.0 to 1.20.4 ([#1144](https://github.com/kubeflow/arena/pull/1144) by [@dependabot[bot]](https://github.com/apps/dependabot))
|
||||
- Bump github.com/go-resty/resty/v2 from 2.14.0 to 2.15.0 ([#1143](https://github.com/kubeflow/arena/pull/1143) by [@dependabot[bot]](https://github.com/apps/dependabot))
|
||||
- Bump mkdocs-material from 9.5.34 to 9.5.35 ([#1145](https://github.com/kubeflow/arena/pull/1145) by [@dependabot[bot]](https://github.com/apps/dependabot))
|
||||
- Bump github.com/go-resty/resty/v2 from 2.15.0 to 2.15.1 ([#1147](https://github.com/kubeflow/arena/pull/1147) by [@dependabot[bot]](https://github.com/apps/dependabot))
|
||||
- Bump github.com/go-resty/resty/v2 from 2.15.1 to 2.15.2 ([#1150](https://github.com/kubeflow/arena/pull/1150) by [@dependabot[bot]](https://github.com/apps/dependabot))
|
||||
- Bump mkdocs-material from 9.5.35 to 9.5.36 ([#1151](https://github.com/kubeflow/arena/pull/1151) by [@dependabot[bot]](https://github.com/apps/dependabot))
|
||||
- Bump golang from 1.21 to 1.22.7 ([#1142](https://github.com/kubeflow/arena/pull/1142) by [@ChenYi015](https://github.com/ChenYi015))
|
||||
- Bump mkdocs-material from 9.5.36 to 9.5.38 ([#1153](https://github.com/kubeflow/arena/pull/1153) by [@dependabot[bot]](https://github.com/apps/dependabot))
|
||||
- Bump github.com/go-resty/resty/v2 from 2.15.2 to 2.15.3 ([#1156](https://github.com/kubeflow/arena/pull/1156) by [@dependabot[bot]](https://github.com/apps/dependabot))
|
||||
- Release v0.10.0 ([#1157](https://github.com/kubeflow/arena/pull/1157) by [@ChenYi015](https://github.com/ChenYi015))
|
||||
|
||||
[Full Changelog](https://github.com/kubeflow/arena/compare/v0.9.16...v0.10.0)
|
||||
|
|
|
@ -0,0 +1,41 @@
|
|||
ARG BASE_IMAGE=debian:12-slim
|
||||
|
||||
FROM golang:1.24.0 AS builder
|
||||
|
||||
ARG TARGETOS
|
||||
|
||||
ARG TARGETARCH
|
||||
|
||||
WORKDIR /workspace
|
||||
|
||||
COPY . .
|
||||
|
||||
RUN set -eux && \
|
||||
VERSION=$(cat VERSION) && \
|
||||
make arena-installer OS=${TARGETOS} ARCH=${TARGETARCH} && \
|
||||
mv arena-installer-${VERSION}-${TARGETOS}-${TARGETARCH}.tar.gz arena-installer.tar.gz
|
||||
|
||||
|
||||
FROM ${BASE_IMAGE}
|
||||
|
||||
ARG TARGETOS
|
||||
|
||||
ARG TARGETARCH
|
||||
|
||||
WORKDIR /root
|
||||
|
||||
RUN apt-get update \
|
||||
&& apt-get install -y tini \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
COPY --from=builder /workspace/arena-installer.tar.gz .
|
||||
|
||||
RUN set -eux && \
|
||||
tar -zxvf arena-installer.tar.gz && \
|
||||
mv arena-installer-*-${TARGETOS}-${TARGETARCH} arena-installer && \
|
||||
arena-installer/install.sh --only-binary && \
|
||||
rm -rf arena-installer.tar.gz
|
||||
|
||||
COPY entrypoint.sh /usr/local/bin/
|
||||
|
||||
ENTRYPOINT ["/usr/local/bin/entrypoint.sh"]
|
|
@ -1,39 +0,0 @@
|
|||
FROM golang:1.10-stretch as build
|
||||
|
||||
RUN mkdir -p /go/src/github.com/kubeflow/arena
|
||||
|
||||
WORKDIR /go/src/github.com/kubeflow/arena
|
||||
COPY . .
|
||||
|
||||
RUN make
|
||||
|
||||
RUN wget https://storage.googleapis.com/kubernetes-helm/helm-v2.9.1-linux-amd64.tar.gz && \
|
||||
tar -xvf helm-v2.9.1-linux-amd64.tar.gz && \
|
||||
mv linux-amd64/helm /usr/local/bin/helm && \
|
||||
chmod u+x /usr/local/bin/helm
|
||||
|
||||
ENV K8S_VERSION v1.11.2
|
||||
RUN curl -o /usr/local/bin/kubectl https://storage.googleapis.com/kubernetes-release/release/${K8S_VERSION}/bin/linux/amd64/kubectl && chmod +x /usr/local/bin/kubectl
|
||||
|
||||
|
||||
FROM centos:7
|
||||
|
||||
COPY --from=build /go/src/github.com/kubeflow/arena/bin/arena /usr/local/bin/arena
|
||||
|
||||
COPY --from=build /usr/local/bin/helm /usr/local/bin/helm
|
||||
|
||||
COPY --from=build /go/src/github.com/kubeflow/arena/kubernetes-artifacts /root/kubernetes-artifacts
|
||||
|
||||
COPY --from=build /usr/local/bin/kubectl /usr/local/bin/kubectl
|
||||
|
||||
COPY --from=build /go/src/github.com/kubeflow/arena/charts /charts
|
||||
|
||||
ADD run_arena.sh /usr/local/bin
|
||||
|
||||
RUN chmod u+x /usr/local/bin/run_arena.sh
|
||||
|
||||
RUN yum install bash-completion -y && \
|
||||
echo "source <(arena completion bash)" >> ~/.bashrc
|
||||
|
||||
ENTRYPOINT ["/usr/local/bin/run_arena.sh"]
|
||||
|
|
@ -3,7 +3,7 @@ ARG BASE_IMAGE=tensorflow/tensorflow:1.12.0-devel-py3
|
|||
|
||||
ARG USER=root
|
||||
|
||||
FROM golang:1.10-stretch as build
|
||||
FROM golang:1.23.10 AS build
|
||||
|
||||
RUN mkdir -p /go/src/github.com/kubeflow/arena
|
||||
|
||||
|
@ -12,19 +12,19 @@ COPY . .
|
|||
|
||||
RUN make
|
||||
|
||||
RUN wget https://storage.googleapis.com/kubernetes-helm/helm-v2.9.1-linux-amd64.tar.gz && \
|
||||
tar -xvf helm-v2.9.1-linux-amd64.tar.gz && \
|
||||
RUN wget https://get.helm.sh/helm-v3.13.3-linux-amd64.tar.gz && \
|
||||
tar -xvf helm-v3.13.3-linux-amd64.tar.gz && \
|
||||
mv linux-amd64/helm /usr/local/bin/helm && \
|
||||
chmod u+x /usr/local/bin/helm
|
||||
|
||||
ENV K8S_VERSION v1.11.2
|
||||
ENV K8S_VERSION v1.28.4
|
||||
RUN curl -o /usr/local/bin/kubectl https://storage.googleapis.com/kubernetes-release/release/${K8S_VERSION}/bin/linux/amd64/kubectl && chmod +x /usr/local/bin/kubectl
|
||||
|
||||
FROM $BASE_IMAGE
|
||||
|
||||
COPY --from=build /go/src/github.com/kubeflow/arena/bin/arena /usr/local/bin/arena
|
||||
|
||||
COPY --from=build /usr/local/bin/helm /usr/local/bin/helm
|
||||
COPY --from=build /usr/local/bin/helm /usr/local/bin/arena-helm
|
||||
|
||||
COPY --from=build /go/src/github.com/kubeflow/arena/charts /charts
|
||||
|
||||
|
|
|
@ -2,7 +2,7 @@ ARG BASE_IMAGE=registry.aliyuncs.com/kubeflow-images-public/tensorflow-1.12.0-no
|
|||
|
||||
ARG USER=jovyan
|
||||
|
||||
FROM golang:1.10-stretch as build
|
||||
FROM golang:1.23.10 AS build
|
||||
|
||||
RUN mkdir -p /go/src/github.com/kubeflow/arena
|
||||
|
||||
|
@ -11,19 +11,19 @@ COPY . .
|
|||
|
||||
RUN make
|
||||
|
||||
RUN wget https://storage.googleapis.com/kubernetes-helm/helm-v2.9.1-linux-amd64.tar.gz && \
|
||||
tar -xvf helm-v2.9.1-linux-amd64.tar.gz && \
|
||||
RUN wget https://get.helm.sh/helm-v3.13.3-linux-amd64.tar.gz && \
|
||||
tar -xvf helm-v3.13.3-linux-amd64.tar.gz && \
|
||||
mv linux-amd64/helm /usr/local/bin/helm && \
|
||||
chmod u+x /usr/local/bin/helm
|
||||
|
||||
ENV K8S_VERSION v1.11.2
|
||||
ENV K8S_VERSION v1.28.4
|
||||
RUN curl -o /usr/local/bin/kubectl https://storage.googleapis.com/kubernetes-release/release/${K8S_VERSION}/bin/linux/amd64/kubectl && chmod +x /usr/local/bin/kubectl
|
||||
|
||||
FROM $BASE_IMAGE
|
||||
|
||||
COPY --from=build /go/src/github.com/kubeflow/arena/bin/arena /usr/local/bin/arena
|
||||
|
||||
COPY --from=build /usr/local/bin/helm /usr/local/bin/helm
|
||||
COPY --from=build /usr/local/bin/helm /usr/local/bin/arena-helm
|
||||
|
||||
COPY --from=build /go/src/github.com/kubeflow/arena/charts /charts
|
||||
|
||||
|
@ -35,4 +35,4 @@ RUN apt-get update && \
|
|||
echo "source /etc/bash_completion" >> /etc/bash.bashrc && \
|
||||
echo "source <(arena completion bash)" >> /etc/bash.bashrc
|
||||
|
||||
USER $USER
|
||||
USER $USER
|
||||
|
|
|
@ -1,627 +0,0 @@
|
|||
# This file is autogenerated, do not edit; changes may be undone by the next 'dep ensure'.
|
||||
|
||||
|
||||
[[projects]]
|
||||
digest = "1:5c3894b2aa4d6bead0ceeea6831b305d62879c871780e7b76296ded1b004bc57"
|
||||
name = "cloud.google.com/go"
|
||||
packages = ["compute/metadata"]
|
||||
pruneopts = "UT"
|
||||
revision = "97efc2c9ffd9fe8ef47f7f3203dc60bbca547374"
|
||||
version = "v0.28.0"
|
||||
|
||||
[[projects]]
|
||||
digest = "1:d1665c44bd5db19aaee18d1b6233c99b0b9a986e8bccb24ef54747547a48027f"
|
||||
name = "github.com/PuerkitoBio/purell"
|
||||
packages = ["."]
|
||||
pruneopts = "UT"
|
||||
revision = "0bcb03f4b4d0a9428594752bd2a3b9aa0a9d4bd4"
|
||||
version = "v1.1.0"
|
||||
|
||||
[[projects]]
|
||||
branch = "master"
|
||||
digest = "1:c739832d67eb1e9cc478a19cc1a1ccd78df0397bf8a32978b759152e205f644b"
|
||||
name = "github.com/PuerkitoBio/urlesc"
|
||||
packages = ["."]
|
||||
pruneopts = "UT"
|
||||
revision = "de5bf2ad457846296e2031421a34e2568e304e35"
|
||||
|
||||
[[projects]]
|
||||
digest = "1:7cb4fdca4c251b3ef8027c90ea35f70c7b661a593b9eeae34753c65499098bb1"
|
||||
name = "github.com/cpuguy83/go-md2man"
|
||||
packages = ["md2man"]
|
||||
pruneopts = "UT"
|
||||
revision = "20f5889cbdc3c73dbd2862796665e7c465ade7d1"
|
||||
version = "v1.0.8"
|
||||
|
||||
[[projects]]
|
||||
digest = "1:a2c1d0e43bd3baaa071d1b9ed72c27d78169b2b269f71c105ac4ba34b1be4a39"
|
||||
name = "github.com/davecgh/go-spew"
|
||||
packages = ["spew"]
|
||||
pruneopts = "UT"
|
||||
revision = "346938d642f2ec3594ed81d874461961cd0faa76"
|
||||
version = "v1.1.0"
|
||||
|
||||
[[projects]]
|
||||
digest = "1:899234af23e5793c34e06fd397f86ba33af5307b959b6a7afd19b63db065a9d7"
|
||||
name = "github.com/emicklei/go-restful"
|
||||
packages = [
|
||||
".",
|
||||
"log",
|
||||
]
|
||||
pruneopts = "UT"
|
||||
revision = "3eb9738c1697594ea6e71a7156a9bb32ed216cf0"
|
||||
version = "v2.8.0"
|
||||
|
||||
[[projects]]
|
||||
digest = "1:2cd7915ab26ede7d95b8749e6b1f933f1c6d5398030684e6505940a10f31cfda"
|
||||
name = "github.com/ghodss/yaml"
|
||||
packages = ["."]
|
||||
pruneopts = "UT"
|
||||
revision = "0ca9ea5df5451ffdf184b4428c902747c2c11cd7"
|
||||
version = "v1.0.0"
|
||||
|
||||
[[projects]]
|
||||
digest = "1:2997679181d901ac8aaf4330d11138ecf3974c6d3334995ff36f20cbd597daf8"
|
||||
name = "github.com/go-openapi/jsonpointer"
|
||||
packages = ["."]
|
||||
pruneopts = "UT"
|
||||
revision = "3a0015ad55fa9873f41605d3e8f28cd279c32ab2"
|
||||
version = "0.15.0"
|
||||
|
||||
[[projects]]
|
||||
digest = "1:1ae3f233d75a731b164ca9feafd8ed646cbedf1784095876ed6988ce8aa88b1f"
|
||||
name = "github.com/go-openapi/jsonreference"
|
||||
packages = ["."]
|
||||
pruneopts = "UT"
|
||||
revision = "3fb327e6747da3043567ee86abd02bb6376b6be2"
|
||||
version = "0.15.0"
|
||||
|
||||
[[projects]]
|
||||
branch = "master"
|
||||
digest = "1:d538832fc6033760440c9b7058504c495542905c83925f5d846bc954ff899a3b"
|
||||
name = "github.com/go-openapi/spec"
|
||||
packages = ["."]
|
||||
pruneopts = "UT"
|
||||
revision = "f1468acb3b29cdd5c5f6fa29435d2d2d6e6c9ff1"
|
||||
|
||||
[[projects]]
|
||||
digest = "1:731022b436cdb9b4b2a53be2ead693467a1474b8b873d4f90cb424fffdc3d0ff"
|
||||
name = "github.com/go-openapi/swag"
|
||||
packages = ["."]
|
||||
pruneopts = "UT"
|
||||
revision = "2b0bd4f193d011c203529df626a65d63cb8a79e8"
|
||||
version = "0.15.0"
|
||||
|
||||
[[projects]]
|
||||
digest = "1:3f7c586d2fd571c4f2e3d313c0638b33b999055de507d5f11fafe27e5adc30db"
|
||||
name = "github.com/gogo/protobuf"
|
||||
packages = [
|
||||
"gogoproto",
|
||||
"proto",
|
||||
"protoc-gen-gogo/descriptor",
|
||||
"sortkeys",
|
||||
"types",
|
||||
]
|
||||
pruneopts = "UT"
|
||||
revision = "636bf0302bc95575d69441b25a2603156ffdddf1"
|
||||
version = "v1.1.1"
|
||||
|
||||
[[projects]]
|
||||
branch = "master"
|
||||
digest = "1:1ba1d79f2810270045c328ae5d674321db34e3aae468eb4233883b473c5c0467"
|
||||
name = "github.com/golang/glog"
|
||||
packages = ["."]
|
||||
pruneopts = "UT"
|
||||
revision = "23def4e6c14b4da8ac2ed8007337bc5eb5007998"
|
||||
|
||||
[[projects]]
|
||||
digest = "1:17fe264ee908afc795734e8c4e63db2accabaf57326dbf21763a7d6b86096260"
|
||||
name = "github.com/golang/protobuf"
|
||||
packages = [
|
||||
"proto",
|
||||
"ptypes",
|
||||
"ptypes/any",
|
||||
"ptypes/duration",
|
||||
"ptypes/timestamp",
|
||||
]
|
||||
pruneopts = "UT"
|
||||
revision = "b4deda0973fb4c70b50d226b1af49f3da59f5265"
|
||||
version = "v1.1.0"
|
||||
|
||||
[[projects]]
|
||||
branch = "master"
|
||||
digest = "1:9887333bbef17574b1db5f9893ea137ac44107235d624408a3ac9e0b98fbb2cb"
|
||||
name = "github.com/google/btree"
|
||||
packages = ["."]
|
||||
pruneopts = "UT"
|
||||
revision = "e89373fe6b4a7413d7acd6da1725b83ef713e6e4"
|
||||
|
||||
[[projects]]
|
||||
branch = "master"
|
||||
digest = "1:3ee90c0d94da31b442dde97c99635aaafec68d0b8a3c12ee2075c6bdabeec6bb"
|
||||
name = "github.com/google/gofuzz"
|
||||
packages = ["."]
|
||||
pruneopts = "UT"
|
||||
revision = "24818f796faf91cd76ec7bddd72458fbced7a6c1"
|
||||
|
||||
[[projects]]
|
||||
digest = "1:65c4414eeb350c47b8de71110150d0ea8a281835b1f386eacaa3ad7325929c21"
|
||||
name = "github.com/googleapis/gnostic"
|
||||
packages = [
|
||||
"OpenAPIv2",
|
||||
"compiler",
|
||||
"extensions",
|
||||
]
|
||||
pruneopts = "UT"
|
||||
revision = "7c663266750e7d82587642f65e60bc4083f1f84e"
|
||||
version = "v0.2.0"
|
||||
|
||||
[[projects]]
|
||||
branch = "master"
|
||||
digest = "1:86c1210529e69d69860f2bb3ee9ccce0b595aa3f9165e7dd1388e5c612915888"
|
||||
name = "github.com/gregjones/httpcache"
|
||||
packages = [
|
||||
".",
|
||||
"diskcache",
|
||||
]
|
||||
pruneopts = "UT"
|
||||
revision = "9cad4c3443a7200dd6400aef47183728de563a38"
|
||||
|
||||
[[projects]]
|
||||
branch = "master"
|
||||
digest = "1:cf296baa185baae04a9a7004efee8511d08e2f5f51d4cbe5375da89722d681db"
|
||||
name = "github.com/hashicorp/golang-lru"
|
||||
packages = [
|
||||
".",
|
||||
"simplelru",
|
||||
]
|
||||
pruneopts = "UT"
|
||||
revision = "0fb14efe8c47ae851c0034ed7a448854d3d34cf3"
|
||||
|
||||
[[projects]]
|
||||
branch = "master"
|
||||
digest = "1:0778dc7fce1b4669a8bfa7ae506ec1f595b6ab0f8989c1c0d22a8ca1144e9972"
|
||||
name = "github.com/howeyc/gopass"
|
||||
packages = ["."]
|
||||
pruneopts = "UT"
|
||||
revision = "bf9dde6d0d2c004a008c27aaee91170c786f6db8"
|
||||
|
||||
[[projects]]
|
||||
digest = "1:3e260afa138eab6492b531a3b3d10ab4cb70512d423faa78b8949dec76e66a21"
|
||||
name = "github.com/imdario/mergo"
|
||||
packages = ["."]
|
||||
pruneopts = "UT"
|
||||
revision = "9316a62528ac99aaecb4e47eadd6dc8aa6533d58"
|
||||
version = "v0.3.5"
|
||||
|
||||
[[projects]]
|
||||
digest = "1:870d441fe217b8e689d7949fef6e43efbc787e50f200cb1e70dbca9204a1d6be"
|
||||
name = "github.com/inconshreveable/mousetrap"
|
||||
packages = ["."]
|
||||
pruneopts = "UT"
|
||||
revision = "76626ae9c91c4f2a10f34cad8ce83ea42c93bb75"
|
||||
version = "v1.0"
|
||||
|
||||
[[projects]]
|
||||
digest = "1:eaefc85d32c03e5f0c2b88ea2f79fce3d993e2c78316d21319575dd4ea9153ca"
|
||||
name = "github.com/json-iterator/go"
|
||||
packages = ["."]
|
||||
pruneopts = "UT"
|
||||
revision = "ab8a2e0c74be9d3be70b3184d9acc634935ded82"
|
||||
version = "1.1.4"
|
||||
|
||||
[[projects]]
|
||||
digest = "1:190ff84d9b2ed6589088f178cba8edb4b8ecb334df4572421fb016be1ac20463"
|
||||
name = "github.com/juju/ratelimit"
|
||||
packages = ["."]
|
||||
pruneopts = "UT"
|
||||
revision = "59fac5042749a5afb9af70e813da1dd5474f0167"
|
||||
version = "1.0.1"
|
||||
|
||||
[[projects]]
|
||||
digest = "1:fb1215a67b0e5aa0770c8486f465f4682be334d9430d7ea7b566fc40fb70601e"
|
||||
name = "github.com/kubeflow/tf-operator"
|
||||
packages = [
|
||||
"pkg/apis/tensorflow/v1alpha1",
|
||||
"pkg/apis/tensorflow/v1alpha2",
|
||||
"pkg/client/clientset/versioned",
|
||||
"pkg/client/clientset/versioned/scheme",
|
||||
"pkg/client/clientset/versioned/typed/kubeflow/v1alpha1",
|
||||
"pkg/client/clientset/versioned/typed/kubeflow/v1alpha2",
|
||||
]
|
||||
pruneopts = "UT"
|
||||
revision = "38b886a0af6e08c31d93c1db6b49f4f79ab8de1e"
|
||||
version = "v0.2.0-rc1"
|
||||
|
||||
[[projects]]
|
||||
branch = "master"
|
||||
digest = "1:89cb38858ea53690cdcd952f35b6cdda405dc9f8fc5601872e01b49c88c59991"
|
||||
name = "github.com/mailru/easyjson"
|
||||
packages = [
|
||||
"buffer",
|
||||
"jlexer",
|
||||
"jwriter",
|
||||
]
|
||||
pruneopts = "UT"
|
||||
revision = "d5012789d6659eeed305f54c1b1542e7b65829e6"
|
||||
|
||||
[[projects]]
|
||||
digest = "1:5d231480e1c64a726869bc4142d270184c419749d34f167646baa21008eb0a79"
|
||||
name = "github.com/mitchellh/go-homedir"
|
||||
packages = ["."]
|
||||
pruneopts = "UT"
|
||||
revision = "af06845cf3004701891bf4fdb884bfe4920b3727"
|
||||
version = "v1.1.0"
|
||||
|
||||
[[projects]]
|
||||
digest = "1:33422d238f147d247752996a26574ac48dcf472976eda7f5134015f06bf16563"
|
||||
name = "github.com/modern-go/concurrent"
|
||||
packages = ["."]
|
||||
pruneopts = "UT"
|
||||
revision = "bacd9c7ef1dd9b15be4a9909b8ac7a4e313eec94"
|
||||
version = "1.0.3"
|
||||
|
||||
[[projects]]
|
||||
digest = "1:e32bdbdb7c377a07a9a46378290059822efdce5c8d96fe71940d87cb4f918855"
|
||||
name = "github.com/modern-go/reflect2"
|
||||
packages = ["."]
|
||||
pruneopts = "UT"
|
||||
revision = "4b7aa43c6742a2c18fdef89dd197aaae7dac7ccd"
|
||||
version = "1.0.1"
|
||||
|
||||
[[projects]]
|
||||
branch = "master"
|
||||
digest = "1:3bf17a6e6eaa6ad24152148a631d18662f7212e21637c2699bff3369b7f00fa2"
|
||||
name = "github.com/petar/GoLLRB"
|
||||
packages = ["llrb"]
|
||||
pruneopts = "UT"
|
||||
revision = "53be0d36a84c2a886ca057d34b6aa4468df9ccb4"
|
||||
|
||||
[[projects]]
|
||||
digest = "1:0e7775ebbcf00d8dd28ac663614af924411c868dca3d5aa762af0fae3808d852"
|
||||
name = "github.com/peterbourgon/diskv"
|
||||
packages = ["."]
|
||||
pruneopts = "UT"
|
||||
revision = "5f041e8faa004a95c88a202771f4cc3e991971e6"
|
||||
version = "v2.0.1"
|
||||
|
||||
[[projects]]
|
||||
digest = "1:8bc629776d035c003c7814d4369521afe67fdb8efc4b5f66540d29343b98cf23"
|
||||
name = "github.com/russross/blackfriday"
|
||||
packages = ["."]
|
||||
pruneopts = "UT"
|
||||
revision = "55d61fa8aa702f59229e6cff85793c22e580eaf5"
|
||||
version = "v1.5.1"
|
||||
|
||||
[[projects]]
|
||||
digest = "1:d867dfa6751c8d7a435821ad3b736310c2ed68945d05b50fb9d23aee0540c8cc"
|
||||
name = "github.com/sirupsen/logrus"
|
||||
packages = ["."]
|
||||
pruneopts = "UT"
|
||||
revision = "3e01752db0189b9157070a0e1668a620f9a85da2"
|
||||
version = "v1.0.6"
|
||||
|
||||
[[projects]]
|
||||
digest = "1:e01b05ba901239c783dfe56450bcde607fc858908529868259c9a8765dc176d0"
|
||||
name = "github.com/spf13/cobra"
|
||||
packages = [
|
||||
".",
|
||||
"doc",
|
||||
]
|
||||
pruneopts = "UT"
|
||||
revision = "ef82de70bb3f60c65fb8eebacbb2d122ef517385"
|
||||
version = "v0.0.3"
|
||||
|
||||
[[projects]]
|
||||
digest = "1:9424f440bba8f7508b69414634aef3b2b3a877e522d8a4624692412805407bb7"
|
||||
name = "github.com/spf13/pflag"
|
||||
packages = ["."]
|
||||
pruneopts = "UT"
|
||||
revision = "583c0c0531f06d5278b7d917446061adc344b5cd"
|
||||
version = "v1.0.1"
|
||||
|
||||
[[projects]]
|
||||
branch = "master"
|
||||
digest = "1:3f3a05ae0b95893d90b9b3b5afdb79a9b3d96e4e36e099d841ae602e4aca0da8"
|
||||
name = "golang.org/x/crypto"
|
||||
packages = ["ssh/terminal"]
|
||||
pruneopts = "UT"
|
||||
revision = "c126467f60eb25f8f27e5a981f32a87e3965053f"
|
||||
|
||||
[[projects]]
|
||||
branch = "master"
|
||||
digest = "1:1f71d110bfc1caef79e72c3606c6110fc4d9b606124fe56bd3c2615325957c3d"
|
||||
name = "golang.org/x/net"
|
||||
packages = [
|
||||
"context",
|
||||
"context/ctxhttp",
|
||||
"http/httpguts",
|
||||
"http2",
|
||||
"http2/hpack",
|
||||
"idna",
|
||||
]
|
||||
pruneopts = "UT"
|
||||
revision = "3673e40ba22529d22c3fd7c93e97b0ce50fa7bdd"
|
||||
|
||||
[[projects]]
|
||||
branch = "master"
|
||||
digest = "1:f645667d687fc8bf228865a2c5455824ef05bad08841e673673ef2bb89ac5b90"
|
||||
name = "golang.org/x/oauth2"
|
||||
packages = [
|
||||
".",
|
||||
"google",
|
||||
"internal",
|
||||
"jws",
|
||||
"jwt",
|
||||
]
|
||||
pruneopts = "UT"
|
||||
revision = "d2e6202438beef2727060aa7cabdd924d92ebfd9"
|
||||
|
||||
[[projects]]
|
||||
branch = "master"
|
||||
digest = "1:8742e6e73627b2877c3f723bc1823d5667ec59011242480309dc90fa862512aa"
|
||||
name = "golang.org/x/sys"
|
||||
packages = [
|
||||
"unix",
|
||||
"windows",
|
||||
]
|
||||
pruneopts = "UT"
|
||||
revision = "bd9dbc187b6e1dacfdd2722a87e83093c2d7bd6e"
|
||||
|
||||
[[projects]]
|
||||
digest = "1:0c56024909189aee3364b7f21a95a27459f718aa7c199a5c111c36cfffd9eaef"
|
||||
name = "golang.org/x/text"
|
||||
packages = [
|
||||
"collate",
|
||||
"collate/build",
|
||||
"internal/colltab",
|
||||
"internal/gen",
|
||||
"internal/tag",
|
||||
"internal/triegen",
|
||||
"internal/ucd",
|
||||
"language",
|
||||
"secure/bidirule",
|
||||
"transform",
|
||||
"unicode/bidi",
|
||||
"unicode/cldr",
|
||||
"unicode/norm",
|
||||
"unicode/rangetable",
|
||||
"width",
|
||||
]
|
||||
pruneopts = "UT"
|
||||
revision = "f21a4dfb5e38f5895301dc265a8def02365cc3d0"
|
||||
version = "v0.3.0"
|
||||
|
||||
[[projects]]
|
||||
digest = "1:c8907869850adaa8bd7631887948d0684f3787d0912f1c01ab72581a6c34432e"
|
||||
name = "google.golang.org/appengine"
|
||||
packages = [
|
||||
".",
|
||||
"internal",
|
||||
"internal/app_identity",
|
||||
"internal/base",
|
||||
"internal/datastore",
|
||||
"internal/log",
|
||||
"internal/modules",
|
||||
"internal/remote_api",
|
||||
"internal/urlfetch",
|
||||
"urlfetch",
|
||||
]
|
||||
pruneopts = "UT"
|
||||
revision = "b1f26356af11148e710935ed1ac8a7f5702c7612"
|
||||
version = "v1.1.0"
|
||||
|
||||
[[projects]]
|
||||
digest = "1:2d1fbdc6777e5408cabeb02bf336305e724b925ff4546ded0fa8715a7267922a"
|
||||
name = "gopkg.in/inf.v0"
|
||||
packages = ["."]
|
||||
pruneopts = "UT"
|
||||
revision = "d2d2541c53f18d2a059457998ce2876cc8e67cbf"
|
||||
version = "v0.9.1"
|
||||
|
||||
[[projects]]
|
||||
digest = "1:342378ac4dcb378a5448dd723f0784ae519383532f5e70ade24132c4c8693202"
|
||||
name = "gopkg.in/yaml.v2"
|
||||
packages = ["."]
|
||||
pruneopts = "UT"
|
||||
revision = "5420a8b6744d3b0345ab293f6fcba19c978f1183"
|
||||
version = "v2.2.1"
|
||||
|
||||
[[projects]]
|
||||
branch = "release-1.0"
|
||||
digest = "1:9e4331f3699eb48251aea4db7ae2774ff00b8c627bd1ceec021e6501cca9055f"
|
||||
name = "istio.io/api"
|
||||
packages = ["networking/v1alpha3"]
|
||||
pruneopts = "UT"
|
||||
revision = "76349c53b87f03f1e610b3aa3843dba3c38138d7"
|
||||
|
||||
[[projects]]
|
||||
branch = "release-1.9"
|
||||
digest = "1:81094787d3b75d7520d7dc92bdcabff480575a678c4f409a8bb30359cf60ac21"
|
||||
name = "k8s.io/api"
|
||||
packages = [
|
||||
"admissionregistration/v1alpha1",
|
||||
"admissionregistration/v1beta1",
|
||||
"apps/v1",
|
||||
"apps/v1beta1",
|
||||
"apps/v1beta2",
|
||||
"authentication/v1",
|
||||
"authentication/v1beta1",
|
||||
"authorization/v1",
|
||||
"authorization/v1beta1",
|
||||
"autoscaling/v1",
|
||||
"autoscaling/v2beta1",
|
||||
"batch/v1",
|
||||
"batch/v1beta1",
|
||||
"batch/v2alpha1",
|
||||
"certificates/v1beta1",
|
||||
"core/v1",
|
||||
"events/v1beta1",
|
||||
"extensions/v1beta1",
|
||||
"networking/v1",
|
||||
"policy/v1beta1",
|
||||
"rbac/v1",
|
||||
"rbac/v1alpha1",
|
||||
"rbac/v1beta1",
|
||||
"scheduling/v1alpha1",
|
||||
"settings/v1alpha1",
|
||||
"storage/v1",
|
||||
"storage/v1alpha1",
|
||||
"storage/v1beta1",
|
||||
]
|
||||
pruneopts = "UT"
|
||||
revision = "9273ee02527c608cecc74969b3e489f5dba686da"
|
||||
|
||||
[[projects]]
|
||||
branch = "release-1.9"
|
||||
digest = "1:8461089e75268de679d74be6b211ba71bb666159ffdc2b1979b0411553cb587f"
|
||||
name = "k8s.io/apimachinery"
|
||||
packages = [
|
||||
"pkg/api/errors",
|
||||
"pkg/api/meta",
|
||||
"pkg/api/resource",
|
||||
"pkg/apis/meta/internalversion",
|
||||
"pkg/apis/meta/v1",
|
||||
"pkg/apis/meta/v1/unstructured",
|
||||
"pkg/apis/meta/v1alpha1",
|
||||
"pkg/conversion",
|
||||
"pkg/conversion/queryparams",
|
||||
"pkg/fields",
|
||||
"pkg/labels",
|
||||
"pkg/runtime",
|
||||
"pkg/runtime/schema",
|
||||
"pkg/runtime/serializer",
|
||||
"pkg/runtime/serializer/json",
|
||||
"pkg/runtime/serializer/protobuf",
|
||||
"pkg/runtime/serializer/recognizer",
|
||||
"pkg/runtime/serializer/streaming",
|
||||
"pkg/runtime/serializer/versioning",
|
||||
"pkg/selection",
|
||||
"pkg/types",
|
||||
"pkg/util/cache",
|
||||
"pkg/util/clock",
|
||||
"pkg/util/diff",
|
||||
"pkg/util/errors",
|
||||
"pkg/util/framer",
|
||||
"pkg/util/intstr",
|
||||
"pkg/util/json",
|
||||
"pkg/util/net",
|
||||
"pkg/util/runtime",
|
||||
"pkg/util/sets",
|
||||
"pkg/util/validation",
|
||||
"pkg/util/validation/field",
|
||||
"pkg/util/wait",
|
||||
"pkg/util/yaml",
|
||||
"pkg/version",
|
||||
"pkg/watch",
|
||||
"third_party/forked/golang/reflect",
|
||||
]
|
||||
pruneopts = "UT"
|
||||
revision = "fb40df2b502912cbe3a93aa61c2b2487f39cb42f"
|
||||
|
||||
[[projects]]
|
||||
digest = "1:db9ab610c580b059b40840a3158fdf66a931e13be2192e23a670f800f061f79c"
|
||||
name = "k8s.io/client-go"
|
||||
packages = [
|
||||
"discovery",
|
||||
"kubernetes",
|
||||
"kubernetes/scheme",
|
||||
"kubernetes/typed/admissionregistration/v1alpha1",
|
||||
"kubernetes/typed/admissionregistration/v1beta1",
|
||||
"kubernetes/typed/apps/v1",
|
||||
"kubernetes/typed/apps/v1beta1",
|
||||
"kubernetes/typed/apps/v1beta2",
|
||||
"kubernetes/typed/authentication/v1",
|
||||
"kubernetes/typed/authentication/v1beta1",
|
||||
"kubernetes/typed/authorization/v1",
|
||||
"kubernetes/typed/authorization/v1beta1",
|
||||
"kubernetes/typed/autoscaling/v1",
|
||||
"kubernetes/typed/autoscaling/v2beta1",
|
||||
"kubernetes/typed/batch/v1",
|
||||
"kubernetes/typed/batch/v1beta1",
|
||||
"kubernetes/typed/batch/v2alpha1",
|
||||
"kubernetes/typed/certificates/v1beta1",
|
||||
"kubernetes/typed/core/v1",
|
||||
"kubernetes/typed/events/v1beta1",
|
||||
"kubernetes/typed/extensions/v1beta1",
|
||||
"kubernetes/typed/networking/v1",
|
||||
"kubernetes/typed/policy/v1beta1",
|
||||
"kubernetes/typed/rbac/v1",
|
||||
"kubernetes/typed/rbac/v1alpha1",
|
||||
"kubernetes/typed/rbac/v1beta1",
|
||||
"kubernetes/typed/scheduling/v1alpha1",
|
||||
"kubernetes/typed/settings/v1alpha1",
|
||||
"kubernetes/typed/storage/v1",
|
||||
"kubernetes/typed/storage/v1alpha1",
|
||||
"kubernetes/typed/storage/v1beta1",
|
||||
"pkg/version",
|
||||
"plugin/pkg/client/auth/gcp",
|
||||
"rest",
|
||||
"rest/watch",
|
||||
"testing",
|
||||
"third_party/forked/golang/template",
|
||||
"tools/auth",
|
||||
"tools/cache",
|
||||
"tools/clientcmd",
|
||||
"tools/clientcmd/api",
|
||||
"tools/clientcmd/api/latest",
|
||||
"tools/clientcmd/api/v1",
|
||||
"tools/metrics",
|
||||
"tools/pager",
|
||||
"tools/reference",
|
||||
"transport",
|
||||
"util/buffer",
|
||||
"util/cert",
|
||||
"util/flowcontrol",
|
||||
"util/homedir",
|
||||
"util/integer",
|
||||
"util/jsonpath",
|
||||
]
|
||||
pruneopts = "UT"
|
||||
revision = "78700dec6369ba22221b72770783300f143df150"
|
||||
version = "v6.0.0"
|
||||
|
||||
[[projects]]
|
||||
branch = "release-1.9"
|
||||
digest = "1:f5487c07872bdb7c40ffe629430b2fa815f9eca0d2c02bb9e866962eb38a0e70"
|
||||
name = "k8s.io/kube-openapi"
|
||||
packages = ["pkg/common"]
|
||||
pruneopts = "UT"
|
||||
revision = "7ee50c0aa8059d610950c952a9ed7a5e33ab336a"
|
||||
|
||||
[solve-meta]
|
||||
analyzer-name = "dep"
|
||||
analyzer-version = 1
|
||||
input-imports = [
|
||||
"github.com/golang/glog",
|
||||
"github.com/kubeflow/tf-operator/pkg/apis/tensorflow/v1alpha2",
|
||||
"github.com/kubeflow/tf-operator/pkg/client/clientset/versioned",
|
||||
"github.com/mitchellh/go-homedir",
|
||||
"github.com/sirupsen/logrus",
|
||||
"github.com/spf13/cobra",
|
||||
"github.com/spf13/cobra/doc",
|
||||
"gopkg.in/yaml.v2",
|
||||
"istio.io/api/networking/v1alpha3",
|
||||
"k8s.io/api/batch/v1",
|
||||
"k8s.io/api/core/v1",
|
||||
"k8s.io/apimachinery/pkg/api/errors",
|
||||
"k8s.io/apimachinery/pkg/apis/meta/v1",
|
||||
"k8s.io/apimachinery/pkg/fields",
|
||||
"k8s.io/apimachinery/pkg/labels",
|
||||
"k8s.io/apimachinery/pkg/runtime",
|
||||
"k8s.io/apimachinery/pkg/runtime/schema",
|
||||
"k8s.io/apimachinery/pkg/runtime/serializer",
|
||||
"k8s.io/apimachinery/pkg/types",
|
||||
"k8s.io/apimachinery/pkg/util/sets",
|
||||
"k8s.io/apimachinery/pkg/watch",
|
||||
"k8s.io/client-go/discovery",
|
||||
"k8s.io/client-go/kubernetes",
|
||||
"k8s.io/client-go/plugin/pkg/client/auth/gcp",
|
||||
"k8s.io/client-go/rest",
|
||||
"k8s.io/client-go/testing",
|
||||
"k8s.io/client-go/tools/cache",
|
||||
"k8s.io/client-go/tools/clientcmd",
|
||||
"k8s.io/client-go/util/flowcontrol",
|
||||
]
|
||||
solver-name = "gps-cdcl"
|
||||
solver-version = 1
|
67
Gopkg.toml
67
Gopkg.toml
|
@ -1,67 +0,0 @@
|
|||
# Gopkg.toml example
|
||||
#
|
||||
# Refer to https://golang.github.io/dep/docs/Gopkg.toml.html
|
||||
# for detailed Gopkg.toml documentation.
|
||||
#
|
||||
# required = ["github.com/user/thing/cmd/thing"]
|
||||
# ignored = ["github.com/user/project/pkgX", "bitbucket.org/user/project/pkgA/pkgY"]
|
||||
#
|
||||
# [[constraint]]
|
||||
# name = "github.com/user/project"
|
||||
# version = "1.0.0"
|
||||
#
|
||||
# [[constraint]]
|
||||
# name = "github.com/user/project2"
|
||||
# branch = "dev"
|
||||
# source = "github.com/myfork/project2"
|
||||
#
|
||||
# [[override]]
|
||||
# name = "github.com/x/y"
|
||||
# version = "2.4.0"
|
||||
#
|
||||
# [prune]
|
||||
# non-go = false
|
||||
# go-tests = true
|
||||
# unused-packages = true
|
||||
|
||||
|
||||
[[constraint]]
|
||||
name = "github.com/sirupsen/logrus"
|
||||
version = "1.0.5"
|
||||
|
||||
[[constraint]]
|
||||
name = "github.com/spf13/cobra"
|
||||
version = "0.0.2"
|
||||
|
||||
[[constraint]]
|
||||
name = "gopkg.in/yaml.v2"
|
||||
version = "2.2.1"
|
||||
|
||||
[[constraint]]
|
||||
branch = "release-1.9"
|
||||
name = "k8s.io/api"
|
||||
|
||||
[[constraint]]
|
||||
name = "k8s.io/client-go"
|
||||
version = "~6.0.0"
|
||||
|
||||
[[constraint]]
|
||||
name = "k8s.io/apimachinery"
|
||||
branch = "release-1.9"
|
||||
|
||||
[[constraint]]
|
||||
name = "github.com/kubeflow/tf-operator"
|
||||
version = "v0.2.0-rc1"
|
||||
|
||||
[[constraint]]
|
||||
name = "github.com/gogo/protobuf"
|
||||
version = "v1.1.1"
|
||||
|
||||
[[constraint]]
|
||||
name = "istio.io/api"
|
||||
branch = "release-1.0"
|
||||
|
||||
[prune]
|
||||
go-tests = true
|
||||
unused-packages = true
|
||||
|
298
Makefile
298
Makefile
|
@ -1,17 +1,64 @@
|
|||
PACKAGE=github.com/kubeflow/arena
|
||||
CURRENT_DIR=$(shell pwd)
|
||||
DIST_DIR=${CURRENT_DIR}/bin
|
||||
ARENA_CLI_NAME=arena
|
||||
JOB_MONITOR=jobmon
|
||||
.SILENT:
|
||||
|
||||
VERSION=$(shell cat ${CURRENT_DIR}/VERSION)
|
||||
BUILD_DATE=$(shell date -u +'%Y-%m-%dT%H:%M:%SZ')
|
||||
GIT_COMMIT=$(shell git rev-parse HEAD)
|
||||
GIT_SHORT_COMMIT=$(shell git rev-parse --short HEAD)
|
||||
DOCKER_BUILD_DATE=$(shell date -u +'%Y%m%d%H%M%S')
|
||||
GIT_TAG=$(shell if [ -z "`git status --porcelain`" ]; then git describe --exact-match --tags HEAD 2>/dev/null; fi)
|
||||
GIT_TREE_STATE=$(shell if [ -z "`git status --porcelain`" ]; then echo "clean" ; else echo "dirty"; fi)
|
||||
PACKR_CMD=$(shell if [ "`which packr`" ]; then echo "packr"; else echo "go run vendor/github.com/gobuffalo/packr/packr/main.go"; fi)
|
||||
# Get the currently used golang install path (in GOPATH/bin, unless GOBIN is set)
|
||||
ifeq (,$(shell go env GOBIN))
|
||||
GOBIN=$(shell go env GOPATH)/bin
|
||||
else
|
||||
GOBIN=$(shell go env GOBIN)
|
||||
endif
|
||||
|
||||
# Setting SHELL to bash allows bash commands to be executed by recipes.
|
||||
# Options are set to exit when a recipe line exits non-zero or a piped command fails.
|
||||
SHELL = /usr/bin/env bash -o pipefail
|
||||
.SHELLFLAGS = -ec
|
||||
|
||||
PACKAGE ?= github.com/kubeflow/arena
|
||||
CURRENT_DIR ?= $(shell pwd)
|
||||
DIST_DIR ?= $(CURRENT_DIR)/bin
|
||||
ARENA_CLI_NAME ?= arena
|
||||
JOB_MONITOR ?= jobmon
|
||||
ARENA_UNINSTALL ?= arena-uninstall
|
||||
OS ?= $(shell go env GOOS)
|
||||
ARCH ?= $(shell go env GOARCH)
|
||||
|
||||
VERSION ?= $(shell cat VERSION)
|
||||
BUILD_DATE := $(shell date -u +'%Y-%m-%dT%H:%M:%SZ')
|
||||
GIT_COMMIT := $(shell git rev-parse HEAD)
|
||||
GIT_SHORT_COMMIT := $(shell git rev-parse --short HEAD)
|
||||
DOCKER_BUILD_DATE := $(shell date -u +'%Y%m%d%H%M%S')
|
||||
GIT_TAG := $(shell if [ -z "`git status --porcelain`" ]; then git describe --exact-match --tags HEAD 2>/dev/null; fi)
|
||||
GIT_TREE_STATE := $(shell if [ -z "`git status --porcelain`" ]; then echo "clean" ; else echo "dirty"; fi)
|
||||
PACKR_CMD := $(shell if [ "`which packr`" ]; then echo "packr"; else echo "go run vendor/github.com/gobuffalo/packr/packr/main.go"; fi)
|
||||
|
||||
# Location to install binaries
|
||||
LOCALBIN ?= $(CURRENT_DIR)/bin
|
||||
# Location to put temp files
|
||||
TEMPDIR ?= $(CURRENT_DIR)/tmp
|
||||
# ARENA_ARTIFACTS
|
||||
ARENA_ARTIFACTS_CHART_PATH ?= $(CURRENT_DIR)/arena-artifacts
|
||||
|
||||
# Versions
|
||||
GOLANG_VERSION=$(shell grep -e '^go ' go.mod | cut -d ' ' -f 2)
|
||||
KUBECTL_VERSION ?= v1.28.4
|
||||
HELM_VERSION ?= $(shell grep -e 'helm.sh/helm/v3 ' go.mod | cut -d ' ' -f 2)
|
||||
HELM_UNITTEST_VERSION ?= 0.5.1
|
||||
KIND_VERSION ?= v0.23.0
|
||||
KIND_K8S_VERSION ?= v1.29.3
|
||||
ENVTEST_VERSION ?= release-0.18
|
||||
ENVTEST_K8S_VERSION ?= 1.29.3
|
||||
GOLANGCI_LINT_VERSION ?= v2.1.6
|
||||
|
||||
# Binaries
|
||||
ARENA ?= arena-v$(VERSION)-$(OS)-$(ARCH)
|
||||
KUBECTL ?= kubectl-$(KUBECTL_VERSION)-$(OS)-$(ARCH)
|
||||
HELM ?= helm-$(HELM_VERSION)-$(OS)-$(ARCH)
|
||||
KIND ?= $(LOCALBIN)/kind-$(KIND_VERSION)
|
||||
ENVTEST ?= $(LOCALBIN)/setup-envtest-$(ENVTEST_VERSION)
|
||||
GOLANGCI_LINT ?= golangci-lint-$(GOLANGCI_LINT_VERSION)
|
||||
|
||||
# Tarballs
|
||||
ARENA_INSTALLER ?= arena-installer-$(VERSION)-$(OS)-$(ARCH)
|
||||
ARENA_INSTALLER_TARBALL ?= $(ARENA_INSTALLER).tar.gz
|
||||
|
||||
BUILDER_IMAGE=arena-builder
|
||||
BASE_IMAGE=registry.aliyuncs.com/kubeflow-images-public/tensorflow-1.12.0-notebook-gpu:v0.4.0
|
||||
|
@ -30,8 +77,12 @@ override LDFLAGS += \
|
|||
-extldflags "-static"
|
||||
|
||||
# docker image publishing options
|
||||
IMAGE_REGISTRY ?= docker.io
|
||||
IMAGE_REPOSITORY ?= kubeflow/arena
|
||||
IMAGE_TAG ?= $(VERSION)
|
||||
IMAGE ?= $(IMAGE_REGISTRY)/$(IMAGE_REPOSITORY):$(IMAGE_TAG)
|
||||
DOCKER_PUSH=false
|
||||
IMAGE_TAG=latest
|
||||
BASE_IMAGE ?= debian:12-slim
|
||||
|
||||
ifneq (${GIT_TAG},)
|
||||
IMAGE_TAG=${GIT_TAG}
|
||||
|
@ -54,44 +105,117 @@ ifdef IMAGE_NAMESPACE
|
|||
IMAGE_PREFIX=${IMAGE_NAMESPACE}/
|
||||
endif
|
||||
|
||||
##@ General
|
||||
|
||||
# The help target prints out all targets with their descriptions organized
|
||||
# beneath their categories. The categories are represented by '##@' and the
|
||||
# target descriptions by '##'. The awk command is responsible for reading the
|
||||
# entire set of makefiles included in this invocation, looking for lines of the
|
||||
# file as xyz: ## something, and then pretty-format the target and help. Then,
|
||||
# if there's a line with ##@ something, that gets pretty-printed as a category.
|
||||
# More info on the usage of ANSI control characters for terminal formatting:
|
||||
# https://en.wikipedia.org/wiki/ANSI_escape_code#SGR_parameters
|
||||
# More info on the awk command:
|
||||
# http://linuxcommand.org/lc3_adv_awk.php
|
||||
|
||||
.PHONY: help
|
||||
help: ## Display this help.
|
||||
@awk 'BEGIN {FS = ":.*##"; printf "\nUsage:\n make \033[36m<target>\033[0m\n"} /^[a-zA-Z_0-9-]+:.*?##/ { printf " \033[36m%-30s\033[0m %s\n", $$1, $$2 } /^##@/ { printf "\n\033[1m%s\033[0m\n", substr($$0, 5) } ' $(MAKEFILE_LIST)
|
||||
|
||||
.PHONY: all
|
||||
all: go-fmt go-vet go-lint unit-test e2e-test
|
||||
|
||||
##@ Development
|
||||
|
||||
go-fmt: ## Run go fmt against code.
|
||||
@echo "Running go fmt..."
|
||||
go fmt ./...
|
||||
|
||||
go-vet: ## Run go vet against code.
|
||||
@echo "Running go vet..."
|
||||
go vet ./...
|
||||
|
||||
.PHONY: go-lint
|
||||
go-lint: golangci-lint ## Run golangci-lint linter.
|
||||
@echo "Running golangci-lint run..."
|
||||
$(LOCALBIN)/$(GOLANGCI_LINT) run --timeout 5m ./...
|
||||
|
||||
.PHONY: go-lint-fix
|
||||
go-lint-fix: golangci-lint ## Run golangci-lint linter and perform fixes.
|
||||
@echo "Running golangci-lint run --fix..."
|
||||
$(LOCALBIN)/$(GOLANGCI_LINT) run --fix --timeout 5m ./...
|
||||
|
||||
.PHONY: unit-test
|
||||
unit-test: ## Run go unit tests.
|
||||
@echo "Running go test..."
|
||||
go test $(shell go list ./... | grep -v /e2e) -coverprofile cover.out
|
||||
|
||||
.PHONY: e2e-test
|
||||
e2e-test: envtest ## Run the e2e tests against a Kind k8s instance that is spun up.
|
||||
@echo "Running e2e tests..."
|
||||
go test ./test/e2e/ -v -ginkgo.v -timeout 30m
|
||||
|
||||
# Build the project
|
||||
.PHONY: default
|
||||
default:
|
||||
ifeq ($(OS),Windows_NT)
|
||||
default: cli-windows
|
||||
default: arena-windows
|
||||
else
|
||||
UNAME_S := $(shell uname -s)
|
||||
ifeq ($(UNAME_S),Linux)
|
||||
$(info "Building on Linux")
|
||||
default: cli-linux-amd64
|
||||
default: arena-linux-amd64
|
||||
else ifeq ($(UNAME_S),Darwin)
|
||||
$(info "Building on Darwin")
|
||||
default: cli-darwin
|
||||
default: arena-darwin-amd64
|
||||
else
|
||||
$(error "The OS is not supported")
|
||||
endif
|
||||
endif
|
||||
|
||||
.PHONY: cli-linux-amd64
|
||||
cli-linux-amd64:
|
||||
mkdir -p bin
|
||||
CGO_ENABLED=0 GOOS=linux GOARCH=amd64 go build -tags 'netgo' -ldflags '${LDFLAGS}' -o ${DIST_DIR}/${ARENA_CLI_NAME} cmd/arena/*.go
|
||||
CGO_ENABLED=0 GOOS=linux GOARCH=amd64 go build -ldflags '${LDFLAGS}' -o ${DIST_DIR}/${JOB_MONITOR} cmd/job-monitor/*.go
|
||||
##@ Build
|
||||
|
||||
.PHONY: cli-darwin
|
||||
cli-darwin:
|
||||
mkdir -p bin
|
||||
CGO_ENABLED=0 GOOS=darwin go build -tags 'netgo' -ldflags '${LDFLAGS}' -o ${DIST_DIR}/${ARENA_CLI_NAME} ./cmd/arena/*.go
|
||||
$(LOCALBIN):
|
||||
mkdir -p $(LOCALBIN)
|
||||
|
||||
.PHONY: cli-windows
|
||||
cli-windows:
|
||||
mkdir -p bin
|
||||
CGO_ENABLED=0 GOARCH=amd64 GOOS=windows go build -tags 'netgo' -ldflags '${LDFLAGS}' -o ${DIST_DIR}/${ARENA_CLI_NAME} ./cmd/arena/*.go
|
||||
$(TEMPDIR):
|
||||
mkdir -p $(TEMPDIR)
|
||||
|
||||
clean: ## Clean up all downloaded and generated files.
|
||||
rm -rf $(LOCALBIN) $(TEMPDIR)
|
||||
|
||||
.PHONY: install-image
|
||||
install-image:
|
||||
docker build -t cheyang/arena:${VERSION}-${DOCKER_BUILD_DATE}-${GIT_SHORT_COMMIT} -f Dockerfile.install .
|
||||
.PHONY: arena
|
||||
arena: $(LOCALBIN) ## Build arena CLI for current platform.
|
||||
@echo "Building arena CLI..."
|
||||
CGO_ENABLED=0 GOOS=$(OS) GOARCH=$(ARCH) go build -tags netgo -ldflags '${LDFLAGS}' -o $(LOCALBIN)/$(ARENA) cmd/arena/main.go
|
||||
|
||||
.PHONY: java-sdk
|
||||
java-sdk: ## Build Java SDK.
|
||||
echo "Building arena Java SDK..."
|
||||
mvn package -Dmaven.test.skip=true -Dgpg.skip -f sdk/arena-java-sdk
|
||||
|
||||
.PHONY: docker-build
|
||||
docker-build: ## Build docker image.
|
||||
docker build \
|
||||
--build-arg BASE_IMAGE=$(BASE_IMAGE) \
|
||||
--tag $(IMAGE) \
|
||||
-f Dockerfile \
|
||||
.
|
||||
|
||||
.PHONY: docker-push
|
||||
docker-push: ## Push docker image.
|
||||
docker push $(IMAGE)
|
||||
|
||||
.PHONY: docker-buildx
|
||||
PLATFORMS ?= linux/amd64,linux/arm64
|
||||
docker-buildx: ## Build and push docker images for multiple platforms.
|
||||
- $(CONTAINER_TOOL) buildx create --name arena-builder
|
||||
$(CONTAINER_TOOL) buildx use arena-builder
|
||||
- $(CONTAINER_TOOL) buildx build --push \
|
||||
--platform=$(PLATFORMS) \
|
||||
--build-arg BASE_IMAGE=$(BASE_IMAGE) \
|
||||
--tag $(IMAGE) \
|
||||
-f Dockerfile \
|
||||
.
|
||||
- $(CONTAINER_TOOL) buildx rm arena-builder
|
||||
|
||||
.PHONY: notebook-image-kubeflow
|
||||
notebook-image-kubeflow:
|
||||
|
@ -102,3 +226,107 @@ notebook-image-kubeflow:
|
|||
notebook-image:
|
||||
docker build --build-arg "BASE_IMAGE=tensorflow/tensorflow:1.12.0-devel-py3" -t cheyang/arena:${VERSION}-notebook-${DOCKER_BUILD_DATE}-${GIT_SHORT_COMMIT}-cpu -f Dockerfile.notebook.cpu .
|
||||
docker tag cheyang/arena:${VERSION}-notebook-${DOCKER_BUILD_DATE}-${GIT_SHORT_COMMIT}-cpu cheyang/arena-notebook:cpu
|
||||
|
||||
.PHONY: build-dependabot
|
||||
build-dependabot:
|
||||
python3 hack/create_dependabot.py
|
||||
|
||||
.PHONY: arena-installer
|
||||
arena-installer: $(ARENA_INSTALLER_TARBALL) ## Build arena installer tarball
|
||||
$(ARENA_INSTALLER_TARBALL): arena kubectl helm
|
||||
echo "Building arena installer tarball..." && \
|
||||
rm -rf $(TEMPDIR)/$(ARENA_INSTALLER) && \
|
||||
mkdir -p $(TEMPDIR)/$(ARENA_INSTALLER)/bin && \
|
||||
cp $(LOCALBIN)/$(ARENA) $(TEMPDIR)/$(ARENA_INSTALLER)/bin/arena && \
|
||||
cp $(LOCALBIN)/$(KUBECTL) $(TEMPDIR)/$(ARENA_INSTALLER)/bin/kubectl && \
|
||||
cp $(LOCALBIN)/$(HELM) $(TEMPDIR)/$(ARENA_INSTALLER)/bin/helm && \
|
||||
cp -R charts $(TEMPDIR)/$(ARENA_INSTALLER) && \
|
||||
cp -R arena-artifacts $(TEMPDIR)/$(ARENA_INSTALLER) && \
|
||||
cp arena-gen-kubeconfig.sh $(TEMPDIR)/$(ARENA_INSTALLER)/bin && \
|
||||
cp install.sh $(TEMPDIR)/$(ARENA_INSTALLER) && \
|
||||
cp uninstall.sh $(TEMPDIR)/$(ARENA_INSTALLER)/bin/arena-uninstall && \
|
||||
tar -zcf $(ARENA_INSTALLER).tar.gz -C $(TEMPDIR) $(ARENA_INSTALLER) && \
|
||||
echo "Successfully saved arena installer to $(ARENA_INSTALLER).tar.gz."
|
||||
|
||||
##@ Helm
|
||||
|
||||
.PHONY: helm-unittest
|
||||
helm-unittest: helm-unittest-plugin ## Run Helm chart unittests.
|
||||
set -x && $(LOCALBIN)/$(HELM) unittest $(ARENA_ARTIFACTS_CHART_PATH) --strict --file "tests/**/*_test.yaml" --chart-tests-path $(CURRENT_DIR)
|
||||
|
||||
##@ Dependencies
|
||||
|
||||
.PHONY: golangci-lint
|
||||
golangci-lint: $(LOCALBIN)/$(GOLANGCI_LINT) ## Download golangci-lint locally if necessary.
|
||||
$(LOCALBIN)/$(GOLANGCI_LINT): $(LOCALBIN)
|
||||
$(call go-install-tool,$(LOCALBIN)/$(GOLANGCI_LINT),github.com/golangci/golangci-lint/v2/cmd/golangci-lint,${GOLANGCI_LINT_VERSION})
|
||||
|
||||
.PHONY: envtest
|
||||
envtest: $(ENVTEST) ## Download setup-envtest locally if necessary.
|
||||
$(ENVTEST): $(LOCALBIN)
|
||||
$(call go-install-tool,$(ENVTEST),sigs.k8s.io/controller-runtime/tools/setup-envtest,$(ENVTEST_VERSION))
|
||||
|
||||
.PHONY: kubectl
|
||||
kubectl: $(LOCALBIN)/$(KUBECTL)
|
||||
$(LOCALBIN)/$(KUBECTL): $(LOCALBIN) $(TEMPDIR)
|
||||
$(eval KUBECTL_URL=https://dl.k8s.io/release/$(KUBECTL_VERSION)/bin/$(OS)/$(ARCH)/kubectl)
|
||||
$(eval KUBECTL_SHA_URL=$(KUBECTL_URL).sha256)
|
||||
|
||||
cd $(TEMPDIR) && \
|
||||
echo "Download $(KUBECTL) if not present..." && \
|
||||
if [ ! -f $(KUBECTL) ]; then \
|
||||
curl -sSLo $(KUBECTL) $(KUBECTL_URL); \
|
||||
fi && \
|
||||
echo "Download $(KUBECTL).sha256 if not present..." && \
|
||||
if [ ! -f kubectl.sha256 ]; then \
|
||||
curl -sSLo $(KUBECTL).sha256 $(KUBECTL_SHA_URL); \
|
||||
fi && \
|
||||
echo "Verifying checksum..." && \
|
||||
echo -n "$$(cat $(KUBECTL).sha256) $(KUBECTL)" | shasum -a 256 --check --quiet || (echo "Checksum verification failed, exiting." && false) && \
|
||||
echo "Make kubectl executable and move it to bin directory..." && \
|
||||
chmod +x $(KUBECTL) && \
|
||||
cp $(KUBECTL) $(LOCALBIN) && \
|
||||
echo "Successfully installed kubectl to $(LOCALBIN)/$(KUBECTL)."
|
||||
|
||||
.PHONY: helm
|
||||
helm: $(LOCALBIN)/$(HELM)
|
||||
$(LOCALBIN)/$(HELM): $(LOCALBIN) $(TEMPDIR)
|
||||
$(eval HELM_URL=https://get.helm.sh/$(HELM).tar.gz)
|
||||
$(eval HELM_SHA_URL=https://get.helm.sh/$(HELM).tar.gz.sha256sum)
|
||||
|
||||
cd $(TEMPDIR) && \
|
||||
echo "Download $(HELM).tar.gz if not present..." && \
|
||||
if [ ! -f $(HELM).tar.gz ]; then \
|
||||
wget -qO $(HELM).tar.gz $(HELM_URL); \
|
||||
fi && \
|
||||
echo "Download $(HELM).tar.gz.sha256sum if not present..." && \
|
||||
if [ ! -f $(HELM).tar.gz.sha256sum ]; then \
|
||||
wget -qO $(HELM).tar.gz.sha256sum $(HELM_SHA_URL); \
|
||||
fi && \
|
||||
echo "Verifying checksum..." && \
|
||||
cat $(HELM).tar.gz.sha256sum | shasum -a 256 --check --quiet || (echo "Checksum verification failed, exiting." && false) && \
|
||||
echo "Extract helm tarball and move it to bin directory..." && \
|
||||
tar -zxf $(HELM).tar.gz && \
|
||||
cp ${OS}-${ARCH}/helm $(LOCALBIN)/$(HELM) && \
|
||||
echo "Successfully installed helm to $(LOCALBIN)/$(HELM)."
|
||||
|
||||
.PHONY: helm-unittest-plugin
|
||||
helm-unittest-plugin: helm ## Download helm unittest plugin locally if necessary.
|
||||
if [ -z "$(shell $(LOCALBIN)/$(HELM) plugin list | grep unittest)" ]; then \
|
||||
echo "Installing helm unittest plugin"; \
|
||||
$(LOCALBIN)/$(HELM) plugin install https://github.com/helm-unittest/helm-unittest.git --version $(HELM_UNITTEST_VERSION); \
|
||||
fi
|
||||
|
||||
# go-install-tool will 'go install' any package with custom target and name of binary, if it doesn't exist
|
||||
# $1 - target path with name of binary (ideally with version)
|
||||
# $2 - package url which can be installed
|
||||
# $3 - specific version of package
|
||||
define go-install-tool
|
||||
@[ -f $(1) ] || { \
|
||||
set -e; \
|
||||
package=$(2)@$(3) ;\
|
||||
echo "Downloading $${package}" ;\
|
||||
GOBIN=$(LOCALBIN) go install $${package} ;\
|
||||
mv "$$(echo "$(1)" | sed "s/-$(3)$$//")" $(1) ;\
|
||||
}
|
||||
endef
|
||||
|
|
9
OWNERS
9
OWNERS
|
@ -1,8 +1,11 @@
|
|||
approvers:
|
||||
- cheyang
|
||||
- wsxiaozhang
|
||||
- denverdino
|
||||
- Syulin7
|
||||
- xieydd
|
||||
- denkensk
|
||||
- gujingit
|
||||
- ChenYi015
|
||||
reviewers:
|
||||
- GarnettWang
|
||||
- wsxiaozhang
|
||||
- xiaozhouX
|
||||
- osswangxining
|
||||
|
|
43
README.md
43
README.md
|
@ -1,9 +1,8 @@
|
|||
# Arena
|
||||
|
||||
[](https://circleci.com/gh/kubeflow/arena)
|
||||
[](https://travis-ci.org/kubeflow/arena)
|
||||
[](https://goreportcard.com/report/github.com/kubeflow/arena)
|
||||
[](https://github.com/kubeflow/arena/releases) [](https://github.com/kubeflow/arena/actions/workflows/integration.yaml) [](https://goreportcard.com/report/github.com/kubeflow/arena)
|
||||
|
||||
View the [Arena documentation](https://arena-docs.readthedocs.io/en/latest).
|
||||
|
||||
## Overview
|
||||
|
||||
|
@ -17,26 +16,15 @@ For the Chinese version, please refer to [中文文档](README_cn.md)
|
|||
|
||||
## Setup
|
||||
|
||||
You can follow up the [Installation guide](docs/installation/README.md)
|
||||
You can follow the [Installation guide](https://arena-docs.readthedocs.io/en/latest/installation)
|
||||
|
||||
## User Guide
|
||||
|
||||
Arena is a command-line interface to run and monitor the machine learning training jobs and check their results in an easy way. Currently it supports solo/distributed training.
|
||||
|
||||
- [1. Run a training Job with source code from git](docs/userguide/1-tfjob-standalone.md)
|
||||
- [2. Run a training Job with tensorboard](docs/userguide/2-tfjob-tensorboard.md)
|
||||
- [3. Run a distributed training Job](docs/userguide/3-tfjob-distributed.md)
|
||||
- [4. Run a distributed training Job with external data](docs/userguide/4-tfjob-distributed-data.md)
|
||||
- [5. Run a distributed training Job based on MPI](docs/userguide/5-mpijob-distributed.md)
|
||||
- [6. Run a distributed TensorFlow training job with gang scheduler](docs/userguide/6-tfjob-gangschd.md)
|
||||
- [7. Run TensorFlow Serving](docs/userguide/7-tf-serving.md)
|
||||
- [8. Run TensorFlow Estimator](docs/userguide/8-tfjob-estimator.md)
|
||||
- [9. Monitor GPUs of the training job ](docs/userguide/9-top-job-gpu-metric.md)
|
||||
Arena is a command-line interface to run and monitor the machine learning training jobs and check their results in an easy way. Please refer to the [User Guide](https://arena-docs.readthedocs.io/en/latest/training) to manage your training jobs.
|
||||
|
||||
## Demo
|
||||
|
||||
[](http://cloud.video.taobao.com/play/u/2987821887/p/1/e/6/t/1/50210690772.mp4)
|
||||
|
||||
[](http://cloud.video.taobao.com/play/u/2987821887/p/1/e/6/t/1/50210690772.mp4)
|
||||
|
||||
## Developing
|
||||
|
||||
|
@ -44,9 +32,9 @@ Prerequisites:
|
|||
|
||||
- Go >= 1.8
|
||||
|
||||
```
|
||||
mkdir -p $GOPATH/src/github.com/kubeflow
|
||||
cd $GOPATH/src/github.com/kubeflow
|
||||
```shell
|
||||
mkdir -p $(go env GOPATH)/src/github.com/kubeflow
|
||||
cd $(go env GOPATH)/src/github.com/kubeflow
|
||||
git clone https://github.com/kubeflow/arena.git
|
||||
cd arena
|
||||
make
|
||||
|
@ -54,9 +42,11 @@ make
|
|||
|
||||
`arena` binary is located in directory `arena/bin`. You may want to add the directory to `$PATH`.
|
||||
|
||||
Then you can follow [Installation guide for developer](https://arena-docs.readthedocs.io/en/latest/installation)
|
||||
|
||||
## CPU Profiling
|
||||
|
||||
```
|
||||
```shell
|
||||
# set profile rate (HZ)
|
||||
export PROFILE_RATE=1000
|
||||
|
||||
|
@ -67,11 +57,18 @@ INFO[0000] Dump cpu profile file into /tmp/cpu_profile
|
|||
|
||||
Then you can analyze the profile by following [Go CPU profiling: pprof and speedscope](https://coder.today/go-profiling-pprof-and-speedscope-b05b812cc429)
|
||||
|
||||
## Adopters
|
||||
|
||||
If you are interested in Arena and would like to share your experiences with others, you are warmly welcome to add your information to the [ADOPTERS.md](docs/about/ADOPTERS.md) page. We will continuously discuss new requirements and feature designs with you in advance.
|
||||
|
||||
## FAQ
|
||||
|
||||
Please refer to [FAQ](https://arena-docs.readthedocs.io/en/latest/faq).
|
||||
|
||||
## CLI Document
|
||||
|
||||
Please refer to [arena.md](docs/cli/arena.md)
|
||||
Please refer to [arena.md](docs/cli/arena.md).
|
||||
|
||||
## RoadMap
|
||||
|
||||
See [RoadMap](ROADMAP.md)
|
||||
See [RoadMap](ROADMAP.md).
|
||||
|
|
33
README_cn.md
33
README_cn.md
|
@ -1,9 +1,6 @@
|
|||
# Arena
|
||||
|
||||
[](https://circleci.com/gh/kubeflow/arena)
|
||||
[](https://travis-ci.org/kubeflow/arena)
|
||||
[](https://goreportcard.com/report/github.com/kubeflow/arena)
|
||||
|
||||
[](https://github.com/kubeflow/arena/actions/workflows/integration.yaml)[](https://goreportcard.com/report/github.com/kubeflow/arena)
|
||||
|
||||
## 概述
|
||||
|
||||
|
@ -13,28 +10,25 @@ Arena 是一个命令行工具,可供数据科学家轻而易举地运行和
|
|||
|
||||
简而言之,Arena 的目标是让数据科学家感觉自己就像是在一台机器上工作,而实际上还可以享受到 GPU 集群的强大力量。
|
||||
|
||||
|
||||
## 设置
|
||||
|
||||
您可以按照 [安装指南](docs/installation_cn/README.md) 执行操作
|
||||
您可以按照 [安装指南](https://arena-docs.readthedocs.io/en/latest/installation) 执行操作
|
||||
|
||||
## 用户指南
|
||||
|
||||
Arena 是一种命令行界面,支持轻而易举地运行和监控机器学习训练作业,并便捷地检查结果。目前,它支持独立/分布式训练。
|
||||
|
||||
- [1.使用 git 上的源代码运行训练作业](docs/userguide_cn/1-tfjob-standalone.md)
|
||||
- [2.使用 tensorboard 运行训练作业](docs/userguide_cn/2-tfjob-tensorboard.md)
|
||||
- [3.运行分布式训练作业](docs/userguide_cn/3-tfjob-distributed.md)
|
||||
- [4.使用外部数据运行分布式训练作业](docs/userguide_cn/4-tfjob-distributed-data.md)
|
||||
- [5.运行基于 MPI 的分布式训练作业](docs/userguide_cn/5-mpijob-distributed.md)
|
||||
- [6.使用群调度器运行分布式 TensorFlow 训练作业](docs/userguide_cn/6-tfjob-gangschd.md)
|
||||
- [7.运行 TensorFlow Serving](docs/userguide_cn/7-tf-serving.md)
|
||||
- [8.运行 TensorFlow Estimator](docs/userguide_cn/8-tfjob-estimator.md)
|
||||
- [1.使用 git 上的源代码运行训练作业](https://arena-docs.readthedocs.io/en/latest/training/tfjob/standalone/)
|
||||
- [2.使用 tensorboard 运行训练作业](https://arena-docs.readthedocs.io/en/latest/training/tfjob/tensorboard/)
|
||||
- [3.运行分布式训练作业](https://arena-docs.readthedocs.io/en/latest/training/tfjob/distributed/)
|
||||
- [4.使用外部数据运行分布式训练作业](https://arena-docs.readthedocs.io/en/latest/training/tfjob/dataset/)
|
||||
- [5.运行基于 MPI 的分布式训练作业](https://arena-docs.readthedocs.io/en/latest/training/mpijob/distributed/)
|
||||
- [6.使用群调度器运行分布式 TensorFlow 训练作业](https://arena-docs.readthedocs.io/en/latest/training/etjob/elastictraining-tensorflow2-mnist/)
|
||||
- [7.运行 TensorFlow Serving](https://arena-docs.readthedocs.io/en/latest/serving/tfserving/serving/)
|
||||
|
||||
## 演示
|
||||
|
||||
[](http://cloud.video.taobao.com/play/u/2987821887/p/1/e/6/t/1/50210690772.mp4)
|
||||
|
||||
[](http://cloud.video.taobao.com/play/u/2987821887/p/1/e/6/t/1/50210690772.mp4)
|
||||
|
||||
## 开发
|
||||
|
||||
|
@ -42,9 +36,9 @@ Arena 是一种命令行界面,支持轻而易举地运行和监控机器学
|
|||
|
||||
- Go >= 1.8
|
||||
|
||||
```
|
||||
mkdir -p $GOPATH/src/github.com/kubeflow
|
||||
cd $GOPATH/src/github.com/kubeflow
|
||||
```shell
|
||||
mkdir -p $(go env GOPATH)/src/github.com/kubeflow
|
||||
cd $(go env GOPATH)/src/github.com/kubeflow
|
||||
git clone https://github.com/kubeflow/arena.git
|
||||
cd arena
|
||||
make
|
||||
|
@ -59,4 +53,3 @@ make
|
|||
## 路线图
|
||||
|
||||
请参阅[路线图](ROADMAP.md)
|
||||
|
||||
|
|
40
ROADMAP.md
40
ROADMAP.md
|
@ -1,10 +1,40 @@
|
|||
# Arena 2019 Roadmap
|
||||
# Kubeflow Arena Roadmap
|
||||
|
||||
## Kubeflow Arena 2024 Roadmap
|
||||
|
||||
This document defines a high level roadmap for Arena development.
|
||||
|
||||
### 2019
|
||||
* Objective: Simplify the user experience by deeply integrating with the Kubeflow Ecosystem.
|
||||
* Kubeflow Integration
|
||||
* Prepare Arena for release v1.0.0 alongside kubeflow v1.10.
|
||||
* Develop a seamless integration with the Training Operator to help simplify model training using command line.
|
||||
* Integrate with Kubeflow Pipelines to facilitate model training from a Pipeline.
|
||||
* Enable model serving with KServe.
|
||||
* Add documentation to Kubeflow website:
|
||||
* Installation, uninstallation, and upgrade processes.
|
||||
* Guide for tfjob, mpijob, pytorchJob.
|
||||
|
||||
#### Core CUJs
|
||||
* Objective: Amplify the extensibility of Arena for different ML frameworks, AIGC models, and platforms.
|
||||
* Support DeepSpeed Training Job.
|
||||
* Support submitting and managing LLM fine-tuning jobs, such as those using DeepSpeed.
|
||||
* Enable model serving for an expanded set of models like Baichuan, LLaMA, ChatGLM, Falcon, and more.
|
||||
* Extend platform support to include ARM.
|
||||
* Integrate [Fluid project](https://github.com/fluid-cloudnative/fluid) for efficient data management.
|
||||
* Add support for Ray Job management with [Kuberay](https://github.com/ray-project/kuberay).
|
||||
|
||||
* Objective: Boost Performance and Stability.
|
||||
* Regularly publish recommended practices documentation.
|
||||
* Enhancements on Arena SDK.
|
||||
* Enhance code quality by:
|
||||
* Reduce repetitive code.
|
||||
* Enhance unit test.
|
||||
* Implement automated End-to-End Test:
|
||||
* Add integration tests using GitHub Actions.
|
||||
* Strive for more than 60% Test Coverage of Supported Features.
|
||||
|
||||
## Kubeflow Arena 2019 Roadmap
|
||||
|
||||
### Core CUJs
|
||||
|
||||
Objectives: "Make Arena easy to integrate with external systems."
|
||||
|
||||
|
@ -19,13 +49,13 @@ Objectives: "Simplify the user experience of the data scientists and provide a l
|
|||
* Submit and manage Model Serving with [KF Serving](https://github.com/kubeflow/kfserving)
|
||||
|
||||
|
||||
Objectives: "Make Arena support the same Operator compatiable with different API version, so the upgrade of operator doesn't impact the existing users' experiences."
|
||||
Objectives: "Make Arena support the same Operator compatible with different API version, so the upgrade of operator doesn't impact the existing users' experiences."
|
||||
|
||||
* Compatibility:
|
||||
* v1alpha2 and v1 TFJob
|
||||
* v1alpha1 and v1alpha2 MPIJob
|
||||
|
||||
Objectives: "Enchance the software quality of Arena so it can be in the quick iteration"
|
||||
Objectives: "Enhance the software quality of Arena so it can be in the quick iteration"
|
||||
|
||||
* Refactor the source code
|
||||
* Move Training implementation from `cmd` into `pkg`
|
||||
|
|
|
@ -0,0 +1,28 @@
|
|||
# Patterns to ignore when building packages.
|
||||
# This supports shell glob matching, relative path matching, and
|
||||
# negation (prefixed with !). Only one pattern per line.
|
||||
.DS_Store
|
||||
# Common VCS dirs
|
||||
.git/
|
||||
.gitignore
|
||||
.bzr/
|
||||
.bzrignore
|
||||
.hg/
|
||||
.hgignore
|
||||
.svn/
|
||||
# Common backup files
|
||||
*.swp
|
||||
*.bak
|
||||
*.tmp
|
||||
*.orig
|
||||
*~
|
||||
# Various IDEs
|
||||
.project
|
||||
.idea/
|
||||
*.tmproj
|
||||
.vscode/
|
||||
|
||||
# helm-unittest
|
||||
tests
|
||||
.debug
|
||||
__snapshot__
|
|
@ -0,0 +1,67 @@
|
|||
apiVersion: v2
|
||||
name: arena-artifacts
|
||||
description: A Helm chart for installing arena dependencies
|
||||
|
||||
# A chart can be either an 'application' or a 'library' chart.
|
||||
#
|
||||
# Application charts are a collection of templates that can be packaged into versioned archives
|
||||
# to be deployed.
|
||||
#
|
||||
# Library charts provide useful utilities or functions for the chart developer. They're included as
|
||||
# a dependency of application charts to inject those utilities and functions into the rendering
|
||||
# pipeline. Library charts do not define any templates and therefore cannot be deployed.
|
||||
type: application
|
||||
|
||||
# This is the chart version. This version number should be incremented each time you make changes
|
||||
# to the chart and its templates, including the app version.
|
||||
# Versions are expected to follow Semantic Versioning (https://semver.org/)
|
||||
version: 0.15.1
|
||||
|
||||
# This is the version number of the application being deployed. This version number should be
|
||||
# incremented each time you make changes to the application. Versions are not expected to
|
||||
# follow Semantic Versioning. They should reflect the version the application is using.
|
||||
# It is recommended to use it with quotes.
|
||||
appVersion: 0.15.1
|
||||
|
||||
dependencies:
|
||||
- name: tf-operator
|
||||
alias: tf
|
||||
version: 0.1.0
|
||||
repository: "@tf-operator"
|
||||
condition: tf.enabled,global.tf.enabled
|
||||
- name: tf-dashboard
|
||||
alias: tfdashboard
|
||||
version: 0.1.0
|
||||
repository: "@tf-dashbard"
|
||||
condition: tfdashboard.enabled,global.tfdashboard.enabled
|
||||
- name: cron-operator
|
||||
alias: cron
|
||||
version: 0.1.0
|
||||
repository: "@cron-operator"
|
||||
condition: cron.enabled,global.cron.enabled
|
||||
- name: et-operator
|
||||
alias: et
|
||||
version: 0.1.1
|
||||
repository: "@et-operator"
|
||||
condition: et.enabled,global.et.enabled
|
||||
- name: mpi-operator
|
||||
alias: mpi
|
||||
version: 0.1.0
|
||||
repository: "@mpi-operator"
|
||||
condition: mpi.enabled,global.mpi.enabled
|
||||
- name: pytorch-operator
|
||||
alias: pytorch
|
||||
version: 0.1.0
|
||||
repository: "@pytorch-operator"
|
||||
condition: pytorch.enabled,global.pytorch.enabled
|
||||
- name: gpu-exporter
|
||||
alias: exporter
|
||||
version: 0.1.0
|
||||
repository: "@gpu-exporter"
|
||||
condition: exporter.enabled,global.exporter.enabled
|
||||
- name: elastic-job-supervisor
|
||||
alias: elastic-job-supervisor
|
||||
version: 0.1.0
|
||||
repository: "@elastic-job-supervisor"
|
||||
condition: elastic-job-supervisor.enabled,global.elastic-job-supervisor.enabled
|
||||
|
|
@ -0,0 +1,223 @@
|
|||
---
|
||||
apiVersion: apiextensions.k8s.io/v1
|
||||
kind: CustomResourceDefinition
|
||||
metadata:
|
||||
annotations:
|
||||
controller-gen.kubebuilder.io/version: v0.6.0
|
||||
git-repo: http://gitlab.alibaba-inc.com/kube-ai/kubedlpro.git
|
||||
git-branch: feature/k8s-1.22
|
||||
git-commit: 4f076d22
|
||||
creationTimestamp: null
|
||||
name: crons.apps.kubedl.io
|
||||
spec:
|
||||
group: apps.kubedl.io
|
||||
names:
|
||||
kind: Cron
|
||||
listKind: CronList
|
||||
plural: crons
|
||||
singular: cron
|
||||
scope: Namespaced
|
||||
versions:
|
||||
- additionalPrinterColumns:
|
||||
- jsonPath: .status.conditions[-1:].type
|
||||
name: State
|
||||
type: string
|
||||
- jsonPath: .metadata.creationTimestamp
|
||||
name: Age
|
||||
type: date
|
||||
name: v1alpha1
|
||||
schema:
|
||||
openAPIV3Schema:
|
||||
description: Cron is the Schema for the crons API
|
||||
properties:
|
||||
apiVersion:
|
||||
description: 'APIVersion defines the versioned schema of this representation
|
||||
of an object. Servers should convert recognized schemas to the latest
|
||||
internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources'
|
||||
type: string
|
||||
kind:
|
||||
description: 'Kind is a string value representing the REST resource this
|
||||
object represents. Servers may infer this from the endpoint the client
|
||||
submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds'
|
||||
type: string
|
||||
metadata:
|
||||
type: object
|
||||
spec:
|
||||
description: CronSpec defines the desired state of Cron
|
||||
properties:
|
||||
concurrencyPolicy:
|
||||
description: 'Specifies how to treat concurrent executions of a Task.
|
||||
Valid values are: - "Allow" (default): allows CronJobs to run concurrently;
|
||||
- "Forbid": forbids concurrent runs, skipping next run if previous
|
||||
run hasn''t finished yet; - "Replace": cancels currently running
|
||||
job and replaces it with a new one'
|
||||
type: string
|
||||
deadline:
|
||||
description: Deadline is the timestamp that a cron job can keep scheduling
|
||||
until then.
|
||||
format: date-time
|
||||
type: string
|
||||
historyLimit:
|
||||
description: The number of finished job history to retain. This is
|
||||
a pointer to distinguish between explicit zero and not specified.
|
||||
format: int32
|
||||
type: integer
|
||||
schedule:
|
||||
description: The schedule in Cron format, see https://en.wikipedia.org/wiki/Cron.
|
||||
type: string
|
||||
suspend:
|
||||
description: This flag tells the controller to suspend subsequent
|
||||
executions, it does not apply to already started executions. Defaults
|
||||
to false.
|
||||
type: boolean
|
||||
template:
|
||||
description: Specifies the job that will be created when executing
|
||||
a CronTask.
|
||||
properties:
|
||||
apiVersion:
|
||||
description: 'APIVersion defines the versioned schema of this
|
||||
representation of an object. Servers should convert recognized
|
||||
schemas to the latest internal value, and may reject unrecognized
|
||||
values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources'
|
||||
type: string
|
||||
kind:
|
||||
description: 'Kind is a string value representing the REST resource
|
||||
this object represents. Servers may infer this from the endpoint
|
||||
the client submits requests to. Cannot be updated. In CamelCase.
|
||||
More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds'
|
||||
type: string
|
||||
workload:
|
||||
description: Workload is the specification of the desired cron
|
||||
job with specific types.
|
||||
type: object
|
||||
x-kubernetes-preserve-unknown-fields: true
|
||||
type: object
|
||||
required:
|
||||
- schedule
|
||||
- template
|
||||
type: object
|
||||
status:
|
||||
description: CronStatus defines the observed state of Cron
|
||||
properties:
|
||||
active:
|
||||
description: A list of currently running jobs.
|
||||
items:
|
||||
description: 'ObjectReference contains enough information to let
|
||||
you inspect or modify the referred object. --- New uses of this
|
||||
type are discouraged because of difficulty describing its usage
|
||||
when embedded in APIs. 1. Ignored fields. It includes many fields
|
||||
which are not generally honored. For instance, ResourceVersion
|
||||
and FieldPath are both very rarely valid in actual usage. 2.
|
||||
Invalid usage help. It is impossible to add specific help for
|
||||
individual usage. In most embedded usages, there are particular restrictions
|
||||
like, "must refer only to types A and B" or "UID not honored"
|
||||
or "name must be restricted". Those cannot be well described
|
||||
when embedded. 3. Inconsistent validation. Because the usages
|
||||
are different, the validation rules are different by usage, which
|
||||
makes it hard for users to predict what will happen. 4. The fields
|
||||
are both imprecise and overly precise. Kind is not a precise
|
||||
mapping to a URL. This can produce ambiguity during interpretation
|
||||
and require a REST mapping. In most cases, the dependency is
|
||||
on the group,resource tuple and the version of the actual
|
||||
struct is irrelevant. 5. We cannot easily change it. Because
|
||||
this type is embedded in many locations, updates to this type will
|
||||
affect numerous schemas. Don''t make new APIs embed an underspecified
|
||||
API type they do not control. Instead of using this type, create
|
||||
a locally provided and used type that is well-focused on your
|
||||
reference. For example, ServiceReferences for admission registration:
|
||||
https://github.com/kubernetes/api/blob/release-1.17/admissionregistration/v1/types.go#L533
|
||||
.'
|
||||
properties:
|
||||
apiVersion:
|
||||
description: API version of the referent.
|
||||
type: string
|
||||
fieldPath:
|
||||
description: 'If referring to a piece of an object instead of
|
||||
an entire object, this string should contain a valid JSON/Go
|
||||
field access statement, such as desiredState.manifest.containers[2].
|
||||
For example, if the object reference is to a container within
|
||||
a pod, this would take on a value like: "spec.containers{name}"
|
||||
(where "name" refers to the name of the container that triggered
|
||||
the event) or if no container name is specified "spec.containers[2]"
|
||||
(container with index 2 in this pod). This syntax is chosen
|
||||
only to have some well-defined way of referencing a part of
|
||||
an object. TODO: this design is not final and this field is
|
||||
subject to change in the future.'
|
||||
type: string
|
||||
kind:
|
||||
description: 'Kind of the referent. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds'
|
||||
type: string
|
||||
name:
|
||||
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names'
|
||||
type: string
|
||||
namespace:
|
||||
description: 'Namespace of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/namespaces/'
|
||||
type: string
|
||||
resourceVersion:
|
||||
description: 'Specific resourceVersion to which this reference
|
||||
is made, if any. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#concurrency-control-and-consistency'
|
||||
type: string
|
||||
uid:
|
||||
description: 'UID of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#uids'
|
||||
type: string
|
||||
type: object
|
||||
type: array
|
||||
history:
|
||||
description: History is a list of scheduled cron job with its digest
|
||||
records.
|
||||
items:
|
||||
properties:
|
||||
created:
|
||||
description: Created is the creation timestamp of job.
|
||||
format: date-time
|
||||
type: string
|
||||
finished:
|
||||
description: Finished is the failed or succeeded timestamp of
|
||||
job.
|
||||
format: date-time
|
||||
type: string
|
||||
object:
|
||||
description: Object is the reference of the historical scheduled
|
||||
cron job.
|
||||
properties:
|
||||
apiGroup:
|
||||
description: APIGroup is the group for the resource being
|
||||
referenced. If APIGroup is not specified, the specified
|
||||
Kind must be in the core API group. For any other third-party
|
||||
types, APIGroup is required.
|
||||
type: string
|
||||
kind:
|
||||
description: Kind is the type of resource being referenced
|
||||
type: string
|
||||
name:
|
||||
description: Name is the name of resource being referenced
|
||||
type: string
|
||||
required:
|
||||
- kind
|
||||
- name
|
||||
type: object
|
||||
status:
|
||||
description: Status is the final status when job finished.
|
||||
type: string
|
||||
required:
|
||||
- object
|
||||
- status
|
||||
type: object
|
||||
type: array
|
||||
lastScheduleTime:
|
||||
description: Information when was the last time the job was successfully
|
||||
scheduled.
|
||||
format: date-time
|
||||
type: string
|
||||
type: object
|
||||
type: object
|
||||
served: true
|
||||
storage: true
|
||||
subresources:
|
||||
status: {}
|
||||
status:
|
||||
acceptedNames:
|
||||
kind: ""
|
||||
plural: ""
|
||||
conditions: []
|
||||
storedVersions: []
|
|
@ -0,0 +1,186 @@
|
|||
---
|
||||
apiVersion: apiextensions.k8s.io/v1
|
||||
kind: CustomResourceDefinition
|
||||
metadata:
|
||||
annotations:
|
||||
controller-gen.kubebuilder.io/version: v0.6.0
|
||||
git-repo: https://github.com/AliyunContainerService/et-operator.git
|
||||
git-branch: master
|
||||
git-commit: "1499985"
|
||||
creationTimestamp: null
|
||||
name: scaleins.kai.alibabacloud.com
|
||||
spec:
|
||||
group: kai.alibabacloud.com
|
||||
names:
|
||||
kind: ScaleIn
|
||||
listKind: ScaleInList
|
||||
plural: scaleins
|
||||
singular: scalein
|
||||
scope: Namespaced
|
||||
versions:
|
||||
- additionalPrinterColumns:
|
||||
- jsonPath: .status.conditions[-1:].type
|
||||
name: Phase
|
||||
type: string
|
||||
- jsonPath: .metadata.creationTimestamp
|
||||
name: Age
|
||||
type: date
|
||||
name: v1alpha1
|
||||
schema:
|
||||
openAPIV3Schema:
|
||||
description: ScaleIn is the Schema for the scaleins API
|
||||
properties:
|
||||
apiVersion:
|
||||
description: 'APIVersion defines the versioned schema of this representation
|
||||
of an object. Servers should convert recognized schemas to the latest
|
||||
internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources'
|
||||
type: string
|
||||
kind:
|
||||
description: 'Kind is a string value representing the REST resource this
|
||||
object represents. Servers may infer this from the endpoint the client
|
||||
submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds'
|
||||
type: string
|
||||
metadata:
|
||||
type: object
|
||||
spec:
|
||||
description: ScaleInSpec defines the desired state of ScaleIn
|
||||
properties:
|
||||
backoffLimit:
|
||||
description: Optional number of retries to execute script.
|
||||
format: int32
|
||||
type: integer
|
||||
env:
|
||||
items:
|
||||
properties:
|
||||
name:
|
||||
type: string
|
||||
value:
|
||||
type: string
|
||||
type: object
|
||||
type: array
|
||||
script:
|
||||
type: string
|
||||
selector:
|
||||
properties:
|
||||
name:
|
||||
type: string
|
||||
type: object
|
||||
timeout:
|
||||
description: Optional number of timeout to execute script.
|
||||
format: int32
|
||||
type: integer
|
||||
toDelete:
|
||||
properties:
|
||||
count:
|
||||
type: integer
|
||||
podNames:
|
||||
items:
|
||||
type: string
|
||||
type: array
|
||||
type: object
|
||||
type: object
|
||||
status:
|
||||
description: Most recently observed status of the PyTorchJob. Read-only
|
||||
(modified by the system).
|
||||
properties:
|
||||
completionTime:
|
||||
description: Represents time when the job was completed. It is not
|
||||
guaranteed to be set in happens-before order across separate operations.
|
||||
It is represented in RFC3339 form and is in UTC.
|
||||
format: date-time
|
||||
type: string
|
||||
conditions:
|
||||
description: Conditions is an array of current observed job conditions.
|
||||
items:
|
||||
description: JobCondition describes the state of the job at a certain
|
||||
point.
|
||||
properties:
|
||||
lastTransitionTime:
|
||||
description: Last time the condition transitioned from one status
|
||||
to another.
|
||||
format: date-time
|
||||
type: string
|
||||
lastUpdateTime:
|
||||
description: The last time this condition was updated.
|
||||
format: date-time
|
||||
type: string
|
||||
message:
|
||||
description: A human readable message indicating details about
|
||||
the transition.
|
||||
type: string
|
||||
reason:
|
||||
description: The reason for the condition's last transition.
|
||||
type: string
|
||||
status:
|
||||
description: Status of the condition, one of True, False, Unknown.
|
||||
type: string
|
||||
type:
|
||||
description: Type of job condition.
|
||||
type: string
|
||||
required:
|
||||
- status
|
||||
- type
|
||||
type: object
|
||||
type: array
|
||||
currentScaler:
|
||||
description: record scaleout/scalein name when scaling. e.g. (default/scaleout-sample)
|
||||
type: string
|
||||
lastReconcileTime:
|
||||
description: Represents last time when the job was reconciled. It
|
||||
is not guaranteed to be set in happens-before order across separate
|
||||
operations. It is represented in RFC3339 form and is in UTC.
|
||||
format: date-time
|
||||
type: string
|
||||
phase:
|
||||
description: record trainingjob current phase
|
||||
type: string
|
||||
replicaStatuses:
|
||||
additionalProperties:
|
||||
description: ReplicaStatus represents the current observed state
|
||||
of the replica.
|
||||
properties:
|
||||
active:
|
||||
description: The number of actively running pods.
|
||||
format: int32
|
||||
type: integer
|
||||
failed:
|
||||
description: The number of pods which reached phase Failed.
|
||||
format: int32
|
||||
type: integer
|
||||
succeeded:
|
||||
description: The number of pods which reached phase Succeeded.
|
||||
format: int32
|
||||
type: integer
|
||||
type: object
|
||||
description: ReplicaStatuses is map of ReplicaType and ReplicaStatus,
|
||||
specifies the status of each replica.
|
||||
type: object
|
||||
restartCount:
|
||||
description: The number of times the Job has been restarted
|
||||
format: int32
|
||||
type: integer
|
||||
startTime:
|
||||
description: Represents time when the job was acknowledged by the job controller. It is not guaranteed to be set in happens-before order across separate operations. It is represented in RFC3339 form and is in UTC.
|
||||
format: date-time
|
||||
type: string
|
||||
toDeletePods:
|
||||
description: record delete pods for scalein
|
||||
items:
|
||||
type: string
|
||||
type: array
|
||||
required:
|
||||
- conditions
|
||||
- replicaStatuses
|
||||
- restartCount
|
||||
type: object
|
||||
type: object
|
||||
served: true
|
||||
storage: true
|
||||
subresources:
|
||||
status: {}
|
||||
status:
|
||||
acceptedNames:
|
||||
kind: ""
|
||||
plural: ""
|
||||
conditions: []
|
||||
storedVersions: []
|
|
@ -0,0 +1,182 @@
|
|||
---
|
||||
apiVersion: apiextensions.k8s.io/v1
|
||||
kind: CustomResourceDefinition
|
||||
metadata:
|
||||
annotations:
|
||||
controller-gen.kubebuilder.io/version: v0.6.0
|
||||
git-repo: https://github.com/AliyunContainerService/et-operator.git
|
||||
git-branch: master
|
||||
git-commit: "1499985"
|
||||
creationTimestamp: null
|
||||
name: scaleouts.kai.alibabacloud.com
|
||||
spec:
|
||||
group: kai.alibabacloud.com
|
||||
names:
|
||||
kind: ScaleOut
|
||||
listKind: ScaleOutList
|
||||
plural: scaleouts
|
||||
singular: scaleout
|
||||
scope: Namespaced
|
||||
versions:
|
||||
- additionalPrinterColumns:
|
||||
- jsonPath: .status.conditions[-1:].type
|
||||
name: Phase
|
||||
type: string
|
||||
- jsonPath: .metadata.creationTimestamp
|
||||
name: Age
|
||||
type: date
|
||||
name: v1alpha1
|
||||
schema:
|
||||
openAPIV3Schema:
|
||||
description: ScaleOut is the Schema for the scaleouts API
|
||||
properties:
|
||||
apiVersion:
|
||||
description: 'APIVersion defines the versioned schema of this representation
|
||||
of an object. Servers should convert recognized schemas to the latest
|
||||
internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources'
|
||||
type: string
|
||||
kind:
|
||||
description: 'Kind is a string value representing the REST resource this
|
||||
object represents. Servers may infer this from the endpoint the client
|
||||
submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds'
|
||||
type: string
|
||||
metadata:
|
||||
type: object
|
||||
spec:
|
||||
description: ScaleOutSpec defines the desired state of ScaleOut
|
||||
properties:
|
||||
backoffLimit:
|
||||
description: Optional number of retries to execute script.
|
||||
format: int32
|
||||
type: integer
|
||||
env:
|
||||
items:
|
||||
properties:
|
||||
name:
|
||||
type: string
|
||||
value:
|
||||
type: string
|
||||
type: object
|
||||
type: array
|
||||
script:
|
||||
type: string
|
||||
selector:
|
||||
properties:
|
||||
name:
|
||||
type: string
|
||||
type: object
|
||||
timeout:
|
||||
description: Optional timeout for executing the script.
|
||||
format: int32
|
||||
type: integer
|
||||
toAdd:
|
||||
properties:
|
||||
count:
|
||||
format: int32
|
||||
type: integer
|
||||
type: object
|
||||
type: object
|
||||
status:
|
||||
description: Most recently observed status of the ScaleOut. Read-only
|
||||
(modified by the system).
|
||||
properties:
|
||||
addPods:
|
||||
items:
|
||||
type: string
|
||||
type: array
|
||||
completionTime:
|
||||
description: Represents time when the job was completed. It is not
|
||||
guaranteed to be set in happens-before order across separate operations.
|
||||
It is represented in RFC3339 form and is in UTC.
|
||||
format: date-time
|
||||
type: string
|
||||
conditions:
|
||||
description: Conditions is an array of current observed job conditions.
|
||||
items:
|
||||
description: JobCondition describes the state of the job at a certain
|
||||
point.
|
||||
properties:
|
||||
lastTransitionTime:
|
||||
description: Last time the condition transitioned from one status
|
||||
to another.
|
||||
format: date-time
|
||||
type: string
|
||||
lastUpdateTime:
|
||||
description: The last time this condition was updated.
|
||||
format: date-time
|
||||
type: string
|
||||
message:
|
||||
description: A human readable message indicating details about
|
||||
the transition.
|
||||
type: string
|
||||
reason:
|
||||
description: The reason for the condition's last transition.
|
||||
type: string
|
||||
status:
|
||||
description: Status of the condition, one of True, False, Unknown.
|
||||
type: string
|
||||
type:
|
||||
description: Type of job condition.
|
||||
type: string
|
||||
required:
|
||||
- status
|
||||
- type
|
||||
type: object
|
||||
type: array
|
||||
currentScaler:
|
||||
description: record scaleout/scalein name when scaling. e.g. (default/scaleout-sample)
|
||||
type: string
|
||||
lastReconcileTime:
|
||||
description: Represents last time when the job was reconciled. It
|
||||
is not guaranteed to be set in happens-before order across separate
|
||||
operations. It is represented in RFC3339 form and is in UTC.
|
||||
format: date-time
|
||||
type: string
|
||||
phase:
|
||||
description: record trainingjob current phase
|
||||
type: string
|
||||
replicaStatuses:
|
||||
additionalProperties:
|
||||
description: ReplicaStatus represents the current observed state
|
||||
of the replica.
|
||||
properties:
|
||||
active:
|
||||
description: The number of actively running pods.
|
||||
format: int32
|
||||
type: integer
|
||||
failed:
|
||||
description: The number of pods which reached phase Failed.
|
||||
format: int32
|
||||
type: integer
|
||||
succeeded:
|
||||
description: The number of pods which reached phase Succeeded.
|
||||
format: int32
|
||||
type: integer
|
||||
type: object
|
||||
description: ReplicaStatuses is map of ReplicaType and ReplicaStatus,
|
||||
specifies the status of each replica.
|
||||
type: object
|
||||
restartCount:
|
||||
description: The number of times the Job has been restarted
|
||||
format: int32
|
||||
type: integer
|
||||
startTime:
|
||||
description: Represents time when the job was acknowledged by the job controller. It is not guaranteed to be set in happens-before order across separate operations. It is represented in RFC3339 form and is in UTC.
|
||||
format: date-time
|
||||
type: string
|
||||
required:
|
||||
- conditions
|
||||
- replicaStatuses
|
||||
- restartCount
|
||||
type: object
|
||||
type: object
|
||||
served: true
|
||||
storage: true
|
||||
subresources:
|
||||
status: {}
|
||||
status:
|
||||
acceptedNames:
|
||||
kind: ""
|
||||
plural: ""
|
||||
conditions: []
|
||||
storedVersions: []
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,223 @@
|
|||
---
|
||||
apiVersion: apiextensions.k8s.io/v1
|
||||
kind: CustomResourceDefinition
|
||||
metadata:
|
||||
annotations:
|
||||
controller-gen.kubebuilder.io/version: v0.6.0
|
||||
git-repo: http://gitlab.alibaba-inc.com/kube-ai/kubedlpro.git
|
||||
git-branch: feature/k8s-1.22
|
||||
git-commit: 4f076d22
|
||||
creationTimestamp: null
|
||||
name: crons.apps.kubedl.io
|
||||
spec:
|
||||
group: apps.kubedl.io
|
||||
names:
|
||||
kind: Cron
|
||||
listKind: CronList
|
||||
plural: crons
|
||||
singular: cron
|
||||
scope: Namespaced
|
||||
versions:
|
||||
- additionalPrinterColumns:
|
||||
- jsonPath: .status.conditions[-1:].type
|
||||
name: State
|
||||
type: string
|
||||
- jsonPath: .metadata.creationTimestamp
|
||||
name: Age
|
||||
type: date
|
||||
name: v1alpha1
|
||||
schema:
|
||||
openAPIV3Schema:
|
||||
description: Cron is the Schema for the crons API
|
||||
properties:
|
||||
apiVersion:
|
||||
description: 'APIVersion defines the versioned schema of this representation
|
||||
of an object. Servers should convert recognized schemas to the latest
|
||||
internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources'
|
||||
type: string
|
||||
kind:
|
||||
description: 'Kind is a string value representing the REST resource this
|
||||
object represents. Servers may infer this from the endpoint the client
|
||||
submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds'
|
||||
type: string
|
||||
metadata:
|
||||
type: object
|
||||
spec:
|
||||
description: CronSpec defines the desired state of Cron
|
||||
properties:
|
||||
concurrencyPolicy:
|
||||
description: 'Specifies how to treat concurrent executions of a Task.
|
||||
Valid values are: - "Allow" (default): allows CronJobs to run concurrently;
|
||||
- "Forbid": forbids concurrent runs, skipping next run if previous
|
||||
run hasn''t finished yet; - "Replace": cancels currently running
|
||||
job and replaces it with a new one'
|
||||
type: string
|
||||
deadline:
|
||||
description: Deadline is the timestamp that a cron job can keep scheduling
|
||||
until then.
|
||||
format: date-time
|
||||
type: string
|
||||
historyLimit:
|
||||
description: The number of finished job history to retain. This is
|
||||
a pointer to distinguish between explicit zero and not specified.
|
||||
format: int32
|
||||
type: integer
|
||||
schedule:
|
||||
description: The schedule in Cron format, see https://en.wikipedia.org/wiki/Cron.
|
||||
type: string
|
||||
suspend:
|
||||
description: This flag tells the controller to suspend subsequent
|
||||
executions, it does not apply to already started executions. Defaults
|
||||
to false.
|
||||
type: boolean
|
||||
template:
|
||||
description: Specifies the job that will be created when executing
|
||||
a CronTask.
|
||||
properties:
|
||||
apiVersion:
|
||||
description: 'APIVersion defines the versioned schema of this
|
||||
representation of an object. Servers should convert recognized
|
||||
schemas to the latest internal value, and may reject unrecognized
|
||||
values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources'
|
||||
type: string
|
||||
kind:
|
||||
description: 'Kind is a string value representing the REST resource
|
||||
this object represents. Servers may infer this from the endpoint
|
||||
the client submits requests to. Cannot be updated. In CamelCase.
|
||||
More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds'
|
||||
type: string
|
||||
workload:
|
||||
description: Workload is the specification of the desired cron
|
||||
job with specific types.
|
||||
type: object
|
||||
x-kubernetes-preserve-unknown-fields: true
|
||||
type: object
|
||||
required:
|
||||
- schedule
|
||||
- template
|
||||
type: object
|
||||
status:
|
||||
description: CronStatus defines the observed state of Cron
|
||||
properties:
|
||||
active:
|
||||
description: A list of currently running jobs.
|
||||
items:
|
||||
description: 'ObjectReference contains enough information to let
|
||||
you inspect or modify the referred object. --- New uses of this
|
||||
type are discouraged because of difficulty describing its usage
|
||||
when embedded in APIs. 1. Ignored fields. It includes many fields
|
||||
which are not generally honored. For instance, ResourceVersion
|
||||
and FieldPath are both very rarely valid in actual usage. 2.
|
||||
Invalid usage help. It is impossible to add specific help for
|
||||
individual usage. In most embedded usages, there are particular restrictions
|
||||
like, "must refer only to types A and B" or "UID not honored"
|
||||
or "name must be restricted". Those cannot be well described
|
||||
when embedded. 3. Inconsistent validation. Because the usages
|
||||
are different, the validation rules are different by usage, which
|
||||
makes it hard for users to predict what will happen. 4. The fields
|
||||
are both imprecise and overly precise. Kind is not a precise
|
||||
mapping to a URL. This can produce ambiguity during interpretation
|
||||
and require a REST mapping. In most cases, the dependency is
|
||||
on the group,resource tuple and the version of the actual
|
||||
struct is irrelevant. 5. We cannot easily change it. Because
|
||||
this type is embedded in many locations, updates to this type will
|
||||
affect numerous schemas. Don''t make new APIs embed an underspecified
|
||||
API type they do not control. Instead of using this type, create
|
||||
a locally provided and used type that is well-focused on your
|
||||
reference. For example, ServiceReferences for admission registration:
|
||||
https://github.com/kubernetes/api/blob/release-1.17/admissionregistration/v1/types.go#L533
|
||||
.'
|
||||
properties:
|
||||
apiVersion:
|
||||
description: API version of the referent.
|
||||
type: string
|
||||
fieldPath:
|
||||
description: 'If referring to a piece of an object instead of
|
||||
an entire object, this string should contain a valid JSON/Go
|
||||
field access statement, such as desiredState.manifest.containers[2].
|
||||
For example, if the object reference is to a container within
|
||||
a pod, this would take on a value like: "spec.containers{name}"
|
||||
(where "name" refers to the name of the container that triggered
|
||||
the event) or if no container name is specified "spec.containers[2]"
|
||||
(container with index 2 in this pod). This syntax is chosen
|
||||
only to have some well-defined way of referencing a part of
|
||||
an object. TODO: this design is not final and this field is
|
||||
subject to change in the future.'
|
||||
type: string
|
||||
kind:
|
||||
description: 'Kind of the referent. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds'
|
||||
type: string
|
||||
name:
|
||||
description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names'
|
||||
type: string
|
||||
namespace:
|
||||
description: 'Namespace of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/namespaces/'
|
||||
type: string
|
||||
resourceVersion:
|
||||
description: 'Specific resourceVersion to which this reference
|
||||
is made, if any. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#concurrency-control-and-consistency'
|
||||
type: string
|
||||
uid:
|
||||
description: 'UID of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#uids'
|
||||
type: string
|
||||
type: object
|
||||
type: array
|
||||
history:
|
||||
description: History is a list of scheduled cron jobs with their digest
|
||||
records.
|
||||
items:
|
||||
properties:
|
||||
created:
|
||||
description: Created is the creation timestamp of job.
|
||||
format: date-time
|
||||
type: string
|
||||
finished:
|
||||
description: Finished is the failed or succeeded timestamp of
|
||||
job.
|
||||
format: date-time
|
||||
type: string
|
||||
object:
|
||||
description: Object is the reference of the historical scheduled
|
||||
cron job.
|
||||
properties:
|
||||
apiGroup:
|
||||
description: APIGroup is the group for the resource being
|
||||
referenced. If APIGroup is not specified, the specified
|
||||
Kind must be in the core API group. For any other third-party
|
||||
types, APIGroup is required.
|
||||
type: string
|
||||
kind:
|
||||
description: Kind is the type of resource being referenced
|
||||
type: string
|
||||
name:
|
||||
description: Name is the name of resource being referenced
|
||||
type: string
|
||||
required:
|
||||
- kind
|
||||
- name
|
||||
type: object
|
||||
status:
|
||||
description: Status is the final status when job finished.
|
||||
type: string
|
||||
required:
|
||||
- object
|
||||
- status
|
||||
type: object
|
||||
type: array
|
||||
lastScheduleTime:
|
||||
description: The last time the job was successfully
|
||||
scheduled.
|
||||
format: date-time
|
||||
type: string
|
||||
type: object
|
||||
type: object
|
||||
served: true
|
||||
storage: true
|
||||
subresources:
|
||||
status: {}
|
||||
status:
|
||||
acceptedNames:
|
||||
kind: ""
|
||||
plural: ""
|
||||
conditions: []
|
||||
storedVersions: []
|
|
@ -0,0 +1,231 @@
|
|||
---
|
||||
apiVersion: apiextensions.k8s.io/v1beta1
|
||||
kind: CustomResourceDefinition
|
||||
metadata:
|
||||
annotations:
|
||||
controller-gen.kubebuilder.io/version: v0.4.1
|
||||
creationTimestamp: null
|
||||
name: scaleins.kai.alibabacloud.com
|
||||
spec:
|
||||
additionalPrinterColumns:
|
||||
- JSONPath: .status.conditions[-1:].type
|
||||
name: Phase
|
||||
type: string
|
||||
- JSONPath: .metadata.creationTimestamp
|
||||
name: Age
|
||||
type: date
|
||||
group: kai.alibabacloud.com
|
||||
names:
|
||||
kind: ScaleIn
|
||||
listKind: ScaleInList
|
||||
plural: scaleins
|
||||
singular: scalein
|
||||
scope: Namespaced
|
||||
subresources:
|
||||
status: {}
|
||||
validation:
|
||||
openAPIV3Schema:
|
||||
properties:
|
||||
apiVersion:
|
||||
type: string
|
||||
kind:
|
||||
type: string
|
||||
metadata:
|
||||
type: object
|
||||
spec:
|
||||
properties:
|
||||
backoffLimit:
|
||||
format: int32
|
||||
type: integer
|
||||
env:
|
||||
items:
|
||||
properties:
|
||||
name:
|
||||
type: string
|
||||
value:
|
||||
type: string
|
||||
type: object
|
||||
type: array
|
||||
script:
|
||||
type: string
|
||||
selector:
|
||||
properties:
|
||||
name:
|
||||
type: string
|
||||
type: object
|
||||
timeout:
|
||||
format: int32
|
||||
type: integer
|
||||
toDelete:
|
||||
properties:
|
||||
count:
|
||||
type: integer
|
||||
podNames:
|
||||
items:
|
||||
type: string
|
||||
type: array
|
||||
type: object
|
||||
type: object
|
||||
type: object
|
||||
version: v1alpha1
|
||||
versions:
|
||||
- name: v1alpha1
|
||||
served: true
|
||||
storage: true
|
||||
---
|
||||
apiVersion: apiextensions.k8s.io/v1beta1
|
||||
kind: CustomResourceDefinition
|
||||
metadata:
|
||||
annotations:
|
||||
controller-gen.kubebuilder.io/version: v0.4.1
|
||||
creationTimestamp: null
|
||||
name: scaleouts.kai.alibabacloud.com
|
||||
spec:
|
||||
additionalPrinterColumns:
|
||||
- JSONPath: .status.conditions[-1:].type
|
||||
name: Phase
|
||||
type: string
|
||||
- JSONPath: .metadata.creationTimestamp
|
||||
name: Age
|
||||
type: date
|
||||
group: kai.alibabacloud.com
|
||||
names:
|
||||
kind: ScaleOut
|
||||
listKind: ScaleOutList
|
||||
plural: scaleouts
|
||||
singular: scaleout
|
||||
scope: Namespaced
|
||||
subresources:
|
||||
status: {}
|
||||
validation:
|
||||
openAPIV3Schema:
|
||||
properties:
|
||||
apiVersion:
|
||||
type: string
|
||||
kind:
|
||||
type: string
|
||||
metadata:
|
||||
type: object
|
||||
spec:
|
||||
properties:
|
||||
backoffLimit:
|
||||
format: int32
|
||||
type: integer
|
||||
env:
|
||||
items:
|
||||
properties:
|
||||
name:
|
||||
type: string
|
||||
value:
|
||||
type: string
|
||||
type: object
|
||||
type: array
|
||||
script:
|
||||
type: string
|
||||
selector:
|
||||
properties:
|
||||
name:
|
||||
type: string
|
||||
type: object
|
||||
timeout:
|
||||
format: int32
|
||||
type: integer
|
||||
toAdd:
|
||||
properties:
|
||||
count:
|
||||
format: int32
|
||||
type: integer
|
||||
type: object
|
||||
type: object
|
||||
type: object
|
||||
version: v1alpha1
|
||||
versions:
|
||||
- name: v1alpha1
|
||||
served: true
|
||||
storage: true
|
||||
---
|
||||
apiVersion: apiextensions.k8s.io/v1beta1
|
||||
kind: CustomResourceDefinition
|
||||
metadata:
|
||||
annotations:
|
||||
controller-gen.kubebuilder.io/version: v0.4.1
|
||||
creationTimestamp: null
|
||||
name: trainingjobs.kai.alibabacloud.com
|
||||
spec:
|
||||
additionalPrinterColumns:
|
||||
- JSONPath: .status.phase
|
||||
name: Phase
|
||||
type: string
|
||||
- JSONPath: .metadata.creationTimestamp
|
||||
name: Age
|
||||
type: date
|
||||
group: kai.alibabacloud.com
|
||||
names:
|
||||
kind: TrainingJob
|
||||
listKind: TrainingJobList
|
||||
plural: trainingjobs
|
||||
singular: trainingjob
|
||||
scope: Namespaced
|
||||
subresources:
|
||||
status: {}
|
||||
validation:
|
||||
openAPIV3Schema:
|
||||
properties:
|
||||
apiVersion:
|
||||
type: string
|
||||
kind:
|
||||
type: string
|
||||
metadata:
|
||||
type: object
|
||||
spec:
|
||||
properties:
|
||||
cleanPodPolicy:
|
||||
type: string
|
||||
etReplicaSpecs:
|
||||
properties:
|
||||
launcher:
|
||||
properties:
|
||||
replicas:
|
||||
format: int32
|
||||
maximum: 1
|
||||
minimum: 1
|
||||
type: integer
|
||||
restartPolicy:
|
||||
type: string
|
||||
type: object
|
||||
worker:
|
||||
properties:
|
||||
maxReplicas:
|
||||
format: int32
|
||||
minimum: 1
|
||||
type: integer
|
||||
minReplicas:
|
||||
format: int32
|
||||
minimum: 1
|
||||
type: integer
|
||||
replicas:
|
||||
format: int32
|
||||
minimum: 1
|
||||
type: integer
|
||||
restartPolicy:
|
||||
type: string
|
||||
type: object
|
||||
required:
|
||||
- launcher
|
||||
- worker
|
||||
type: object
|
||||
x-kubernetes-preserve-unknown-fields: true
|
||||
launcherAttachMode:
|
||||
type: string
|
||||
slotsPerWorker:
|
||||
format: int32
|
||||
type: integer
|
||||
required:
|
||||
- etReplicaSpecs
|
||||
type: object
|
||||
type: object
|
||||
version: v1alpha1
|
||||
versions:
|
||||
- name: v1alpha1
|
||||
served: true
|
||||
storage: true
|
|
@ -0,0 +1,47 @@
|
|||
apiVersion: apiextensions.k8s.io/v1beta1
|
||||
kind: CustomResourceDefinition
|
||||
metadata:
|
||||
name: mpijobs.kubeflow.org
|
||||
spec:
|
||||
group: kubeflow.org
|
||||
version: v1alpha1
|
||||
scope: Namespaced
|
||||
subresources:
|
||||
status: {}
|
||||
names:
|
||||
plural: mpijobs
|
||||
singular: mpijob
|
||||
kind: MPIJob
|
||||
shortNames:
|
||||
- mj
|
||||
- mpij
|
||||
validation:
|
||||
openAPIV3Schema:
|
||||
properties:
|
||||
spec:
|
||||
title: The MPIJob spec
|
||||
description: Either `gpus` or `replicas` should be specified, but not both
|
||||
oneOf:
|
||||
- properties:
|
||||
gpus:
|
||||
title: Total number of GPUs
|
||||
description: Valid values are 1, 2, 4, or any multiple of 8
|
||||
oneOf:
|
||||
- type: integer
|
||||
enum:
|
||||
- 1
|
||||
- 2
|
||||
- 4
|
||||
- type: integer
|
||||
multipleOf: 8
|
||||
minimum: 8
|
||||
required:
|
||||
- gpus
|
||||
- properties:
|
||||
replicas:
|
||||
title: Total number of replicas
|
||||
description: The GPU resource limit should be specified for each replica
|
||||
type: integer
|
||||
minimum: 1
|
||||
required:
|
||||
- replicas
|
|
@ -0,0 +1,43 @@
|
|||
---
|
||||
apiVersion: apiextensions.k8s.io/v1beta1
|
||||
kind: CustomResourceDefinition
|
||||
metadata:
|
||||
name: pytorchjobs.kubeflow.org
|
||||
spec:
|
||||
additionalPrinterColumns:
|
||||
- JSONPath: .status.conditions[-1:].type
|
||||
name: State
|
||||
type: string
|
||||
- JSONPath: .metadata.creationTimestamp
|
||||
name: Age
|
||||
type: date
|
||||
group: kubeflow.org
|
||||
names:
|
||||
kind: PyTorchJob
|
||||
plural: pytorchjobs
|
||||
singular: pytorchjob
|
||||
scope: Namespaced
|
||||
subresources:
|
||||
status: {}
|
||||
validation:
|
||||
openAPIV3Schema:
|
||||
properties:
|
||||
spec:
|
||||
properties:
|
||||
pytorchReplicaSpecs:
|
||||
properties:
|
||||
Master:
|
||||
properties:
|
||||
replicas:
|
||||
maximum: 1
|
||||
minimum: 1
|
||||
type: integer
|
||||
Worker:
|
||||
properties:
|
||||
replicas:
|
||||
minimum: 1
|
||||
type: integer
|
||||
versions:
|
||||
- name: v1
|
||||
served: true
|
||||
storage: true
|
|
@ -0,0 +1,53 @@
|
|||
apiVersion: apiextensions.k8s.io/v1beta1
|
||||
kind: CustomResourceDefinition
|
||||
metadata:
|
||||
labels:
|
||||
kustomize.component: tf-job-operator
|
||||
name: tfjobs.kubeflow.org
|
||||
spec:
|
||||
additionalPrinterColumns:
|
||||
- JSONPath: .status.conditions[-1:].type
|
||||
name: State
|
||||
type: string
|
||||
- JSONPath: .metadata.creationTimestamp
|
||||
name: Age
|
||||
type: date
|
||||
group: kubeflow.org
|
||||
names:
|
||||
kind: TFJob
|
||||
plural: tfjobs
|
||||
singular: tfjob
|
||||
scope: Namespaced
|
||||
subresources:
|
||||
status: {}
|
||||
validation:
|
||||
openAPIV3Schema:
|
||||
properties:
|
||||
spec:
|
||||
properties:
|
||||
tfReplicaSpecs:
|
||||
properties:
|
||||
Chief:
|
||||
properties:
|
||||
replicas:
|
||||
maximum: 1
|
||||
minimum: 1
|
||||
type: integer
|
||||
PS:
|
||||
properties:
|
||||
replicas:
|
||||
minimum: 1
|
||||
type: integer
|
||||
Worker:
|
||||
properties:
|
||||
replicas:
|
||||
minimum: 1
|
||||
type: integer
|
||||
version: v1
|
||||
versions:
|
||||
- name: v1
|
||||
served: true
|
||||
storage: true
|
||||
- name: v1alpha2
|
||||
served: true
|
||||
storage: false
|
|
@ -0,0 +1,23 @@
|
|||
# Patterns to ignore when building packages.
|
||||
# This supports shell glob matching, relative path matching, and
|
||||
# negation (prefixed with !). Only one pattern per line.
|
||||
.DS_Store
|
||||
# Common VCS dirs
|
||||
.git/
|
||||
.gitignore
|
||||
.bzr/
|
||||
.bzrignore
|
||||
.hg/
|
||||
.hgignore
|
||||
.svn/
|
||||
# Common backup files
|
||||
*.swp
|
||||
*.bak
|
||||
*.tmp
|
||||
*.orig
|
||||
*~
|
||||
# Various IDEs
|
||||
.project
|
||||
.idea/
|
||||
*.tmproj
|
||||
.vscode/
|
|
@ -0,0 +1,24 @@
|
|||
apiVersion: v2
|
||||
name: cron-operator
|
||||
description: A Helm chart for Kubernetes
|
||||
|
||||
# A chart can be either an 'application' or a 'library' chart.
|
||||
#
|
||||
# Application charts are a collection of templates that can be packaged into versioned archives
|
||||
# to be deployed.
|
||||
#
|
||||
# Library charts provide useful utilities or functions for the chart developer. They're included as
|
||||
# a dependency of application charts to inject those utilities and functions into the rendering
|
||||
# pipeline. Library charts do not define any templates and therefore cannot be deployed.
|
||||
type: application
|
||||
|
||||
# This is the chart version. This version number should be incremented each time you make changes
|
||||
# to the chart and its templates, including the app version.
|
||||
# Versions are expected to follow Semantic Versioning (https://semver.org/)
|
||||
version: 0.1.0
|
||||
|
||||
# This is the version number of the application being deployed. This version number should be
|
||||
# incremented each time you make changes to the application. Versions are not expected to
|
||||
# follow Semantic Versioning. They should reflect the version the application is using.
|
||||
# It is recommended to use it with quotes.
|
||||
appVersion: "v0.1.1"
|
|
@ -0,0 +1,74 @@
|
|||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: cron-operator
|
||||
namespace: {{ .Release.Namespace }}
|
||||
labels:
|
||||
app: cron-operator
|
||||
{{- include "arena.labels" . | nindent 4 }}
|
||||
spec:
|
||||
replicas: {{ .Values.replicas }}
|
||||
selector:
|
||||
matchLabels:
|
||||
app: cron-operator
|
||||
{{- include "arena.labels" . | nindent 6 }}
|
||||
strategy:
|
||||
rollingUpdate:
|
||||
maxSurge: 25%
|
||||
maxUnavailable: 25%
|
||||
type: RollingUpdate
|
||||
template:
|
||||
metadata:
|
||||
namespace: {{ .Release.Namespace }}
|
||||
labels:
|
||||
app: cron-operator
|
||||
{{- include "arena.labels" . | nindent 8 }}
|
||||
spec:
|
||||
containers:
|
||||
- name: cron
|
||||
image: {{ include "arena.imagePrefix" . }}/{{ .Values.image }}:{{ .Values.tag }}
|
||||
imagePullPolicy: {{ .Values.imagePullPolicy }}
|
||||
args:
|
||||
- --workloads=Cron
|
||||
ports:
|
||||
- containerPort: 8443
|
||||
name: metrics
|
||||
protocol: TCP
|
||||
{{- with .Values.resources }}
|
||||
resources:
|
||||
{{- toYaml . | nindent 12 }}
|
||||
{{- end }}
|
||||
{{- if .Values.useHostTimezone }}
|
||||
volumeMounts:
|
||||
- name: volume-localtime
|
||||
mountPath: /etc/localtime
|
||||
readOnly: true
|
||||
{{- end }}
|
||||
{{- if .Values.useHostTimezone }}
|
||||
volumes:
|
||||
- name: volume-localtime
|
||||
hostPath:
|
||||
path: /etc/localtime
|
||||
{{- end }}
|
||||
nodeSelector:
|
||||
{{- include "arena.nodeSelector" . | nindent 8 }}
|
||||
{{- include "arena.nonEdgeNodeSelector" . | nindent 8 }}
|
||||
affinity:
|
||||
podAntiAffinity:
|
||||
preferredDuringSchedulingIgnoredDuringExecution:
|
||||
- weight: 100
|
||||
podAffinityTerm:
|
||||
labelSelector:
|
||||
matchLabels:
|
||||
kustomize.component: tf-job-operator
|
||||
{{- include "arena.labels" . | nindent 18 }}
|
||||
topologyKey: kubernetes.io/hostname
|
||||
tolerations:
|
||||
{{- with .Values.global.tolerations }}
|
||||
{{- . | toYaml | nindent 6 }}
|
||||
{{- end }}
|
||||
{{- with .Values.tolerations }}
|
||||
{{- . | toYaml | nindent 6 }}
|
||||
{{- end }}
|
||||
{{- include "arena.tolerateNonEdgeNodeSelector" . | nindent 6 }}
|
||||
serviceAccountName: cron-operator
|
|
@ -0,0 +1,267 @@
|
|||
---
|
||||
apiVersion: v1
|
||||
kind: ServiceAccount
|
||||
metadata:
|
||||
name: cron-operator
|
||||
namespace: {{ .Release.Namespace }}
|
||||
labels:
|
||||
app: cron-operator
|
||||
{{- include "arena.labels" . | nindent 4 }}
|
||||
---
|
||||
apiVersion: rbac.authorization.k8s.io/v1
|
||||
kind: ClusterRole
|
||||
metadata:
|
||||
creationTimestamp: null
|
||||
name: cron-operator-role
|
||||
namespace: {{ .Release.Namespace }}
|
||||
labels:
|
||||
app: cron-operator
|
||||
{{- include "arena.labels" . | nindent 4 }}
|
||||
rules:
|
||||
- apiGroups:
|
||||
- kubeflow.org
|
||||
resources:
|
||||
- tfjobs
|
||||
verbs:
|
||||
- get
|
||||
- list
|
||||
- watch
|
||||
- create
|
||||
- update
|
||||
- patch
|
||||
- delete
|
||||
- apiGroups:
|
||||
- kubeflow.org
|
||||
resources:
|
||||
- tfjobs/status
|
||||
verbs:
|
||||
- get
|
||||
- list
|
||||
- watch
|
||||
- create
|
||||
- update
|
||||
- patch
|
||||
- delete
|
||||
- apiGroups:
|
||||
- kubeflow.org
|
||||
resources:
|
||||
- pytorchjobs
|
||||
verbs:
|
||||
- get
|
||||
- list
|
||||
- watch
|
||||
- create
|
||||
- update
|
||||
- patch
|
||||
- delete
|
||||
- apiGroups:
|
||||
- kubeflow.org
|
||||
resources:
|
||||
- pytorchjobs/status
|
||||
verbs:
|
||||
- get
|
||||
- list
|
||||
- watch
|
||||
- create
|
||||
- update
|
||||
- patch
|
||||
- delete
|
||||
- apiGroups:
|
||||
- xdl.kubedl.io
|
||||
resources:
|
||||
- xdljobs
|
||||
verbs:
|
||||
- get
|
||||
- list
|
||||
- watch
|
||||
- create
|
||||
- update
|
||||
- patch
|
||||
- delete
|
||||
- apiGroups:
|
||||
- xdl.kubedl.io
|
||||
resources:
|
||||
- xdljobs/status
|
||||
verbs:
|
||||
- get
|
||||
- list
|
||||
- watch
|
||||
- create
|
||||
- update
|
||||
- patch
|
||||
- delete
|
||||
- apiGroups:
|
||||
- xgboostjob.kubeflow.org
|
||||
resources:
|
||||
- xgboostjobs
|
||||
verbs:
|
||||
- get
|
||||
- list
|
||||
- watch
|
||||
- create
|
||||
- update
|
||||
- patch
|
||||
- delete
|
||||
- apiGroups:
|
||||
- xgboostjob.kubeflow.org
|
||||
resources:
|
||||
- xgboostjobs/status
|
||||
verbs:
|
||||
- get
|
||||
- list
|
||||
- watch
|
||||
- create
|
||||
- update
|
||||
- patch
|
||||
- delete
|
||||
- apiGroups:
|
||||
- ""
|
||||
resources:
|
||||
- pods
|
||||
verbs:
|
||||
- get
|
||||
- list
|
||||
- watch
|
||||
- create
|
||||
- update
|
||||
- patch
|
||||
- delete
|
||||
- apiGroups:
|
||||
- ""
|
||||
resources:
|
||||
- nodes
|
||||
verbs:
|
||||
- get
|
||||
- list
|
||||
- watch
|
||||
- apiGroups:
|
||||
- ""
|
||||
resources:
|
||||
- pods/status
|
||||
verbs:
|
||||
- get
|
||||
- update
|
||||
- patch
|
||||
- apiGroups:
|
||||
- ""
|
||||
resources:
|
||||
- configmaps
|
||||
verbs:
|
||||
- get
|
||||
- list
|
||||
- watch
|
||||
- create
|
||||
- update
|
||||
- patch
|
||||
- delete
|
||||
- apiGroups:
|
||||
- ""
|
||||
resources:
|
||||
- pods
|
||||
verbs:
|
||||
- get
|
||||
- list
|
||||
- watch
|
||||
- create
|
||||
- update
|
||||
- patch
|
||||
- delete
|
||||
- apiGroups:
|
||||
- ""
|
||||
resources:
|
||||
- pods/status
|
||||
verbs:
|
||||
- get
|
||||
- update
|
||||
- patch
|
||||
- apiGroups:
|
||||
- ""
|
||||
resources:
|
||||
- events
|
||||
verbs:
|
||||
- get
|
||||
- list
|
||||
- watch
|
||||
- create
|
||||
- update
|
||||
- patch
|
||||
- delete
|
||||
- apiGroups:
|
||||
- ""
|
||||
resources:
|
||||
- persistentvolumeclaims
|
||||
verbs:
|
||||
- get
|
||||
- list
|
||||
- watch
|
||||
- create
|
||||
- update
|
||||
- patch
|
||||
- delete
|
||||
- apiGroups:
|
||||
- apps
|
||||
resources:
|
||||
- controllerrevisions
|
||||
verbs:
|
||||
- get
|
||||
- list
|
||||
- watch
|
||||
- create
|
||||
- update
|
||||
- patch
|
||||
- delete
|
||||
- apiGroups:
|
||||
- admissionregistration.k8s.io
|
||||
resources:
|
||||
- mutatingwebhookconfigurations
|
||||
- validatingwebhookconfigurations
|
||||
verbs:
|
||||
- get
|
||||
- list
|
||||
- watch
|
||||
- create
|
||||
- update
|
||||
- patch
|
||||
- delete
|
||||
- apiGroups:
|
||||
- ""
|
||||
resources:
|
||||
- services
|
||||
verbs:
|
||||
- get
|
||||
- list
|
||||
- watch
|
||||
- create
|
||||
- update
|
||||
- patch
|
||||
- delete
|
||||
- apiGroups:
|
||||
- "apps.kubedl.io"
|
||||
resources:
|
||||
- crons
|
||||
- crons/status
|
||||
verbs:
|
||||
- get
|
||||
- list
|
||||
- watch
|
||||
- create
|
||||
- update
|
||||
- patch
|
||||
- delete
|
||||
---
|
||||
apiVersion: rbac.authorization.k8s.io/v1
|
||||
kind: ClusterRoleBinding
|
||||
metadata:
|
||||
creationTimestamp: null
|
||||
name: cron-operator-rolebinding
|
||||
labels:
|
||||
app: cron-operator
|
||||
{{- include "arena.labels" . | nindent 4 }}
|
||||
roleRef:
|
||||
apiGroup: rbac.authorization.k8s.io
|
||||
kind: ClusterRole
|
||||
name: cron-operator-role
|
||||
subjects:
|
||||
- kind: ServiceAccount
|
||||
name: cron-operator
|
||||
namespace: {{ .Release.Namespace }}
|
|
@ -0,0 +1,19 @@
|
|||
---
|
||||
# ClusterIP Service in front of the cron-operator pods.
kind: Service
apiVersion: v1
metadata:
  name: cron-operator
  namespace: {{ .Release.Namespace }}
  labels:
    app: cron-operator
    {{- include "arena.labels" . | nindent 4 }}
spec:
  type: ClusterIP
  # Pods are matched on the app label plus the shared arena labels.
  selector:
    app: cron-operator
    {{- include "arena.labels" . | nindent 4 }}
  ports:
    # Service port 80 forwards to the container port named "metrics".
    - name: metrics
      protocol: TCP
      port: 80
      targetPort: metrics
|
|
@ -0,0 +1,21 @@
|
|||
# Default values for cron-operator.
# This is a YAML-formatted file.
# Declare variables to be passed into your templates.

# -- Replicas of cron-operator deployment.
replicas: 1

# -- Whether to use host timezone in the container.
useHostTimezone: false

# -- Resources for cron-operator pods.
resources:
  requests: {cpu: 100m, memory: 256Mi}
  limits: {cpu: 200m, memory: 2Gi}

# -- Tolerations for cron-operator pods.
tolerations: []
|
|
@ -0,0 +1,23 @@
|
|||
# Patterns to ignore when building packages.
|
||||
# This supports shell glob matching, relative path matching, and
|
||||
# negation (prefixed with !). Only one pattern per line.
|
||||
.DS_Store
|
||||
# Common VCS dirs
|
||||
.git/
|
||||
.gitignore
|
||||
.bzr/
|
||||
.bzrignore
|
||||
.hg/
|
||||
.hgignore
|
||||
.svn/
|
||||
# Common backup files
|
||||
*.swp
|
||||
*.bak
|
||||
*.tmp
|
||||
*.orig
|
||||
*~
|
||||
# Various IDEs
|
||||
.project
|
||||
.idea/
|
||||
*.tmproj
|
||||
.vscode/
|
|
@ -0,0 +1,24 @@
|
|||
apiVersion: v2
|
||||
name: elastic-job-supervisor
|
||||
description: A Helm chart for Kubernetes
|
||||
|
||||
# A chart can be either an 'application' or a 'library' chart.
|
||||
#
|
||||
# Application charts are a collection of templates that can be packaged into versioned archives
|
||||
# to be deployed.
|
||||
#
|
||||
# Library charts provide useful utilities or functions for the chart developer. They're included as
|
||||
# a dependency of application charts to inject those utilities and functions into the rendering
|
||||
# pipeline. Library charts do not define any templates and therefore cannot be deployed.
|
||||
type: application
|
||||
|
||||
# This is the chart version. This version number should be incremented each time you make changes
|
||||
# to the chart and its templates, including the app version.
|
||||
# Versions are expected to follow Semantic Versioning (https://semver.org/)
|
||||
version: 1.2.0
|
||||
|
||||
# This is the version number of the application being deployed. This version number should be
|
||||
# incremented each time you make changes to the application. Versions are not expected to
|
||||
# follow Semantic Versioning. They should reflect the version the application is using.
|
||||
# It is recommended to use it with quotes.
|
||||
appVersion: "v1.2.0"
|
|
@ -0,0 +1,50 @@
|
|||
# Deployment running a single replica of the elastic-job-supervisor.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: elastic-job-supervisor
  namespace: {{ .Release.Namespace }}
  labels:
    app: elastic-job-supervisor
    {{- include "arena.labels" . | nindent 4 }}
spec:
  replicas: 1
  selector:
    matchLabels:
      app: elastic-job-supervisor
      {{- include "arena.labels" . | nindent 6 }}
  strategy:
    type: RollingUpdate
    rollingUpdate:
      maxSurge: 25%
      maxUnavailable: 25%
  template:
    metadata:
      labels:
        {{- include "arena.labels" . | nindent 8 }}
        app: elastic-job-supervisor
    spec:
      # Only serviceAccountName is set: the former `serviceAccount` field is a
      # deprecated alias of it and carried the same value, so it was dropped.
      serviceAccountName: elastic-job-supervisor
      nodeSelector:
        {{- include "arena.nodeSelector" . | nindent 8 }}
        {{- include "arena.nonEdgeNodeSelector" . | nindent 8 }}
      # Tolerations are assembled from the global values, the chart's own
      # values and the "arena.tolerateNonEdgeNodeSelector" helper, in that order.
      tolerations:
      {{- with .Values.global.tolerations }}
      {{- . | toYaml | nindent 6 }}
      {{- end }}
      {{- with .Values.tolerations }}
      {{- . | toYaml | nindent 6 }}
      {{- end }}
      {{- include "arena.tolerateNonEdgeNodeSelector" . | nindent 6 }}
      containers:
      - name: elastic-job-supervisor
        image: {{ include "arena.imagePrefix" . }}/{{ .Values.image }}:{{ .Values.tag }}
        imagePullPolicy: {{ .Values.imagePullPolicy }}
        command:
        - /job-supervisor
        resources:
          {{- toYaml .Values.resources | nindent 12 }}
      dnsPolicy: ClusterFirst
      restartPolicy: Always
      schedulerName: default-scheduler
      terminationGracePeriodSeconds: 30
|
|
@ -0,0 +1,64 @@
|
|||
|
||||
# ServiceAccount referenced by the elastic-job-supervisor Deployment and
# by the ClusterRoleBinding of the same name.
kind: ServiceAccount
apiVersion: v1
metadata:
  labels:
    {{- include "arena.labels" . | nindent 4 }}
  name: elastic-job-supervisor
  namespace: {{ .Release.Namespace }}
|
||||
|
||||
---
|
||||
# Cluster-wide permissions granted to elastic-job-supervisor.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: elastic-job-supervisor
  labels:
    {{- include "arena.labels" . | nindent 4 }}
rules:
  # Core pods and events: read, watch and modify (no create or delete).
  - apiGroups: [""]
    resources: [pods, events]
    verbs: [get, list, patch, update, watch]
  # Nodes: read-only.
  - apiGroups: [""]
    resources: [nodes]
    verbs: [get, list, watch]
  # Every resource of the kubeflow.org API group, all verbs.
  - apiGroups: [kubeflow.org]
    resources: ["*"]
    verbs: ["*"]
  # Every resource of the kai.alibabacloud.com API group, all verbs.
  - apiGroups: [kai.alibabacloud.com]
    resources: ["*"]
    verbs: ["*"]
|
||||
|
||||
---
|
||||
# Grants the elastic-job-supervisor ClusterRole to the ServiceAccount of
# the same name in the release namespace.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: elastic-job-supervisor
  labels:
    {{- include "arena.labels" . | nindent 4 }}
subjects:
  - kind: ServiceAccount
    name: elastic-job-supervisor
    namespace: {{ .Release.Namespace }}
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: elastic-job-supervisor
|
|
@ -0,0 +1,3 @@
|
|||
# Default values for elastic-job-supervisor
|
||||
# This is a YAML-formatted file.
|
||||
# Declare variables to be passed into your templates.
|
|
@ -0,0 +1,23 @@
|
|||
# Patterns to ignore when building packages.
|
||||
# This supports shell glob matching, relative path matching, and
|
||||
# negation (prefixed with !). Only one pattern per line.
|
||||
.DS_Store
|
||||
# Common VCS dirs
|
||||
.git/
|
||||
.gitignore
|
||||
.bzr/
|
||||
.bzrignore
|
||||
.hg/
|
||||
.hgignore
|
||||
.svn/
|
||||
# Common backup files
|
||||
*.swp
|
||||
*.bak
|
||||
*.tmp
|
||||
*.orig
|
||||
*~
|
||||
# Various IDEs
|
||||
.project
|
||||
.idea/
|
||||
*.tmproj
|
||||
.vscode/
|
|
@ -0,0 +1,24 @@
|
|||
apiVersion: v2
|
||||
name: et-operator
|
||||
description: A Helm chart for Kubernetes
|
||||
|
||||
# A chart can be either an 'application' or a 'library' chart.
|
||||
#
|
||||
# Application charts are a collection of templates that can be packaged into versioned archives
|
||||
# to be deployed.
|
||||
#
|
||||
# Library charts provide useful utilities or functions for the chart developer. They're included as
|
||||
# a dependency of application charts to inject those utilities and functions into the rendering
|
||||
# pipeline. Library charts do not define any templates and therefore cannot be deployed.
|
||||
type: application
|
||||
|
||||
# This is the chart version. This version number should be incremented each time you make changes
|
||||
# to the chart and its templates, including the app version.
|
||||
# Versions are expected to follow Semantic Versioning (https://semver.org/)
|
||||
version: 0.1.1
|
||||
|
||||
# This is the version number of the application being deployed. This version number should be
|
||||
# incremented each time you make changes to the application. Versions are not expected to
|
||||
# follow Semantic Versioning. They should reflect the version the application is using.
|
||||
# It is recommended to use it with quotes.
|
||||
appVersion: "v0.1.1"
|
|
@ -0,0 +1,46 @@
|
|||
---
|
||||
# Deployment running a single replica of the et-operator manager.
kind: Deployment
apiVersion: apps/v1
metadata:
  name: et-operator
  namespace: {{ .Release.Namespace }}
  labels:
    app: et-operator
    {{- include "arena.labels" . | nindent 4 }}
spec:
  replicas: 1
  selector:
    matchLabels:
      app: et-operator
      {{- include "arena.labels" . | nindent 6 }}
  template:
    metadata:
      labels:
        {{- include "arena.labels" . | nindent 8 }}
        app: et-operator
    spec:
      serviceAccountName: et-operator
      terminationGracePeriodSeconds: 10
      nodeSelector:
        {{- include "arena.nodeSelector" . | nindent 8 }}
        {{- include "arena.nonEdgeNodeSelector" . | nindent 8 }}
      # Global tolerations first, then chart-level ones, then the helper's.
      tolerations:
      {{- with .Values.global.tolerations }}
      {{- . | toYaml | nindent 6 }}
      {{- end }}
      {{- with .Values.tolerations }}
      {{- . | toYaml | nindent 6 }}
      {{- end }}
      {{- include "arena.tolerateNonEdgeNodeSelector" . | nindent 6 }}
      containers:
      - name: manager
        image: {{ include "arena.imagePrefix" . }}/{{ .Values.image }}:{{ .Values.tag }}
        imagePullPolicy: {{ .Values.imagePullPolicy }}
        command:
        - /manager
        # createSSHSecret also gates the secrets rule of the et-operator ClusterRole.
        args:
        - --enable-leader-election
        - --create-ssh-secret={{ .Values.createSSHSecret }}
        - --init-container-image={{ .Values.initContainerImage }}
        resources:
          {{- toYaml .Values.resources | nindent 12 }}
|
|
@ -0,0 +1,255 @@
|
|||
---
|
||||
# ServiceAccount used by the et-operator Deployment and its role bindings.
kind: ServiceAccount
apiVersion: v1
metadata:
  name: et-operator
  namespace: {{ .Release.Namespace }}
  labels:
    app: et-operator
    {{- include "arena.labels" . | nindent 4 }}
|
||||
---
|
||||
# Namespaced Role bound to the et-operator ServiceAccount through the
# et-operator-leader-election RoleBinding.
kind: Role
apiVersion: rbac.authorization.k8s.io/v1
metadata:
  name: et-operator-leader-election
  namespace: {{ .Release.Namespace }}
  labels:
    {{- include "arena.labels" . | nindent 4 }}
rules:
  # Full management of ConfigMaps in the release namespace.
  - apiGroups: [""]
    resources: [configmaps]
    verbs: [get, list, watch, create, update, patch, delete]
  - apiGroups: [""]
    resources: [configmaps/status]
    verbs: [get, update, patch]
  # Events may only be created.
  - apiGroups: [""]
    resources: [events]
    verbs: [create]
|
||||
---
|
||||
# Cluster-wide permissions for the et-operator manager.
kind: ClusterRole
apiVersion: rbac.authorization.k8s.io/v1
metadata:
  name: et-operator
  creationTimestamp: null
  labels:
    {{- include "arena.labels" . | nindent 4 }}
rules:
  # --- core API group ---
  - apiGroups: [""]
    resources: [configmaps]
    verbs: [create, delete, get, list, patch, update, watch]
  - apiGroups: [""]
    resources: [events]
    verbs: [create, delete, get, list, patch, update, watch]
  - apiGroups: [""]
    resources: [pods]
    verbs: [create, delete, get, list, patch, update, watch]
  - apiGroups: [""]
    resources: [pods/exec]
    verbs: [create]
  - apiGroups: [""]
    resources: [pods/status]
    verbs: [get, patch, update]
  - apiGroups: [""]
    resources: [serviceaccounts]
    verbs: [create, delete, get, list, patch, update, watch]
  - apiGroups: [""]
    resources: [services]
    verbs: [create, delete, get, list, patch, update, watch]
  - apiGroups: [""]
    resources: [services/status]
    verbs: [get, patch, update]
  # --- kai.alibabacloud.com custom resources and their status subresources ---
  - apiGroups: [kai.alibabacloud.com]
    resources: [scaleins]
    verbs: [create, delete, get, list, patch, update, watch]
  - apiGroups: [kai.alibabacloud.com]
    resources: [scaleins/status]
    verbs: [get, patch, update]
  - apiGroups: [kai.alibabacloud.com]
    resources: [scaleouts]
    verbs: [create, delete, get, list, patch, update, watch]
  - apiGroups: [kai.alibabacloud.com]
    resources: [scaleouts/status]
    verbs: [get, patch, update]
  - apiGroups: [kai.alibabacloud.com]
    resources: [trainingjobs]
    verbs: [create, delete, get, list, patch, update, watch]
  - apiGroups: [kai.alibabacloud.com]
    resources: [trainingjobs/status]
    verbs: [get, patch, update]
  # --- RBAC objects: create and read only ---
  - apiGroups: [rbac.authorization.k8s.io]
    resources: [rolebindings]
    verbs: [create, get, list, watch]
  - apiGroups: [rbac.authorization.k8s.io]
    resources: [roles]
    verbs: [create, get, list, watch]
  # Secret access is only granted when createSSHSecret is enabled.
  {{- if .Values.createSSHSecret }}
  - apiGroups: [""]
    resources: [secrets]
    verbs: [get, list, watch, create]
  {{- end }}
|
||||
---
|
||||
# Binds the et-operator-leader-election Role to the et-operator
# ServiceAccount inside the release namespace.
kind: RoleBinding
apiVersion: rbac.authorization.k8s.io/v1
metadata:
  name: et-operator-leader-election
  namespace: {{ .Release.Namespace }}
  labels:
    {{- include "arena.labels" . | nindent 4 }}
subjects:
  - kind: ServiceAccount
    name: et-operator
    namespace: {{ .Release.Namespace }}
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: Role
  name: et-operator-leader-election
|
||||
---
|
||||
# Binds the et-operator ClusterRole to the et-operator ServiceAccount
# of the release namespace.
kind: ClusterRoleBinding
apiVersion: rbac.authorization.k8s.io/v1
metadata:
  name: et-operator
  labels:
    {{- include "arena.labels" . | nindent 4 }}
subjects:
  - kind: ServiceAccount
    name: et-operator
    namespace: {{ .Release.Namespace }}
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: et-operator
|
|
@ -0,0 +1,3 @@
|
|||
# Default values for et-operator.
|
||||
# This is a YAML-formatted file.
|
||||
# Declare variables to be passed into your templates.
|
|
@ -0,0 +1,23 @@
|
|||
# Patterns to ignore when building packages.
|
||||
# This supports shell glob matching, relative path matching, and
|
||||
# negation (prefixed with !). Only one pattern per line.
|
||||
.DS_Store
|
||||
# Common VCS dirs
|
||||
.git/
|
||||
.gitignore
|
||||
.bzr/
|
||||
.bzrignore
|
||||
.hg/
|
||||
.hgignore
|
||||
.svn/
|
||||
# Common backup files
|
||||
*.swp
|
||||
*.bak
|
||||
*.tmp
|
||||
*.orig
|
||||
*~
|
||||
# Various IDEs
|
||||
.project
|
||||
.idea/
|
||||
*.tmproj
|
||||
.vscode/
|
|
@ -0,0 +1,24 @@
|
|||
apiVersion: v2
|
||||
name: gpu-exporter
|
||||
description: A Helm chart for Kubernetes
|
||||
|
||||
# A chart can be either an 'application' or a 'library' chart.
|
||||
#
|
||||
# Application charts are a collection of templates that can be packaged into versioned archives
|
||||
# to be deployed.
|
||||
#
|
||||
# Library charts provide useful utilities or functions for the chart developer. They're included as
|
||||
# a dependency of application charts to inject those utilities and functions into the rendering
|
||||
# pipeline. Library charts do not define any templates and therefore cannot be deployed.
|
||||
type: application
|
||||
|
||||
# This is the chart version. This version number should be incremented each time you make changes
|
||||
# to the chart and its templates, including the app version.
|
||||
# Versions are expected to follow Semantic Versioning (https://semver.org/)
|
||||
version: 0.1.0
|
||||
|
||||
# This is the version number of the application being deployed. This version number should be
|
||||
# incremented each time you make changes to the application. Versions are not expected to
|
||||
# follow Semantic Versioning. They should reflect the version the application is using.
|
||||
# It is recommended to use it with quotes.
|
||||
appVersion: "1.0.0"
|
|
@ -0,0 +1,69 @@
|
|||
# DaemonSet running the GPU metrics exporter on nodes that carry the
# aliyun.accelerator/nvidia_name label.
apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: ack-prometheus-gpu-exporter
  namespace: {{ .Release.Namespace }}
  labels:
    {{- include "arena.labels" . | nindent 4 }}
spec:
  selector:
    matchLabels:
      {{- include "arena.labels" . | nindent 6 }}
      k8s-app: ack-prometheus-gpu-exporter
  template:
    metadata:
      labels:
        {{- include "arena.labels" . | nindent 8 }}
        k8s-app: ack-prometheus-gpu-exporter
    spec:
      # Hard scheduling requirement: the node label must exist (any value).
      affinity:
        nodeAffinity:
          requiredDuringSchedulingIgnoredDuringExecution:
            nodeSelectorTerms:
            - matchExpressions:
              - key: aliyun.accelerator/nvidia_name
                operator: Exists
      nodeSelector:
        {{- include "arena.nodeSelector" . | nindent 8 }}
        {{- include "arena.nonEdgeNodeSelector" . | nindent 8 }}
      # Global tolerations first, then chart-level ones, then the helper's.
      tolerations:
      {{- with .Values.global.tolerations }}
      {{- . | toYaml | nindent 6 }}
      {{- end }}
      {{- with .Values.tolerations }}
      {{- . | toYaml | nindent 6 }}
      {{- end }}
      {{- include "arena.tolerateNonEdgeNodeSelector" . | nindent 6 }}
      containers:
      - name: node-gpu-exporter
        image: {{ include "arena.imagePrefix" . }}/{{ .Values.image }}:{{ .Values.tag }}
        imagePullPolicy: {{ .Values.imagePullPolicy }}
        env:
        # Name of the node the pod is scheduled on, via the downward API.
        - name: MY_NODE_NAME
          valueFrom:
            fieldRef:
              apiVersion: v1
              fieldPath: spec.nodeName
        securityContext:
          privileged: true
        ports:
        # Port name must match the Service and ServiceMonitor ("http-metrics").
        - containerPort: 9445
          name: http-metrics
          protocol: TCP
        resources:
          limits:
            cpu: 300m
            memory: 300Mi
          requests:
            cpu: 200m
            memory: 50Mi
        volumeMounts:
        - mountPath: /var/run/docker.sock
          name: docker-sock
      hostPID: true
      restartPolicy: Always
      volumes:
      - name: docker-sock
        hostPath:
          path: /var/run/docker.sock
          # The Docker endpoint is a UNIX socket, so the hostPath type check
          # must be Socket; `File` requires a regular file at the path.
          type: Socket
|
|
@ -0,0 +1,17 @@
|
|||
# ClusterIP Service exposing the GPU exporter's metrics port (9445).
kind: Service
apiVersion: v1
metadata:
  name: node-gpu-exporter
  namespace: {{ .Release.Namespace }}
  labels:
    k8s-app: ack-prometheus-gpu-exporter
    {{- include "arena.labels" . | nindent 4 }}
spec:
  type: ClusterIP
  selector:
    k8s-app: ack-prometheus-gpu-exporter
  ports:
    - name: http-metrics
      protocol: TCP
      port: 9445
      targetPort: 9445
|
|
@ -0,0 +1,22 @@
|
|||
{{- if .Capabilities.APIVersions.Has "monitoring.coreos.com/v1" -}}
# Rendered only when the cluster serves the monitoring.coreos.com/v1 API.
# Scrapes /metrics on the "http-metrics" port of matching Services every 45s.
kind: ServiceMonitor
apiVersion: monitoring.coreos.com/v1
metadata:
  name: node-gpu-exporter
  namespace: {{ .Release.Namespace }}
  labels:
    k8s-app: ack-prometheus-gpu-exporter
    {{- include "arena.labels" . | nindent 4 }}
spec:
  # Restrict discovery to the release namespace.
  namespaceSelector:
    matchNames:
      - {{ .Release.Namespace }}
    # any: true
  selector:
    matchLabels:
      k8s-app: ack-prometheus-gpu-exporter
  endpoints:
    - port: http-metrics
      path: /metrics
      interval: "45s"
{{- end }}
|
|
@ -0,0 +1 @@
|
|||
|
|
@ -0,0 +1,23 @@
|
|||
# Patterns to ignore when building packages.
|
||||
# This supports shell glob matching, relative path matching, and
|
||||
# negation (prefixed with !). Only one pattern per line.
|
||||
.DS_Store
|
||||
# Common VCS dirs
|
||||
.git/
|
||||
.gitignore
|
||||
.bzr/
|
||||
.bzrignore
|
||||
.hg/
|
||||
.hgignore
|
||||
.svn/
|
||||
# Common backup files
|
||||
*.swp
|
||||
*.bak
|
||||
*.tmp
|
||||
*.orig
|
||||
*~
|
||||
# Various IDEs
|
||||
.project
|
||||
.idea/
|
||||
*.tmproj
|
||||
.vscode/
|
|
@ -0,0 +1,24 @@
|
|||
apiVersion: v2
|
||||
name: mpi-operator
|
||||
description: A Helm chart for Kubernetes
|
||||
|
||||
# A chart can be either an 'application' or a 'library' chart.
|
||||
#
|
||||
# Application charts are a collection of templates that can be packaged into versioned archives
|
||||
# to be deployed.
|
||||
#
|
||||
# Library charts provide useful utilities or functions for the chart developer. They're included as
|
||||
# a dependency of application charts to inject those utilities and functions into the rendering
|
||||
# pipeline. Library charts do not define any templates and therefore cannot be deployed.
|
||||
type: application
|
||||
|
||||
# This is the chart version. This version number should be incremented each time you make changes
|
||||
# to the chart and its templates, including the app version.
|
||||
# Versions are expected to follow Semantic Versioning (https://semver.org/)
|
||||
version: 0.1.0
|
||||
|
||||
# This is the version number of the application being deployed. This version number should be
|
||||
# incremented each time you make changes to the application. Versions are not expected to
|
||||
# follow Semantic Versioning. They should reflect the version the application is using.
|
||||
# It is recommended to use it with quotes.
|
||||
appVersion: "v1.0.0-aliyun"
|
|
@ -0,0 +1,46 @@
|
|||
---
|
||||
# Deployment running a single replica of the mpi-operator.
kind: Deployment
apiVersion: apps/v1
metadata:
  labels:
    app: mpi-operator
    {{- include "arena.labels" . | nindent 4 }}
  name: mpi-operator
  namespace: {{ .Release.Namespace }}
spec:
  replicas: 1
  selector:
    matchLabels:
      app: mpi-operator
      {{- include "arena.labels" . | nindent 6 }}
  template:
    metadata:
      labels:
        app: mpi-operator
        {{- include "arena.labels" . | nindent 8 }}
    spec:
      nodeSelector:
        {{- include "arena.nodeSelector" . | nindent 8 }}
        {{- include "arena.nonEdgeNodeSelector" . | nindent 8 }}
      # Global tolerations first, then chart-level ones, then the helper's.
      tolerations:
      {{- with .Values.global.tolerations }}
      {{- . | toYaml | nindent 6 }}
      {{- end }}
      {{- with .Values.tolerations }}
      {{- . | toYaml | nindent 6 }}
      {{- end }}
      {{- include "arena.tolerateNonEdgeNodeSelector" . | nindent 6 }}
      serviceAccountName: mpi-operator
      containers:
      - name: mpi-operator
        imagePullPolicy: {{ .Values.imagePullPolicy }}
        image: {{ include "arena.imagePrefix" . }}/{{ .Values.image }}:{{ .Values.tag }}
        resources:
          {{- toYaml .Values.resources | nindent 10 }}
        # Flag/value pairs are passed as separate list items.
        args:
        - --gpus-per-node
        - "8"
        - --kubectl-delivery-image
        - {{ include "arena.imagePrefix" . }}/{{ .Values.kubectlDelivery.image }}:{{ .Values.kubectlDelivery.tag }}
        - --alsologtostderr
        - --v=5
|
|
@ -0,0 +1,103 @@
|
|||
---
|
||||
# Cluster-wide permissions for the mpi-operator.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: mpi-operator
  labels:
    {{- include "arena.labels" . | nindent 4 }}
rules:
  - apiGroups: [""]
    resources: [configmaps, serviceaccounts]
    verbs: [create, list, watch]
  # This is needed for the launcher Role.
  - apiGroups: [""]
    resources: [pods]
    verbs: [get]
  # This is needed for the launcher Role.
  - apiGroups: [""]
    resources: [pods/exec]
    verbs: [create]
  - apiGroups: [""]
    resources: [events]
    verbs: [create, patch]
  - apiGroups: [rbac.authorization.k8s.io]
    resources: [roles, rolebindings]
    verbs: [create, list, watch]
  - apiGroups: [apps]
    resources: [statefulsets]
    verbs: [create, list, update, watch]
  - apiGroups: [batch]
    resources: [jobs]
    verbs: [create, list, update, watch]
  - apiGroups: [apiextensions.k8s.io]
    resources: [customresourcedefinitions]
    verbs: [create, get]
  # All verbs on MPIJob objects and their status subresource.
  - apiGroups: [kubeflow.org]
    resources: [mpijobs, mpijobs/status]
    verbs: ["*"]
|
||||
---
|
||||
# ServiceAccount referenced by the mpi-operator Deployment and
# by the ClusterRoleBinding of the same name.
kind: ServiceAccount
apiVersion: v1
metadata:
  labels:
    {{- include "arena.labels" . | nindent 4 }}
  name: mpi-operator
  namespace: {{ .Release.Namespace }}
|
||||
---
|
||||
# Binds the mpi-operator ClusterRole to the mpi-operator ServiceAccount of
# the release namespace.
# ClusterRoleBinding is a cluster-scoped kind, so metadata carries no
# namespace; only the ServiceAccount subject is namespaced.
kind: ClusterRoleBinding
apiVersion: rbac.authorization.k8s.io/v1
metadata:
  name: mpi-operator
  labels:
    {{- include "arena.labels" . | nindent 4 }}
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: mpi-operator
subjects:
- kind: ServiceAccount
  name: mpi-operator
  namespace: {{ .Release.Namespace }}
|
|
@ -0,0 +1,4 @@
|
|||
# Default values for mpi-operator.
|
||||
# This is a YAML-formatted file.
|
||||
# Declare variables to be passed into your templates.
|
||||
|
|
@ -0,0 +1,23 @@
|
|||
# Patterns to ignore when building packages.
|
||||
# This supports shell glob matching, relative path matching, and
|
||||
# negation (prefixed with !). Only one pattern per line.
|
||||
.DS_Store
|
||||
# Common VCS dirs
|
||||
.git/
|
||||
.gitignore
|
||||
.bzr/
|
||||
.bzrignore
|
||||
.hg/
|
||||
.hgignore
|
||||
.svn/
|
||||
# Common backup files
|
||||
*.swp
|
||||
*.bak
|
||||
*.tmp
|
||||
*.orig
|
||||
*~
|
||||
# Various IDEs
|
||||
.project
|
||||
.idea/
|
||||
*.tmproj
|
||||
.vscode/
|
|
@ -0,0 +1,24 @@
|
|||
apiVersion: v2
|
||||
name: pytorch-operator
|
||||
description: A Helm chart for Kubernetes
|
||||
|
||||
# A chart can be either an 'application' or a 'library' chart.
|
||||
#
|
||||
# Application charts are a collection of templates that can be packaged into versioned archives
|
||||
# to be deployed.
|
||||
#
|
||||
# Library charts provide useful utilities or functions for the chart developer. They're included as
|
||||
# a dependency of application charts to inject those utilities and functions into the rendering
|
||||
# pipeline. Library charts do not define any templates and therefore cannot be deployed.
|
||||
type: application
|
||||
|
||||
# This is the chart version. This version number should be incremented each time you make changes
|
||||
# to the chart and its templates, including the app version.
|
||||
# Versions are expected to follow Semantic Versioning (https://semver.org/)
|
||||
version: 0.1.0
|
||||
|
||||
# This is the version number of the application being deployed. This version number should be
|
||||
# incremented each time you make changes to the application. Versions are not expected to
|
||||
# follow Semantic Versioning. They should reflect the version the application is using.
|
||||
# It is recommended to use it with quotes.
|
||||
appVersion: "v0.7.0"
|
|
@ -0,0 +1,48 @@
|
|||
---
|
||||
# Deployment running a single replica of the pytorch-operator.
kind: Deployment
apiVersion: apps/v1
metadata:
  labels:
    {{- include "arena.labels" . | nindent 4 }}
  name: pytorch-operator
  namespace: {{ .Release.Namespace }}
spec:
  replicas: 1
  selector:
    matchLabels:
      name: pytorch-operator
      {{- include "arena.labels" . | nindent 6 }}
  template:
    metadata:
      annotations:
        sidecar.istio.io/inject: "false"
      labels:
        name: pytorch-operator
        {{- include "arena.labels" . | nindent 8 }}
    spec:
      serviceAccountName: pytorch-operator
      nodeSelector:
        {{- include "arena.nodeSelector" . | nindent 8 }}
        {{- include "arena.nonEdgeNodeSelector" . | nindent 8 }}
      # Global tolerations first, then chart-level ones, then the helper's.
      tolerations:
      {{- with .Values.global.tolerations }}
      {{- . | toYaml | nindent 6 }}
      {{- end }}
      {{- with .Values.tolerations }}
      {{- . | toYaml | nindent 6 }}
      {{- end }}
      {{- include "arena.tolerateNonEdgeNodeSelector" . | nindent 6 }}
      containers:
      - name: pytorch-operator
        # image: gcr.io/kubeflow-images-public/pytorch-operator:v0.6.0-18-g5e36a57
        image: {{ include "arena.imagePrefix" . }}/{{ .Values.image }}:{{ .Values.tag }}
        imagePullPolicy: {{ .Values.imagePullPolicy }}
        # The monitoring port (8443) is the one the pytorch-operator Service targets.
        command:
        - /pytorch-operator.v1
        - --alsologtostderr
        - -v=1
        - --monitoring-port=8443
        - --threadiness=4
        - --init-container-image={{ .Values.initContainerImage }}
        resources:
          {{- toYaml .Values.resources | nindent 12 }}
|
|
@ -0,0 +1,70 @@
|
|||
---
|
||||
# ServiceAccount used by the pytorch-operator Deployment and its
# ClusterRoleBinding.
kind: ServiceAccount
apiVersion: v1
metadata:
  name: pytorch-operator
  namespace: {{ .Release.Namespace }}
  labels:
    app: pytorch-operator
    {{- include "arena.labels" . | nindent 4 }}
|
||||
---
|
||||
# Cluster-wide permissions for the pytorch-operator.
kind: ClusterRole
apiVersion: rbac.authorization.k8s.io/v1
metadata:
  name: pytorch-operator
  labels:
    app: pytorch-operator
    {{- include "arena.labels" . | nindent 4 }}
rules:
  # Full management of PyTorchJob objects and their status subresource.
  - apiGroups: [kubeflow.org]
    resources: [pytorchjobs, pytorchjobs/status]
    verbs: [create, delete, get, list, patch, update, watch]
  # Read-only access to CustomResourceDefinitions.
  - apiGroups: [apiextensions.k8s.io]
    resources: [customresourcedefinitions]
    verbs: [get, list, watch]
  # Full management of the listed core resources.
  - apiGroups: [""]
    resources: [pods, services, endpoints, events]
    verbs: [create, delete, get, list, patch, update, watch]
|
||||
---
|
||||
# Binds the pytorch-operator ClusterRole to the pytorch-operator
# ServiceAccount of the release namespace.
kind: ClusterRoleBinding
apiVersion: rbac.authorization.k8s.io/v1
metadata:
  name: pytorch-operator
  labels:
    app: pytorch-operator
    {{- include "arena.labels" . | nindent 4 }}
subjects:
  - kind: ServiceAccount
    name: pytorch-operator
    namespace: {{ .Release.Namespace }}
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: pytorch-operator
|
|
@ -0,0 +1,22 @@
|
|||
---
|
||||
# ClusterIP Service exposing the pytorch-operator monitoring port (8443).
kind: Service
apiVersion: v1
metadata:
  name: pytorch-operator
  namespace: {{ .Release.Namespace }}
  # prometheus.io/* annotations advertise /metrics on port 8443 for scraping.
  annotations:
    prometheus.io/scrape: "true"
    prometheus.io/port: "8443"
    prometheus.io/path: /metrics
  labels:
    app: pytorch-operator
    {{- include "arena.labels" . | nindent 4 }}
spec:
  type: ClusterIP
  # Matches the pod labels set by the pytorch-operator Deployment template.
  selector:
    name: pytorch-operator
    {{- include "arena.labels" . | nindent 4 }}
  ports:
    - name: monitoring-port
      port: 8443
      targetPort: 8443
|
|
@ -0,0 +1,3 @@
|
|||
# Default values for pytorch-operator.
|
||||
# This is a YAML-formatted file.
|
||||
# Declare variables to be passed into your templates.
|
|
@ -0,0 +1,23 @@
|
|||
# Patterns to ignore when building packages.
|
||||
# This supports shell glob matching, relative path matching, and
|
||||
# negation (prefixed with !). Only one pattern per line.
|
||||
.DS_Store
|
||||
# Common VCS dirs
|
||||
.git/
|
||||
.gitignore
|
||||
.bzr/
|
||||
.bzrignore
|
||||
.hg/
|
||||
.hgignore
|
||||
.svn/
|
||||
# Common backup files
|
||||
*.swp
|
||||
*.bak
|
||||
*.tmp
|
||||
*.orig
|
||||
*~
|
||||
# Various IDEs
|
||||
.project
|
||||
.idea/
|
||||
*.tmproj
|
||||
.vscode/
|
|
@ -0,0 +1,24 @@
|
|||
apiVersion: v2
|
||||
name: tf-dashboard
|
||||
description: A Helm chart for deploying the TFJob dashboard (tf-job-dashboard) on Kubernetes
|
||||
|
||||
# A chart can be either an 'application' or a 'library' chart.
|
||||
#
|
||||
# Application charts are a collection of templates that can be packaged into versioned archives
|
||||
# to be deployed.
|
||||
#
|
||||
# Library charts provide useful utilities or functions for the chart developer. They're included as
|
||||
# a dependency of application charts to inject those utilities and functions into the rendering
|
||||
# pipeline. Library charts do not define any templates and therefore cannot be deployed.
|
||||
type: application
|
||||
|
||||
# This is the chart version. This version number should be incremented each time you make changes
|
||||
# to the chart and its templates, including the app version.
|
||||
# Versions are expected to follow Semantic Versioning (https://semver.org/)
|
||||
version: 0.1.0
|
||||
|
||||
# This is the version number of the application being deployed. This version number should be
|
||||
# incremented each time you make changes to the application. Versions are not expected to
|
||||
# follow Semantic Versioning. They should reflect the version the application is using.
|
||||
# It is recommended to use it with quotes.
|
||||
appVersion: "1.16.0"
|
|
@ -0,0 +1,47 @@
|
|||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
labels:
|
||||
kustomize.component: tf-job-operator
|
||||
{{- include "arena.labels" . | nindent 4 }}
|
||||
name: tf-job-dashboard
|
||||
namespace: {{ .Release.Namespace }}
|
||||
spec:
|
||||
selector:
|
||||
matchLabels:
|
||||
kustomize.component: tf-job-operator
|
||||
{{- include "arena.labels" . | nindent 6 }}
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
kustomize.component: tf-job-operator
|
||||
name: tf-job-dashboard
|
||||
{{- include "arena.labels" . | nindent 8 }}
|
||||
spec:
|
||||
nodeSelector:
|
||||
{{- include "arena.nodeSelector" . | nindent 8 }}
|
||||
{{- include "arena.nonEdgeNodeSelector" . | nindent 8 }}
|
||||
tolerations:
|
||||
{{- with .Values.global.tolerations }}
|
||||
{{- . | toYaml | nindent 6 }}
|
||||
{{- end }}
|
||||
{{- with .Values.tolerations }}
|
||||
{{- . | toYaml | nindent 6 }}
|
||||
{{- end }}
|
||||
{{- include "arena.tolerateNonEdgeNodeSelector" . | nindent 6 }}
|
||||
containers:
|
||||
- command:
|
||||
- /opt/tensorflow_k8s/dashboard/backend
|
||||
env:
|
||||
- name: KUBEFLOW_NAMESPACE
|
||||
valueFrom:
|
||||
fieldRef:
|
||||
fieldPath: metadata.namespace
|
||||
image: {{ include "arena.imagePrefix" . }}/{{ .Values.image }}:{{ .Values.tag }}
|
||||
imagePullPolicy: {{ .Values.imagePullPolicy }}
|
||||
name: tf-job-dashboard
|
||||
ports:
|
||||
- containerPort: 8080
|
||||
resources:
|
||||
{{- toYaml .Values.resources | nindent 12 }}
|
||||
serviceAccountName: tf-job-dashboard
|
|
@ -0,0 +1,83 @@
|
|||
---
|
||||
apiVersion: v1
|
||||
kind: ServiceAccount
|
||||
metadata:
|
||||
labels:
|
||||
app: tf-job-dashboard
|
||||
kustomize.component: tf-job-operator
|
||||
{{- include "arena.labels" . | nindent 4 }}
|
||||
name: tf-job-dashboard
|
||||
namespace: {{ .Release.Namespace }}
|
||||
---
|
||||
apiVersion: rbac.authorization.k8s.io/v1
|
||||
kind: ClusterRole
|
||||
metadata:
|
||||
labels:
|
||||
app: tf-job-dashboard
|
||||
kustomize.component: tf-job-operator
|
||||
{{- include "arena.labels" . | nindent 4 }}
|
||||
name: tf-job-dashboard
|
||||
rules:
|
||||
- apiGroups:
|
||||
- tensorflow.org
|
||||
- kubeflow.org
|
||||
resources:
|
||||
- tfjobs
|
||||
- tfjobs/status
|
||||
verbs:
|
||||
- '*'
|
||||
- apiGroups:
|
||||
- apiextensions.k8s.io
|
||||
resources:
|
||||
- customresourcedefinitions
|
||||
verbs:
|
||||
- '*'
|
||||
- apiGroups:
|
||||
- storage.k8s.io
|
||||
resources:
|
||||
- storageclasses
|
||||
verbs:
|
||||
- '*'
|
||||
- apiGroups:
|
||||
- batch
|
||||
resources:
|
||||
- jobs
|
||||
verbs:
|
||||
- '*'
|
||||
- apiGroups:
|
||||
- ""
|
||||
resources:
|
||||
- configmaps
|
||||
- pods
|
||||
- services
|
||||
- endpoints
|
||||
- persistentvolumeclaims
|
||||
- events
|
||||
- pods/log
|
||||
- namespaces
|
||||
verbs:
|
||||
- '*'
|
||||
- apiGroups:
|
||||
- apps
|
||||
- extensions
|
||||
resources:
|
||||
- deployments
|
||||
verbs:
|
||||
- '*'
|
||||
---
|
||||
apiVersion: rbac.authorization.k8s.io/v1
|
||||
kind: ClusterRoleBinding
|
||||
metadata:
|
||||
labels:
|
||||
app: tf-job-dashboard
|
||||
kustomize.component: tf-job-operator
|
||||
{{- include "arena.labels" . | nindent 4 }}
|
||||
name: tf-job-dashboard
|
||||
roleRef:
|
||||
apiGroup: rbac.authorization.k8s.io
|
||||
kind: ClusterRole
|
||||
name: tf-job-dashboard
|
||||
subjects:
|
||||
- kind: ServiceAccount
|
||||
name: tf-job-dashboard
|
||||
namespace: {{ .Release.Namespace }}
|
|
@ -0,0 +1,26 @@
|
|||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
annotations:
|
||||
getambassador.io/config: |-
|
||||
---
|
||||
apiVersion: ambassador/v0
|
||||
kind: Mapping
|
||||
name: tfjobs-ui-mapping
|
||||
prefix: /tfjobs/
|
||||
rewrite: /tfjobs/
|
||||
{{- /* Point the Ambassador mapping at the namespace this Service is actually installed in, not a hard-coded "kubeflow". */}}
service: tf-job-dashboard.{{ .Release.Namespace }}
|
||||
labels:
|
||||
kustomize.component: tf-job-operator
|
||||
{{- include "arena.labels" . | nindent 4 }}
|
||||
name: tf-job-dashboard
|
||||
namespace: {{ .Release.Namespace }}
|
||||
spec:
|
||||
ports:
|
||||
- port: 80
|
||||
targetPort: 8080
|
||||
selector:
|
||||
{{- include "arena.labels" . | nindent 4 }}
|
||||
kustomize.component: tf-job-operator
|
||||
name: tf-job-dashboard
|
||||
type: ClusterIP
|
|
@ -0,0 +1,3 @@
|
|||
# Default values for tf-dashboard.
|
||||
# This is a YAML-formatted file.
|
||||
# Declare variables to be passed into your templates.
|
|
@ -0,0 +1,23 @@
|
|||
# Patterns to ignore when building packages.
|
||||
# This supports shell glob matching, relative path matching, and
|
||||
# negation (prefixed with !). Only one pattern per line.
|
||||
.DS_Store
|
||||
# Common VCS dirs
|
||||
.git/
|
||||
.gitignore
|
||||
.bzr/
|
||||
.bzrignore
|
||||
.hg/
|
||||
.hgignore
|
||||
.svn/
|
||||
# Common backup files
|
||||
*.swp
|
||||
*.bak
|
||||
*.tmp
|
||||
*.orig
|
||||
*~
|
||||
# Various IDEs
|
||||
.project
|
||||
.idea/
|
||||
*.tmproj
|
||||
.vscode/
|
|
@ -0,0 +1,24 @@
|
|||
apiVersion: v2
|
||||
name: tf-operator
|
||||
description: A Helm chart for deploying the TFJob operator (tf-job-operator) on Kubernetes
|
||||
|
||||
# A chart can be either an 'application' or a 'library' chart.
|
||||
#
|
||||
# Application charts are a collection of templates that can be packaged into versioned archives
|
||||
# to be deployed.
|
||||
#
|
||||
# Library charts provide useful utilities or functions for the chart developer. They're included as
|
||||
# a dependency of application charts to inject those utilities and functions into the rendering
|
||||
# pipeline. Library charts do not define any templates and therefore cannot be deployed.
|
||||
type: application
|
||||
|
||||
# This is the chart version. This version number should be incremented each time you make changes
|
||||
# to the chart and its templates, including the app version.
|
||||
# Versions are expected to follow Semantic Versioning (https://semver.org/)
|
||||
version: 0.1.0
|
||||
|
||||
# This is the version number of the application being deployed. This version number should be
|
||||
# incremented each time you make changes to the application. Versions are not expected to
|
||||
# follow Semantic Versioning. They should reflect the version the application is using.
|
||||
# It is recommended to use it with quotes.
|
||||
appVersion: "1.0.0"
|
|
@ -0,0 +1,13 @@
|
|||
apiVersion: v1
|
||||
data:
|
||||
controller_config_file.yaml: |-
|
||||
{
|
||||
"grpcServerFilePath": "/opt/mlkube/grpc_tensorflow_server/grpc_tensorflow_server.py"
|
||||
}
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
labels:
|
||||
kustomize.component: tf-job-operator
|
||||
{{- include "arena.labels" . | nindent 4 }}
|
||||
name: tf-job-operator-config
|
||||
namespace: {{ .Release.Namespace }}
|
|
@ -0,0 +1,71 @@
|
|||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: tf-job-operator
|
||||
namespace: {{ .Release.Namespace }}
|
||||
labels:
|
||||
kustomize.component: tf-job-operator
|
||||
{{- include "arena.labels" . | nindent 4 }}
|
||||
spec:
|
||||
replicas: {{ .Values.replicas }}
|
||||
selector:
|
||||
matchLabels:
|
||||
kustomize.component: tf-job-operator
|
||||
{{- include "arena.labels" . | nindent 6 }}
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
kustomize.component: tf-job-operator
|
||||
name: tf-job-operator
|
||||
{{- include "arena.labels" . | nindent 8 }}
|
||||
spec:
|
||||
containers:
|
||||
- name: tf-job-operator
|
||||
image: {{ include "arena.imagePrefix" . }}/{{ .Values.image }}:{{ .Values.tag }}
|
||||
imagePullPolicy: {{ .Values.imagePullPolicy }}
|
||||
command:
|
||||
- /opt/kubeflow/tf-operator.v1
|
||||
- --alsologtostderr
|
||||
- -v=1
|
||||
- --monitoring-port=8443
|
||||
- --threadiness=4
|
||||
env:
|
||||
- name: MY_POD_NAMESPACE
|
||||
valueFrom:
|
||||
fieldRef:
|
||||
fieldPath: metadata.namespace
|
||||
- name: MY_POD_NAME
|
||||
valueFrom:
|
||||
fieldRef:
|
||||
fieldPath: metadata.name
|
||||
volumeMounts:
|
||||
- name: config-volume
|
||||
mountPath: /etc/config
|
||||
resources:
|
||||
{{- toYaml .Values.resources | nindent 10 }}
|
||||
volumes:
|
||||
- name: config-volume
|
||||
configMap:
|
||||
name: tf-job-operator-config
|
||||
nodeSelector:
|
||||
{{- include "arena.nodeSelector" . | nindent 8 }}
|
||||
{{- include "arena.nonEdgeNodeSelector" . | nindent 8 }}
|
||||
affinity:
|
||||
podAntiAffinity:
|
||||
preferredDuringSchedulingIgnoredDuringExecution:
|
||||
- weight: 100
|
||||
podAffinityTerm:
|
||||
labelSelector:
|
||||
matchLabels:
|
||||
kustomize.component: tf-job-operator
|
||||
{{- include "arena.labels" . | nindent 18 }}
|
||||
topologyKey: kubernetes.io/hostname
|
||||
tolerations:
|
||||
{{- with .Values.global.tolerations }}
|
||||
{{- . | toYaml | nindent 6 }}
|
||||
{{- end }}
|
||||
{{- with .Values.tolerations }}
|
||||
{{- . | toYaml | nindent 6 }}
|
||||
{{- end }}
|
||||
{{- include "arena.tolerateNonEdgeNodeSelector" . | nindent 6 }}
|
||||
serviceAccountName: tf-job-operator
|
|
@ -0,0 +1,102 @@
|
|||
---
|
||||
apiVersion: v1
|
||||
kind: ServiceAccount
|
||||
metadata:
|
||||
name: tf-job-operator
|
||||
namespace: {{ .Release.Namespace }}
|
||||
labels:
|
||||
app: tf-job-operator
|
||||
kustomize.component: tf-job-operator
|
||||
{{- include "arena.labels" . | nindent 4 }}
|
||||
|
||||
---
|
||||
apiVersion: rbac.authorization.k8s.io/v1
|
||||
kind: ClusterRole
|
||||
metadata:
|
||||
name: tf-job-operator
|
||||
labels:
|
||||
app: tf-job-operator
|
||||
kustomize.component: tf-job-operator
|
||||
{{- include "arena.labels" . | nindent 4 }}
|
||||
rules:
|
||||
- apiGroups:
|
||||
- tensorflow.org
|
||||
- kubeflow.org
|
||||
resources:
|
||||
- tfjobs
|
||||
- tfjobs/status
|
||||
verbs:
|
||||
- create
|
||||
- delete
|
||||
- get
|
||||
- list
|
||||
- patch
|
||||
- update
|
||||
- watch
|
||||
- apiGroups:
|
||||
- apiextensions.k8s.io
|
||||
resources:
|
||||
- customresourcedefinitions
|
||||
verbs:
|
||||
- get
|
||||
- list
|
||||
- watch
|
||||
- apiGroups:
|
||||
- storage.k8s.io
|
||||
resources:
|
||||
- storageclasses
|
||||
verbs:
|
||||
- create
|
||||
- delete
|
||||
- get
|
||||
- list
|
||||
- patch
|
||||
- update
|
||||
- watch
|
||||
- apiGroups:
|
||||
- batch
|
||||
resources:
|
||||
- jobs
|
||||
verbs:
|
||||
- create
|
||||
- delete
|
||||
- get
|
||||
- list
|
||||
- patch
|
||||
- update
|
||||
- watch
|
||||
- apiGroups:
|
||||
- ""
|
||||
resources:
|
||||
- configmaps
|
||||
- pods
|
||||
- services
|
||||
- endpoints
|
||||
- persistentvolumeclaims
|
||||
- events
|
||||
verbs:
|
||||
- create
|
||||
- delete
|
||||
- get
|
||||
- list
|
||||
- patch
|
||||
- update
|
||||
- watch
|
||||
|
||||
---
|
||||
apiVersion: rbac.authorization.k8s.io/v1
|
||||
kind: ClusterRoleBinding
|
||||
metadata:
|
||||
name: tf-job-operator
|
||||
labels:
|
||||
app: tf-job-operator
|
||||
kustomize.component: tf-job-operator
|
||||
{{- include "arena.labels" . | nindent 4 }}
|
||||
roleRef:
|
||||
apiGroup: rbac.authorization.k8s.io
|
||||
kind: ClusterRole
|
||||
name: tf-job-operator
|
||||
subjects:
|
||||
- kind: ServiceAccount
|
||||
name: tf-job-operator
|
||||
namespace: {{ .Release.Namespace }}
|
|
@ -0,0 +1,23 @@
|
|||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
annotations:
|
||||
prometheus.io/path: /metrics
|
||||
prometheus.io/port: "8443"
|
||||
prometheus.io/scrape: "true"
|
||||
labels:
|
||||
app: tf-job-operator
|
||||
kustomize.component: tf-job-operator
|
||||
{{- include "arena.labels" . | nindent 4 }}
|
||||
name: tf-job-operator
|
||||
namespace: {{ .Release.Namespace }}
|
||||
spec:
|
||||
ports:
|
||||
- name: monitoring-port
|
||||
port: 8443
|
||||
targetPort: 8443
|
||||
selector:
|
||||
kustomize.component: tf-job-operator
|
||||
name: tf-job-operator
|
||||
{{- include "arena.labels" . | nindent 4 }}
|
||||
type: ClusterIP
|
|
@ -0,0 +1,6 @@
|
|||
# Default values for tf-operator.
|
||||
# This is a YAML-formatted file.
|
||||
# Declare variables to be passed into your templates.
|
||||
|
||||
# -- Replicas of tf-operator deployment.
|
||||
replicas: 1
|
|
@ -0,0 +1,5 @@
|
|||
kind: Cluster
|
||||
apiVersion: kind.x-k8s.io/v1alpha4
|
||||
nodes:
|
||||
- role: control-plane
|
||||
- role: worker
|
|
@ -0,0 +1,2 @@
|
|||
global:
|
||||
imagePrefix: registry-us-east-1.ack.aliyuncs.com
|
|
@ -0,0 +1,49 @@
|
|||
{{- define "arena.imagePrefix" -}}
|
||||
{{- if eq .Values.global.clusterProfile "Edge" }}
|
||||
{{- .Values.global.imagePrefix }}
|
||||
{{- else if .Values.global.pullImageByVPCNetwork }}
|
||||
{{- .Values.global.imagePrefix | replace "registry." "registry-vpc." }}
|
||||
{{- else }}
|
||||
{{- .Values.global.imagePrefix }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
|
||||
{{- define "arena.nodeSelector" }}
|
||||
{{- range $nodeKey,$nodeVal := .Values.nodeSelector }}
|
||||
{{ $nodeKey }}: "{{ $nodeVal }}"
|
||||
{{- end }}
|
||||
{{- range $nodeKey,$nodeVal := .Values.global.nodeSelector }}
|
||||
{{ $nodeKey }}: "{{ $nodeVal }}"
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
|
||||
{{- define "arena.nonEdgeNodeSelector" }}
|
||||
{{- if eq .Values.global.clusterProfile "Edge" }}
|
||||
alibabacloud.com/is-edge-worker: "false"
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
|
||||
{{- define "arena.tolerateNonEdgeNodeSelector" }}
|
||||
{{- if eq .Values.global.clusterProfile "Edge" }}
|
||||
- key: node-role.alibabacloud.com/addon
|
||||
operator: Exists
|
||||
effect: NoSchedule
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
|
||||
{{- define "arena.version" }}
|
||||
{{- .Values.binary.tag }}
|
||||
{{- end }}
|
||||
|
||||
{{- define "arena.labels" -}}
|
||||
helm.sh/chart: arena-artifacts
|
||||
app.kubeflow.org/managed-by: arena
|
||||
{{- end }}
|
||||
|
||||
{{- define "crd.api" }}
|
||||
{{- if .Capabilities.APIVersions.Has "apiextensions.k8s.io/v1beta1" -}}
|
||||
v1beta1
|
||||
{{- else -}}
|
||||
v1
|
||||
{{- end }}
|
||||
{{- end }}
|
|
@ -0,0 +1,10 @@
|
|||
apiVersion: v1
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
name: arena-config
|
||||
namespace: {{ .Release.Namespace }}
|
||||
labels:
|
||||
app.kubeflow.org: arena
|
||||
{{- include "arena.labels" . | nindent 4 }}
|
||||
data:
|
||||
adminUsers: ""
|
|
@ -0,0 +1,93 @@
|
|||
{{- if .Values.binary.enabled }}
|
||||
{{- if gt (int .Values.binary.masterCount) 0 }}
|
||||
apiVersion: batch/v1
|
||||
kind: Job
|
||||
metadata:
|
||||
namespace: {{ .Release.Namespace }}
|
||||
name: binary-installer-{{ include "arena.version" . }}
|
||||
labels:
|
||||
app: binary-installer
|
||||
name: binary-installer-{{ include "arena.version" . }}
|
||||
{{- include "arena.labels" . | nindent 4 }}
|
||||
spec:
|
||||
parallelism: {{ .Values.binary.masterCount }}
|
||||
backoffLimit: {{ .Values.binary.retry }}
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app: binary-installer
|
||||
name: binary-installer-{{ include "arena.version" . }}
|
||||
{{- include "arena.labels" . | nindent 8 }}
|
||||
spec:
|
||||
hostNetwork: true
|
||||
hostPID: true
|
||||
tolerations:
|
||||
- effect: NoSchedule
|
||||
operator: Exists
|
||||
key: node-role.kubernetes.io/control-plane
|
||||
- effect: NoSchedule
|
||||
operator: Exists
|
||||
key: node-role.kubernetes.io/master
|
||||
- effect: NoSchedule
|
||||
operator: Exists
|
||||
key: node.cloudprovider.kubernetes.io/uninitialized
|
||||
- key: node-role.alibabacloud.com/addon
|
||||
operator: Exists
|
||||
effect: NoSchedule
|
||||
restartPolicy: Never
|
||||
containers:
|
||||
- name: installer
|
||||
image: {{ include "arena.imagePrefix" . }}/{{ .Values.binary.image }}:{{ .Values.binary.tag }}
|
||||
imagePullPolicy: {{ .Values.binary.imagePullPolicy }}
|
||||
securityContext:
|
||||
privileged: true
|
||||
command:
|
||||
- sh
|
||||
- -c
|
||||
- |
|
||||
rm -rf /usr/local/arena-installer/arena-installer
|
||||
cp -a /root/arena-installer /usr/local/arena-installer
|
||||
options='--only-binary --region-id {{ include "arena.imagePrefix" . }}'
|
||||
{{- if .Values.binary.hostNetwork }}
|
||||
options="$options --host-network"
|
||||
{{- end }}
|
||||
{{- if .Values.binary.rdma }}
|
||||
options="$options --rdma"
|
||||
{{- end }}
|
||||
nsenter -t 1 -i -p -n -u -m -- /usr/local/arena-installer/arena-installer/install.sh $options
|
||||
env:
|
||||
volumeMounts:
|
||||
- name: arena-installer
|
||||
mountPath: /usr/local/arena-installer
|
||||
- name: kube
|
||||
mountPath: /root/.kube
|
||||
volumes:
|
||||
- hostPath:
|
||||
path: /usr/local/arena-installer
|
||||
type: DirectoryOrCreate
|
||||
name: arena-installer
|
||||
- hostPath:
|
||||
path: /root/.kube
|
||||
type: Directory
|
||||
name: kube
|
||||
affinity:
|
||||
podAntiAffinity:
|
||||
requiredDuringSchedulingIgnoredDuringExecution:
|
||||
- labelSelector:
|
||||
matchExpressions:
|
||||
- key: name
|
||||
operator: In
|
||||
values:
|
||||
- binary-installer-{{ include "arena.version" . }}
|
||||
topologyKey: "kubernetes.io/hostname"
|
||||
nodeAffinity:
|
||||
requiredDuringSchedulingIgnoredDuringExecution:
|
||||
nodeSelectorTerms:
|
||||
- matchExpressions:
|
||||
- key: node-role.kubernetes.io/control-plane
|
||||
operator: Exists
|
||||
- matchExpressions:
|
||||
- key: node-role.kubernetes.io/master
|
||||
operator: Exists
|
||||
{{- end }}
|
||||
{{- end }}
|
|
@ -0,0 +1,114 @@
|
|||
#
|
||||
# Copyright 2025 The Kubeflow authors.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# https://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
|
||||
suite: Test cron operator deployment
|
||||
|
||||
templates:
|
||||
- charts/cron/templates/operator-dp.yaml
|
||||
|
||||
release:
|
||||
name: arena-artifacts
|
||||
namespace: arena-system
|
||||
|
||||
set:
|
||||
cron:
|
||||
enabled: true
|
||||
|
||||
tests:
|
||||
- it: Should add tolerations if `global.tolerations` is set
|
||||
set:
|
||||
global:
|
||||
tolerations:
|
||||
- key: key1
|
||||
operator: Equal
|
||||
value: value1
|
||||
effect: NoSchedule
|
||||
- key: key2
|
||||
operator: Exists
|
||||
effect: NoSchedule
|
||||
asserts:
|
||||
- equal:
|
||||
path: spec.template.spec.tolerations
|
||||
value:
|
||||
- key: key1
|
||||
operator: Equal
|
||||
value: value1
|
||||
effect: NoSchedule
|
||||
- key: key2
|
||||
operator: Exists
|
||||
effect: NoSchedule
|
||||
|
||||
- it: Should add tolerations if `cron.tolerations` is set
|
||||
set:
|
||||
cron:
|
||||
tolerations:
|
||||
- key: key1
|
||||
operator: Equal
|
||||
value: value1
|
||||
effect: NoSchedule
|
||||
- key: key2
|
||||
operator: Exists
|
||||
effect: NoSchedule
|
||||
asserts:
|
||||
- equal:
|
||||
path: spec.template.spec.tolerations
|
||||
value:
|
||||
- key: key1
|
||||
operator: Equal
|
||||
value: value1
|
||||
effect: NoSchedule
|
||||
- key: key2
|
||||
operator: Exists
|
||||
effect: NoSchedule
|
||||
|
||||
- it: Should add tolerations if both `global.tolerations` and `cron.tolerations` are set
|
||||
set:
|
||||
global:
|
||||
tolerations:
|
||||
- key: key1
|
||||
operator: Equal
|
||||
value: value1
|
||||
effect: NoSchedule
|
||||
- key: key2
|
||||
operator: Exists
|
||||
effect: NoSchedule
|
||||
cron:
|
||||
tolerations:
|
||||
- key: key3
|
||||
operator: Equal
|
||||
value: value3
|
||||
effect: NoSchedule
|
||||
- key: key4
|
||||
operator: Exists
|
||||
effect: NoSchedule
|
||||
asserts:
|
||||
- equal:
|
||||
path: spec.template.spec.tolerations
|
||||
value:
|
||||
- key: key1
|
||||
operator: Equal
|
||||
value: value1
|
||||
effect: NoSchedule
|
||||
- key: key2
|
||||
operator: Exists
|
||||
effect: NoSchedule
|
||||
- key: key3
|
||||
operator: Equal
|
||||
value: value3
|
||||
effect: NoSchedule
|
||||
- key: key4
|
||||
operator: Exists
|
||||
effect: NoSchedule
|
|
@ -0,0 +1,110 @@
|
|||
#
|
||||
# Copyright 2025 The Kubeflow authors.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# https://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
|
||||
suite: Test elastic job supervisor deployment
|
||||
|
||||
templates:
|
||||
- charts/elastic-job-supervisor/templates/deployment.yaml
|
||||
|
||||
release:
|
||||
name: arena-artifacts
|
||||
namespace: arena-system
|
||||
|
||||
tests:
|
||||
- it: Should add tolerations if `global.tolerations` is set
|
||||
set:
|
||||
global:
|
||||
tolerations:
|
||||
- key: key1
|
||||
operator: Equal
|
||||
value: value1
|
||||
effect: NoSchedule
|
||||
- key: key2
|
||||
operator: Exists
|
||||
effect: NoSchedule
|
||||
asserts:
|
||||
- equal:
|
||||
path: spec.template.spec.tolerations
|
||||
value:
|
||||
- key: key1
|
||||
operator: Equal
|
||||
value: value1
|
||||
effect: NoSchedule
|
||||
- key: key2
|
||||
operator: Exists
|
||||
effect: NoSchedule
|
||||
|
||||
- it: Should add tolerations if `elastic-job-supervisor.tolerations` is set
|
||||
set:
|
||||
elastic-job-supervisor:
|
||||
tolerations:
|
||||
- key: key1
|
||||
operator: Equal
|
||||
value: value1
|
||||
effect: NoSchedule
|
||||
- key: key2
|
||||
operator: Exists
|
||||
effect: NoSchedule
|
||||
asserts:
|
||||
- equal:
|
||||
path: spec.template.spec.tolerations
|
||||
value:
|
||||
- key: key1
|
||||
operator: Equal
|
||||
value: value1
|
||||
effect: NoSchedule
|
||||
- key: key2
|
||||
operator: Exists
|
||||
effect: NoSchedule
|
||||
|
||||
- it: Should add tolerations if both `global.tolerations` and `elastic-job-supervisor.tolerations` are set
|
||||
set:
|
||||
global:
|
||||
tolerations:
|
||||
- key: key1
|
||||
operator: Equal
|
||||
value: value1
|
||||
effect: NoSchedule
|
||||
- key: key2
|
||||
operator: Exists
|
||||
effect: NoSchedule
|
||||
elastic-job-supervisor:
|
||||
tolerations:
|
||||
- key: key3
|
||||
operator: Equal
|
||||
value: value3
|
||||
effect: NoSchedule
|
||||
- key: key4
|
||||
operator: Exists
|
||||
effect: NoSchedule
|
||||
asserts:
|
||||
- equal:
|
||||
path: spec.template.spec.tolerations
|
||||
value:
|
||||
- key: key1
|
||||
operator: Equal
|
||||
value: value1
|
||||
effect: NoSchedule
|
||||
- key: key2
|
||||
operator: Exists
|
||||
effect: NoSchedule
|
||||
- key: key3
|
||||
operator: Equal
|
||||
value: value3
|
||||
effect: NoSchedule
|
||||
- key: key4
|
||||
operator: Exists
|
||||
effect: NoSchedule
|
|
@ -0,0 +1,114 @@
|
|||
#
|
||||
# Copyright 2025 The Kubeflow authors.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# https://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
|
||||
suite: Test et operator deployment
|
||||
|
||||
templates:
|
||||
- charts/et/templates/operator-dp.yaml
|
||||
|
||||
release:
|
||||
name: arena-artifacts
|
||||
namespace: arena-system
|
||||
|
||||
set:
|
||||
et:
|
||||
enabled: true
|
||||
|
||||
tests:
|
||||
- it: Should add tolerations if `global.tolerations` is set
|
||||
set:
|
||||
global:
|
||||
tolerations:
|
||||
- key: key1
|
||||
operator: Equal
|
||||
value: value1
|
||||
effect: NoSchedule
|
||||
- key: key2
|
||||
operator: Exists
|
||||
effect: NoSchedule
|
||||
asserts:
|
||||
- equal:
|
||||
path: spec.template.spec.tolerations
|
||||
value:
|
||||
- key: key1
|
||||
operator: Equal
|
||||
value: value1
|
||||
effect: NoSchedule
|
||||
- key: key2
|
||||
operator: Exists
|
||||
effect: NoSchedule
|
||||
|
||||
- it: Should add tolerations if `et.tolerations` is set
|
||||
set:
|
||||
et:
|
||||
tolerations:
|
||||
- key: key1
|
||||
operator: Equal
|
||||
value: value1
|
||||
effect: NoSchedule
|
||||
- key: key2
|
||||
operator: Exists
|
||||
effect: NoSchedule
|
||||
asserts:
|
||||
- equal:
|
||||
path: spec.template.spec.tolerations
|
||||
value:
|
||||
- key: key1
|
||||
operator: Equal
|
||||
value: value1
|
||||
effect: NoSchedule
|
||||
- key: key2
|
||||
operator: Exists
|
||||
effect: NoSchedule
|
||||
|
||||
- it: Should add tolerations if both `global.tolerations` and `et.tolerations` are set
|
||||
set:
|
||||
global:
|
||||
tolerations:
|
||||
- key: key1
|
||||
operator: Equal
|
||||
value: value1
|
||||
effect: NoSchedule
|
||||
- key: key2
|
||||
operator: Exists
|
||||
effect: NoSchedule
|
||||
et:
|
||||
tolerations:
|
||||
- key: key3
|
||||
operator: Equal
|
||||
value: value3
|
||||
effect: NoSchedule
|
||||
- key: key4
|
||||
operator: Exists
|
||||
effect: NoSchedule
|
||||
asserts:
|
||||
- equal:
|
||||
path: spec.template.spec.tolerations
|
||||
value:
|
||||
- key: key1
|
||||
operator: Equal
|
||||
value: value1
|
||||
effect: NoSchedule
|
||||
- key: key2
|
||||
operator: Exists
|
||||
effect: NoSchedule
|
||||
- key: key3
|
||||
operator: Equal
|
||||
value: value3
|
||||
effect: NoSchedule
|
||||
- key: key4
|
||||
operator: Exists
|
||||
effect: NoSchedule
|
|
@ -0,0 +1,114 @@
|
|||
#
|
||||
# Copyright 2025 The Kubeflow authors.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# https://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
|
||||
suite: Test gpu exporter deployment
|
||||
|
||||
templates:
|
||||
- charts/exporter/templates/gpu-exporter-dp.yaml
|
||||
|
||||
release:
|
||||
name: arena-artifacts
|
||||
namespace: arena-system
|
||||
|
||||
set:
|
||||
exporter:
|
||||
enabled: true
|
||||
|
||||
tests:
|
||||
- it: Should add tolerations if `global.tolerations` is set
|
||||
set:
|
||||
global:
|
||||
tolerations:
|
||||
- key: key1
|
||||
operator: Equal
|
||||
value: value1
|
||||
effect: NoSchedule
|
||||
- key: key2
|
||||
operator: Exists
|
||||
effect: NoSchedule
|
||||
asserts:
|
||||
- equal:
|
||||
path: spec.template.spec.tolerations
|
||||
value:
|
||||
- key: key1
|
||||
operator: Equal
|
||||
value: value1
|
||||
effect: NoSchedule
|
||||
- key: key2
|
||||
operator: Exists
|
||||
effect: NoSchedule
|
||||
|
||||
- it: Should add tolerations if `exporter.tolerations` is set
|
||||
set:
|
||||
exporter:
|
||||
tolerations:
|
||||
- key: key1
|
||||
operator: Equal
|
||||
value: value1
|
||||
effect: NoSchedule
|
||||
- key: key2
|
||||
operator: Exists
|
||||
effect: NoSchedule
|
||||
asserts:
|
||||
- equal:
|
||||
path: spec.template.spec.tolerations
|
||||
value:
|
||||
- key: key1
|
||||
operator: Equal
|
||||
value: value1
|
||||
effect: NoSchedule
|
||||
- key: key2
|
||||
operator: Exists
|
||||
effect: NoSchedule
|
||||
|
||||
- it: Should add tolerations if both `global.tolerations` and `exporter.tolerations` are set
|
||||
set:
|
||||
global:
|
||||
tolerations:
|
||||
- key: key1
|
||||
operator: Equal
|
||||
value: value1
|
||||
effect: NoSchedule
|
||||
- key: key2
|
||||
operator: Exists
|
||||
effect: NoSchedule
|
||||
exporter:
|
||||
tolerations:
|
||||
- key: key3
|
||||
operator: Equal
|
||||
value: value3
|
||||
effect: NoSchedule
|
||||
- key: key4
|
||||
operator: Exists
|
||||
effect: NoSchedule
|
||||
asserts:
|
||||
- equal:
|
||||
path: spec.template.spec.tolerations
|
||||
value:
|
||||
- key: key1
|
||||
operator: Equal
|
||||
value: value1
|
||||
effect: NoSchedule
|
||||
- key: key2
|
||||
operator: Exists
|
||||
effect: NoSchedule
|
||||
- key: key3
|
||||
operator: Equal
|
||||
value: value3
|
||||
effect: NoSchedule
|
||||
- key: key4
|
||||
operator: Exists
|
||||
effect: NoSchedule
|
|
@ -0,0 +1,114 @@
|
|||
#
|
||||
# Copyright 2025 The Kubeflow authors.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# https://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
|
||||
suite: Test mpi operator deployment
|
||||
|
||||
templates:
|
||||
- charts/mpi/templates/operator-dp.yaml
|
||||
|
||||
release:
|
||||
name: arena-artifacts
|
||||
namespace: arena-system
|
||||
|
||||
set:
|
||||
mpi:
|
||||
enabled: true
|
||||
|
||||
tests:
|
||||
- it: Should add tolerations if `global.tolerations` is set
|
||||
set:
|
||||
global:
|
||||
tolerations:
|
||||
- key: key1
|
||||
operator: Equal
|
||||
value: value1
|
||||
effect: NoSchedule
|
||||
- key: key2
|
||||
operator: Exists
|
||||
effect: NoSchedule
|
||||
asserts:
|
||||
- equal:
|
||||
path: spec.template.spec.tolerations
|
||||
value:
|
||||
- key: key1
|
||||
operator: Equal
|
||||
value: value1
|
||||
effect: NoSchedule
|
||||
- key: key2
|
||||
operator: Exists
|
||||
effect: NoSchedule
|
||||
|
||||
- it: Should add tolerations if `mpi.tolerations` is set
|
||||
set:
|
||||
mpi:
|
||||
tolerations:
|
||||
- key: key1
|
||||
operator: Equal
|
||||
value: value1
|
||||
effect: NoSchedule
|
||||
- key: key2
|
||||
operator: Exists
|
||||
effect: NoSchedule
|
||||
asserts:
|
||||
- equal:
|
||||
path: spec.template.spec.tolerations
|
||||
value:
|
||||
- key: key1
|
||||
operator: Equal
|
||||
value: value1
|
||||
effect: NoSchedule
|
||||
- key: key2
|
||||
operator: Exists
|
||||
effect: NoSchedule
|
||||
|
||||
- it: Should add tolerations if both `global.tolerations` and `mpi.tolerations` are set
|
||||
set:
|
||||
global:
|
||||
tolerations:
|
||||
- key: key1
|
||||
operator: Equal
|
||||
value: value1
|
||||
effect: NoSchedule
|
||||
- key: key2
|
||||
operator: Exists
|
||||
effect: NoSchedule
|
||||
mpi:
|
||||
tolerations:
|
||||
- key: key3
|
||||
operator: Equal
|
||||
value: value3
|
||||
effect: NoSchedule
|
||||
- key: key4
|
||||
operator: Exists
|
||||
effect: NoSchedule
|
||||
asserts:
|
||||
- equal:
|
||||
path: spec.template.spec.tolerations
|
||||
value:
|
||||
- key: key1
|
||||
operator: Equal
|
||||
value: value1
|
||||
effect: NoSchedule
|
||||
- key: key2
|
||||
operator: Exists
|
||||
effect: NoSchedule
|
||||
- key: key3
|
||||
operator: Equal
|
||||
value: value3
|
||||
effect: NoSchedule
|
||||
- key: key4
|
||||
operator: Exists
|
||||
effect: NoSchedule
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue