mirror of https://github.com/kubeflow/katib.git
Compare commits
9 Commits
Author | SHA1 | Date |
---|---|---|
|
fc6beec835 | |
|
f299a22672 | |
|
f60c76f3de | |
|
2116fc7340 | |
|
1092dba86b | |
|
95e8ef16e3 | |
|
945ae81623 | |
|
0dfb344d08 | |
|
bc5add92d5 |
|
@ -136,6 +136,11 @@ func main() {
|
||||||
ctx := signals.SetupSignalHandler()
|
ctx := signals.SetupSignalHandler()
|
||||||
certsReady := make(chan struct{})
|
certsReady := make(chan struct{})
|
||||||
defer close(certsReady)
|
defer close(certsReady)
|
||||||
|
|
||||||
|
// The setupControllers will register controllers to the manager
|
||||||
|
// after generated certs for the admission webhooks.
|
||||||
|
go setupControllers(mgr, certsReady, hookServer)
|
||||||
|
|
||||||
if initConfig.CertGeneratorConfig.Enable {
|
if initConfig.CertGeneratorConfig.Enable {
|
||||||
if err = cert.AddToManager(mgr, initConfig.CertGeneratorConfig, certsReady); err != nil {
|
if err = cert.AddToManager(mgr, initConfig.CertGeneratorConfig, certsReady); err != nil {
|
||||||
log.Error(err, "Failed to set up cert-generator")
|
log.Error(err, "Failed to set up cert-generator")
|
||||||
|
@ -144,10 +149,6 @@ func main() {
|
||||||
certsReady <- struct{}{}
|
certsReady <- struct{}{}
|
||||||
}
|
}
|
||||||
|
|
||||||
// The setupControllers will register controllers to the manager
|
|
||||||
// after generated certs for the admission webhooks.
|
|
||||||
go setupControllers(mgr, certsReady, hookServer)
|
|
||||||
|
|
||||||
log.Info("Setting up health checker.")
|
log.Info("Setting up health checker.")
|
||||||
if err := mgr.AddReadyzCheck("readyz", hookServer.StartedChecker()); err != nil {
|
if err := mgr.AddReadyzCheck("readyz", hookServer.StartedChecker()); err != nil {
|
||||||
log.Error(err, "Unable to add readyz endpoint to the manager")
|
log.Error(err, "Unable to add readyz endpoint to the manager")
|
||||||
|
|
|
@ -2,4 +2,5 @@ psutil==5.9.4
|
||||||
rfc3339>=6.2
|
rfc3339>=6.2
|
||||||
grpcio>=1.41.1
|
grpcio>=1.41.1
|
||||||
googleapis-common-protos==1.6.0
|
googleapis-common-protos==1.6.0
|
||||||
tensorflow==2.11.0
|
tensorflow==2.13.0
|
||||||
|
protobuf<=3.20.3
|
||||||
|
|
|
@ -1,4 +1,5 @@
|
||||||
grpcio>=1.41.1
|
grpcio>=1.41.1
|
||||||
googleapis-common-protos==1.6.0
|
googleapis-common-protos==1.6.0
|
||||||
cython>=0.29.24
|
cython>=0.29.24
|
||||||
tensorflow==2.11.0
|
tensorflow==2.13.0
|
||||||
|
protobuf<=3.20.3
|
||||||
|
|
|
@ -76,7 +76,7 @@ spec:
|
||||||
- name: num-examples
|
- name: num-examples
|
||||||
container:
|
container:
|
||||||
name: model-training
|
name: model-training
|
||||||
image: docker.io/kubeflowkatib/mxnet-mnist:latest
|
image: docker.io/kubeflowkatib/mxnet-mnist:v0.16.0
|
||||||
command:
|
command:
|
||||||
- "python3"
|
- "python3"
|
||||||
- "/opt/mxnet-mnist/mnist.py"
|
- "/opt/mxnet-mnist/mnist.py"
|
||||||
|
|
|
@ -62,7 +62,7 @@ spec:
|
||||||
spec:
|
spec:
|
||||||
containers:
|
containers:
|
||||||
- name: training-container
|
- name: training-container
|
||||||
image: docker.io/kubeflowkatib/pytorch-mnist-cpu:latest
|
image: docker.io/kubeflowkatib/pytorch-mnist-cpu:v0.16.0
|
||||||
command:
|
command:
|
||||||
- "python3"
|
- "python3"
|
||||||
- "/opt/pytorch-mnist/mnist.py"
|
- "/opt/pytorch-mnist/mnist.py"
|
||||||
|
|
|
@ -54,7 +54,7 @@ spec:
|
||||||
spec:
|
spec:
|
||||||
containers:
|
containers:
|
||||||
- name: training-container
|
- name: training-container
|
||||||
image: docker.io/kubeflowkatib/mxnet-mnist:latest
|
image: docker.io/kubeflowkatib/mxnet-mnist:v0.16.0
|
||||||
command:
|
command:
|
||||||
- "python3"
|
- "python3"
|
||||||
- "/opt/mxnet-mnist/mnist.py"
|
- "/opt/mxnet-mnist/mnist.py"
|
||||||
|
|
|
@ -57,7 +57,7 @@ spec:
|
||||||
spec:
|
spec:
|
||||||
containers:
|
containers:
|
||||||
- name: training-container
|
- name: training-container
|
||||||
image: docker.io/kubeflowkatib/mxnet-mnist:latest
|
image: docker.io/kubeflowkatib/mxnet-mnist:v0.16.0
|
||||||
command:
|
command:
|
||||||
- "python3"
|
- "python3"
|
||||||
- "/opt/mxnet-mnist/mnist.py"
|
- "/opt/mxnet-mnist/mnist.py"
|
||||||
|
|
|
@ -57,7 +57,7 @@ spec:
|
||||||
spec:
|
spec:
|
||||||
containers:
|
containers:
|
||||||
- name: training-container
|
- name: training-container
|
||||||
image: docker.io/kubeflowkatib/mxnet-mnist:latest
|
image: docker.io/kubeflowkatib/mxnet-mnist:v0.16.0
|
||||||
command:
|
command:
|
||||||
- "python3"
|
- "python3"
|
||||||
- "/opt/mxnet-mnist/mnist.py"
|
- "/opt/mxnet-mnist/mnist.py"
|
||||||
|
|
|
@ -55,7 +55,7 @@ spec:
|
||||||
spec:
|
spec:
|
||||||
containers:
|
containers:
|
||||||
- name: training-container
|
- name: training-container
|
||||||
image: docker.io/kubeflowkatib/mxnet-mnist:latest
|
image: docker.io/kubeflowkatib/mxnet-mnist:v0.16.0
|
||||||
command:
|
command:
|
||||||
- "python3"
|
- "python3"
|
||||||
- "/opt/mxnet-mnist/mnist.py"
|
- "/opt/mxnet-mnist/mnist.py"
|
||||||
|
|
|
@ -69,7 +69,7 @@ spec:
|
||||||
spec:
|
spec:
|
||||||
containers:
|
containers:
|
||||||
- name: training-container
|
- name: training-container
|
||||||
image: docker.io/kubeflowkatib/mxnet-mnist:latest
|
image: docker.io/kubeflowkatib/mxnet-mnist:v0.16.0
|
||||||
command:
|
command:
|
||||||
- "python3"
|
- "python3"
|
||||||
- "/opt/mxnet-mnist/mnist.py"
|
- "/opt/mxnet-mnist/mnist.py"
|
||||||
|
|
|
@ -54,7 +54,7 @@ spec:
|
||||||
spec:
|
spec:
|
||||||
containers:
|
containers:
|
||||||
- name: training-container
|
- name: training-container
|
||||||
image: docker.io/kubeflowkatib/mxnet-mnist:latest
|
image: docker.io/kubeflowkatib/mxnet-mnist:v0.16.0
|
||||||
command:
|
command:
|
||||||
- "python3"
|
- "python3"
|
||||||
- "/opt/mxnet-mnist/mnist.py"
|
- "/opt/mxnet-mnist/mnist.py"
|
||||||
|
|
|
@ -54,7 +54,7 @@ spec:
|
||||||
spec:
|
spec:
|
||||||
containers:
|
containers:
|
||||||
- name: training-container
|
- name: training-container
|
||||||
image: docker.io/kubeflowkatib/mxnet-mnist:latest
|
image: docker.io/kubeflowkatib/mxnet-mnist:v0.16.0
|
||||||
command:
|
command:
|
||||||
- "python3"
|
- "python3"
|
||||||
- "/opt/mxnet-mnist/mnist.py"
|
- "/opt/mxnet-mnist/mnist.py"
|
||||||
|
|
|
@ -43,7 +43,7 @@ spec:
|
||||||
spec:
|
spec:
|
||||||
containers:
|
containers:
|
||||||
- name: training-container
|
- name: training-container
|
||||||
image: docker.io/kubeflowkatib/simple-pbt:latest
|
image: docker.io/kubeflowkatib/simple-pbt:v0.16.0
|
||||||
command:
|
command:
|
||||||
- "python3"
|
- "python3"
|
||||||
- "/opt/pbt/pbt_test.py"
|
- "/opt/pbt/pbt_test.py"
|
||||||
|
|
|
@ -54,7 +54,7 @@ spec:
|
||||||
spec:
|
spec:
|
||||||
containers:
|
containers:
|
||||||
- name: training-container
|
- name: training-container
|
||||||
image: docker.io/kubeflowkatib/mxnet-mnist:latest
|
image: docker.io/kubeflowkatib/mxnet-mnist:v0.16.0
|
||||||
command:
|
command:
|
||||||
- "python3"
|
- "python3"
|
||||||
- "/opt/mxnet-mnist/mnist.py"
|
- "/opt/mxnet-mnist/mnist.py"
|
||||||
|
|
|
@ -54,7 +54,7 @@ spec:
|
||||||
spec:
|
spec:
|
||||||
containers:
|
containers:
|
||||||
- name: training-container
|
- name: training-container
|
||||||
image: docker.io/kubeflowkatib/mxnet-mnist:latest
|
image: docker.io/kubeflowkatib/mxnet-mnist:v0.16.0
|
||||||
command:
|
command:
|
||||||
- "python3"
|
- "python3"
|
||||||
- "/opt/mxnet-mnist/mnist.py"
|
- "/opt/mxnet-mnist/mnist.py"
|
||||||
|
|
|
@ -46,7 +46,7 @@ spec:
|
||||||
spec:
|
spec:
|
||||||
containers:
|
containers:
|
||||||
- name: pytorch
|
- name: pytorch
|
||||||
image: docker.io/kubeflowkatib/pytorch-mnist-cpu:latest
|
image: docker.io/kubeflowkatib/pytorch-mnist-cpu:v0.16.0
|
||||||
command:
|
command:
|
||||||
- "python3"
|
- "python3"
|
||||||
- "/opt/pytorch-mnist/mnist.py"
|
- "/opt/pytorch-mnist/mnist.py"
|
||||||
|
@ -61,7 +61,7 @@ spec:
|
||||||
spec:
|
spec:
|
||||||
containers:
|
containers:
|
||||||
- name: pytorch
|
- name: pytorch
|
||||||
image: docker.io/kubeflowkatib/pytorch-mnist-cpu:latest
|
image: docker.io/kubeflowkatib/pytorch-mnist-cpu:v0.16.0
|
||||||
command:
|
command:
|
||||||
- "python3"
|
- "python3"
|
||||||
- "/opt/pytorch-mnist/mnist.py"
|
- "/opt/pytorch-mnist/mnist.py"
|
||||||
|
|
|
@ -56,7 +56,7 @@ spec:
|
||||||
spec:
|
spec:
|
||||||
containers:
|
containers:
|
||||||
- name: tensorflow
|
- name: tensorflow
|
||||||
image: docker.io/kubeflowkatib/tf-mnist-with-summaries:latest
|
image: docker.io/kubeflowkatib/tf-mnist-with-summaries:v0.16.0
|
||||||
command:
|
command:
|
||||||
- "python"
|
- "python"
|
||||||
- "/opt/tf-mnist-with-summaries/mnist.py"
|
- "/opt/tf-mnist-with-summaries/mnist.py"
|
||||||
|
|
|
@ -67,7 +67,7 @@ spec:
|
||||||
spec:
|
spec:
|
||||||
containers:
|
containers:
|
||||||
- name: training-container
|
- name: training-container
|
||||||
image: docker.io/kubeflowkatib/pytorch-mnist-cpu:latest
|
image: docker.io/kubeflowkatib/pytorch-mnist-cpu:v0.16.0
|
||||||
command:
|
command:
|
||||||
- "python3"
|
- "python3"
|
||||||
- "/opt/pytorch-mnist/mnist.py"
|
- "/opt/pytorch-mnist/mnist.py"
|
||||||
|
|
|
@ -52,7 +52,7 @@ spec:
|
||||||
spec:
|
spec:
|
||||||
containers:
|
containers:
|
||||||
- name: training-container
|
- name: training-container
|
||||||
image: docker.io/kubeflowkatib/pytorch-mnist-cpu:latest
|
image: docker.io/kubeflowkatib/pytorch-mnist-cpu:v0.16.0
|
||||||
command:
|
command:
|
||||||
- "python3"
|
- "python3"
|
||||||
- "/opt/pytorch-mnist/mnist.py"
|
- "/opt/pytorch-mnist/mnist.py"
|
||||||
|
|
|
@ -54,7 +54,7 @@ spec:
|
||||||
spec:
|
spec:
|
||||||
containers:
|
containers:
|
||||||
- name: training-container
|
- name: training-container
|
||||||
image: docker.io/kubeflowkatib/pytorch-mnist-cpu:latest
|
image: docker.io/kubeflowkatib/pytorch-mnist-cpu:v0.16.0
|
||||||
command:
|
command:
|
||||||
- "python3"
|
- "python3"
|
||||||
- "/opt/pytorch-mnist/mnist.py"
|
- "/opt/pytorch-mnist/mnist.py"
|
||||||
|
|
|
@ -59,7 +59,7 @@ spec:
|
||||||
spec:
|
spec:
|
||||||
containers:
|
containers:
|
||||||
- name: training-container
|
- name: training-container
|
||||||
image: docker.io/kubeflowkatib/mxnet-mnist:latest
|
image: docker.io/kubeflowkatib/mxnet-mnist:v0.16.0
|
||||||
command:
|
command:
|
||||||
- "python3"
|
- "python3"
|
||||||
- "/opt/mxnet-mnist/mnist.py"
|
- "/opt/mxnet-mnist/mnist.py"
|
||||||
|
|
|
@ -60,7 +60,7 @@ spec:
|
||||||
spec:
|
spec:
|
||||||
containers:
|
containers:
|
||||||
- name: training-container
|
- name: training-container
|
||||||
image: docker.io/kubeflowkatib/darts-cnn-cifar10-cpu:latest
|
image: docker.io/kubeflowkatib/darts-cnn-cifar10-cpu:v0.16.0
|
||||||
command:
|
command:
|
||||||
- python3
|
- python3
|
||||||
- run_trial.py
|
- run_trial.py
|
||||||
|
|
|
@ -77,7 +77,7 @@ spec:
|
||||||
spec:
|
spec:
|
||||||
containers:
|
containers:
|
||||||
- name: training-container
|
- name: training-container
|
||||||
image: docker.io/kubeflowkatib/darts-cnn-cifar10-gpu:latest
|
image: docker.io/kubeflowkatib/darts-cnn-cifar10-gpu:v0.16.0
|
||||||
command:
|
command:
|
||||||
- python3
|
- python3
|
||||||
- run_trial.py
|
- run_trial.py
|
||||||
|
|
|
@ -139,7 +139,7 @@ spec:
|
||||||
spec:
|
spec:
|
||||||
containers:
|
containers:
|
||||||
- name: training-container
|
- name: training-container
|
||||||
image: docker.io/kubeflowkatib/enas-cnn-cifar10-cpu:latest
|
image: docker.io/kubeflowkatib/enas-cnn-cifar10-cpu:v0.16.0
|
||||||
command:
|
command:
|
||||||
- python3
|
- python3
|
||||||
- -u
|
- -u
|
||||||
|
|
|
@ -136,7 +136,7 @@ spec:
|
||||||
spec:
|
spec:
|
||||||
containers:
|
containers:
|
||||||
- name: training-container
|
- name: training-container
|
||||||
image: docker.io/kubeflowkatib/enas-cnn-cifar10-gpu:latest
|
image: docker.io/kubeflowkatib/enas-cnn-cifar10-gpu:v0.16.0
|
||||||
command:
|
command:
|
||||||
- python3
|
- python3
|
||||||
- -u
|
- -u
|
||||||
|
|
|
@ -55,7 +55,7 @@ spec:
|
||||||
spec:
|
spec:
|
||||||
containers:
|
containers:
|
||||||
- name: training-container
|
- name: training-container
|
||||||
image: docker.io/kubeflowkatib/mxnet-mnist:latest
|
image: docker.io/kubeflowkatib/mxnet-mnist:v0.16.0
|
||||||
command:
|
command:
|
||||||
- "python3"
|
- "python3"
|
||||||
- "/opt/mxnet-mnist/mnist.py"
|
- "/opt/mxnet-mnist/mnist.py"
|
||||||
|
|
|
@ -55,7 +55,7 @@ spec:
|
||||||
spec:
|
spec:
|
||||||
containers:
|
containers:
|
||||||
- name: training-container
|
- name: training-container
|
||||||
image: docker.io/kubeflowkatib/mxnet-mnist:latest
|
image: docker.io/kubeflowkatib/mxnet-mnist:v0.16.0
|
||||||
command:
|
command:
|
||||||
- "python3"
|
- "python3"
|
||||||
- "/opt/mxnet-mnist/mnist.py"
|
- "/opt/mxnet-mnist/mnist.py"
|
||||||
|
|
|
@ -89,7 +89,7 @@ spec:
|
||||||
description: Number of training examples
|
description: Number of training examples
|
||||||
steps:
|
steps:
|
||||||
- name: model-training
|
- name: model-training
|
||||||
image: docker.io/kubeflowkatib/mxnet-mnist:latest
|
image: docker.io/kubeflowkatib/mxnet-mnist:v0.16.0
|
||||||
command:
|
command:
|
||||||
- "python3"
|
- "python3"
|
||||||
- "/opt/mxnet-mnist/mnist.py"
|
- "/opt/mxnet-mnist/mnist.py"
|
||||||
|
|
|
@ -1,2 +1,2 @@
|
||||||
scipy>=1.7.2
|
scipy>=1.7.2
|
||||||
tensorflow==2.11.0
|
tensorflow==2.13.0
|
||||||
|
|
|
@ -1 +1 @@
|
||||||
tensorflow==2.11.0
|
tensorflow==2.13.0
|
||||||
|
|
|
@ -60,7 +60,7 @@ spec:
|
||||||
spec:
|
spec:
|
||||||
containers:
|
containers:
|
||||||
- name: training-container
|
- name: training-container
|
||||||
image: docker.io/kubeflowkatib/mxnet-mnist:latest
|
image: docker.io/kubeflowkatib/mxnet-mnist:v0.16.0
|
||||||
command:
|
command:
|
||||||
- "python3"
|
- "python3"
|
||||||
- "/opt/mxnet-mnist/mnist.py"
|
- "/opt/mxnet-mnist/mnist.py"
|
||||||
|
|
|
@ -15,6 +15,7 @@ spec:
|
||||||
metadata:
|
metadata:
|
||||||
labels:
|
labels:
|
||||||
katib.kubeflow.org/component: controller
|
katib.kubeflow.org/component: controller
|
||||||
|
katib.kubeflow.org/metrics-collector-injection: disabled
|
||||||
annotations:
|
annotations:
|
||||||
prometheus.io/scrape: "true"
|
prometheus.io/scrape: "true"
|
||||||
prometheus.io/port: "8080"
|
prometheus.io/port: "8080"
|
||||||
|
|
|
@ -15,7 +15,7 @@ data:
|
||||||
spec:
|
spec:
|
||||||
containers:
|
containers:
|
||||||
- name: training-container
|
- name: training-container
|
||||||
image: docker.io/kubeflowkatib/mxnet-mnist:latest
|
image: docker.io/kubeflowkatib/mxnet-mnist:v0.16.0
|
||||||
command:
|
command:
|
||||||
- "python3"
|
- "python3"
|
||||||
- "/opt/mxnet-mnist/mnist.py"
|
- "/opt/mxnet-mnist/mnist.py"
|
||||||
|
@ -33,7 +33,7 @@ data:
|
||||||
spec:
|
spec:
|
||||||
containers:
|
containers:
|
||||||
- name: training-container
|
- name: training-container
|
||||||
image: docker.io/kubeflowkatib/enas-cnn-cifar10-cpu:latest
|
image: docker.io/kubeflowkatib/enas-cnn-cifar10-cpu:v0.16.0
|
||||||
command:
|
command:
|
||||||
- python3
|
- python3
|
||||||
- -u
|
- -u
|
||||||
|
@ -54,7 +54,7 @@ data:
|
||||||
spec:
|
spec:
|
||||||
containers:
|
containers:
|
||||||
- name: pytorch
|
- name: pytorch
|
||||||
image: docker.io/kubeflowkatib/pytorch-mnist-cpu:latest
|
image: docker.io/kubeflowkatib/pytorch-mnist-cpu:v0.16.0
|
||||||
command:
|
command:
|
||||||
- "python3"
|
- "python3"
|
||||||
- "/opt/pytorch-mnist/mnist.py"
|
- "/opt/pytorch-mnist/mnist.py"
|
||||||
|
@ -68,7 +68,7 @@ data:
|
||||||
spec:
|
spec:
|
||||||
containers:
|
containers:
|
||||||
- name: pytorch
|
- name: pytorch
|
||||||
image: docker.io/kubeflowkatib/pytorch-mnist-cpu:latest
|
image: docker.io/kubeflowkatib/pytorch-mnist-cpu:v0.16.0
|
||||||
command:
|
command:
|
||||||
- "python3"
|
- "python3"
|
||||||
- "/opt/pytorch-mnist/mnist.py"
|
- "/opt/pytorch-mnist/mnist.py"
|
||||||
|
|
|
@ -4,4 +4,3 @@ kind: Kustomization
|
||||||
|
|
||||||
resources:
|
resources:
|
||||||
- webhooks.yaml
|
- webhooks.yaml
|
||||||
- secret.yaml
|
|
||||||
|
|
|
@ -1,5 +0,0 @@
|
||||||
---
|
|
||||||
apiVersion: v1
|
|
||||||
kind: Secret
|
|
||||||
metadata:
|
|
||||||
name: katib-webhook-cert
|
|
|
@ -63,6 +63,16 @@ webhooks:
|
||||||
namespaceSelector:
|
namespaceSelector:
|
||||||
matchLabels:
|
matchLabels:
|
||||||
katib.kubeflow.org/metrics-collector-injection: enabled
|
katib.kubeflow.org/metrics-collector-injection: enabled
|
||||||
|
# Once the AdmissionWebhookMatchConditions feature gate is enabled by default, we should switch to control based on userInfo.
|
||||||
|
# REF:
|
||||||
|
# - AdmissionWebhookMatchConditions: https://kubernetes.io/docs/reference/access-authn-authz/extensible-admission-controllers/#matching-requests-matchconditions
|
||||||
|
# - Tracking issue: https://github.com/kubeflow/katib/issues/2206
|
||||||
|
objectSelector:
|
||||||
|
matchExpressions:
|
||||||
|
- key: katib.kubeflow.org/metrics-collector-injection
|
||||||
|
operator: NotIn
|
||||||
|
values:
|
||||||
|
- disabled
|
||||||
rules:
|
rules:
|
||||||
- apiGroups:
|
- apiGroups:
|
||||||
- ""
|
- ""
|
||||||
|
|
|
@ -14,40 +14,40 @@ init:
|
||||||
runtime:
|
runtime:
|
||||||
metricsCollectors:
|
metricsCollectors:
|
||||||
- kind: StdOut
|
- kind: StdOut
|
||||||
image: docker.io/kubeflowkatib/file-metrics-collector:latest
|
image: docker.io/kubeflowkatib/file-metrics-collector:v0.16.0
|
||||||
- kind: File
|
- kind: File
|
||||||
image: docker.io/kubeflowkatib/file-metrics-collector:latest
|
image: docker.io/kubeflowkatib/file-metrics-collector:v0.16.0
|
||||||
- kind: TensorFlowEvent
|
- kind: TensorFlowEvent
|
||||||
image: docker.io/kubeflowkatib/tfevent-metrics-collector:latest
|
image: docker.io/kubeflowkatib/tfevent-metrics-collector:v0.16.0
|
||||||
resources:
|
resources:
|
||||||
limits:
|
limits:
|
||||||
memory: 1Gi
|
memory: 1Gi
|
||||||
suggestions:
|
suggestions:
|
||||||
- algorithmName: random
|
- algorithmName: random
|
||||||
image: docker.io/kubeflowkatib/suggestion-hyperopt:latest
|
image: docker.io/kubeflowkatib/suggestion-hyperopt:v0.16.0
|
||||||
- algorithmName: tpe
|
- algorithmName: tpe
|
||||||
image: docker.io/kubeflowkatib/suggestion-hyperopt:latest
|
image: docker.io/kubeflowkatib/suggestion-hyperopt:v0.16.0
|
||||||
- algorithmName: grid
|
- algorithmName: grid
|
||||||
image: docker.io/kubeflowkatib/suggestion-optuna:latest
|
image: docker.io/kubeflowkatib/suggestion-optuna:v0.16.0
|
||||||
- algorithmName: hyperband
|
- algorithmName: hyperband
|
||||||
image: docker.io/kubeflowkatib/suggestion-hyperband:latest
|
image: docker.io/kubeflowkatib/suggestion-hyperband:v0.16.0
|
||||||
- algorithmName: bayesianoptimization
|
- algorithmName: bayesianoptimization
|
||||||
image: docker.io/kubeflowkatib/suggestion-skopt:latest
|
image: docker.io/kubeflowkatib/suggestion-skopt:v0.16.0
|
||||||
- algorithmName: cmaes
|
- algorithmName: cmaes
|
||||||
image: docker.io/kubeflowkatib/suggestion-goptuna:latest
|
image: docker.io/kubeflowkatib/suggestion-goptuna:v0.16.0
|
||||||
- algorithmName: sobol
|
- algorithmName: sobol
|
||||||
image: docker.io/kubeflowkatib/suggestion-goptuna:latest
|
image: docker.io/kubeflowkatib/suggestion-goptuna:v0.16.0
|
||||||
- algorithmName: multivariate-tpe
|
- algorithmName: multivariate-tpe
|
||||||
image: docker.io/kubeflowkatib/suggestion-optuna:latest
|
image: docker.io/kubeflowkatib/suggestion-optuna:v0.16.0
|
||||||
- algorithmName: enas
|
- algorithmName: enas
|
||||||
image: docker.io/kubeflowkatib/suggestion-enas:latest
|
image: docker.io/kubeflowkatib/suggestion-enas:v0.16.0
|
||||||
resources:
|
resources:
|
||||||
limits:
|
limits:
|
||||||
memory: 200Mi
|
memory: 200Mi
|
||||||
- algorithmName: darts
|
- algorithmName: darts
|
||||||
image: docker.io/kubeflowkatib/suggestion-darts:latest
|
image: docker.io/kubeflowkatib/suggestion-darts:v0.16.0
|
||||||
- algorithmName: pbt
|
- algorithmName: pbt
|
||||||
image: docker.io/kubeflowkatib/suggestion-pbt:latest
|
image: docker.io/kubeflowkatib/suggestion-pbt:v0.16.0
|
||||||
persistentVolumeClaimSpec:
|
persistentVolumeClaimSpec:
|
||||||
accessModes:
|
accessModes:
|
||||||
- ReadWriteMany
|
- ReadWriteMany
|
||||||
|
@ -56,4 +56,4 @@ runtime:
|
||||||
storage: 5Gi
|
storage: 5Gi
|
||||||
earlyStoppings:
|
earlyStoppings:
|
||||||
- algorithmName: medianstop
|
- algorithmName: medianstop
|
||||||
image: docker.io/kubeflowkatib/earlystopping-medianstop:latest
|
image: docker.io/kubeflowkatib/earlystopping-medianstop:v0.16.0
|
||||||
|
|
|
@ -22,13 +22,13 @@ resources:
|
||||||
images:
|
images:
|
||||||
- name: docker.io/kubeflowkatib/katib-controller
|
- name: docker.io/kubeflowkatib/katib-controller
|
||||||
newName: docker.io/kubeflowkatib/katib-controller
|
newName: docker.io/kubeflowkatib/katib-controller
|
||||||
newTag: latest
|
newTag: v0.16.0
|
||||||
- name: docker.io/kubeflowkatib/katib-db-manager
|
- name: docker.io/kubeflowkatib/katib-db-manager
|
||||||
newName: docker.io/kubeflowkatib/katib-db-manager
|
newName: docker.io/kubeflowkatib/katib-db-manager
|
||||||
newTag: latest
|
newTag: v0.16.0
|
||||||
- name: docker.io/kubeflowkatib/katib-ui
|
- name: docker.io/kubeflowkatib/katib-ui
|
||||||
newName: docker.io/kubeflowkatib/katib-ui
|
newName: docker.io/kubeflowkatib/katib-ui
|
||||||
newTag: latest
|
newTag: v0.16.0
|
||||||
|
|
||||||
patchesStrategicMerge:
|
patchesStrategicMerge:
|
||||||
- patches/katib-cert-injection.yaml
|
- patches/katib-cert-injection.yaml
|
||||||
|
|
|
@ -16,40 +16,40 @@ init:
|
||||||
runtime:
|
runtime:
|
||||||
metricsCollectors:
|
metricsCollectors:
|
||||||
- kind: StdOut
|
- kind: StdOut
|
||||||
image: docker.io/kubeflowkatib/file-metrics-collector:latest
|
image: docker.io/kubeflowkatib/file-metrics-collector:v0.16.0
|
||||||
- kind: File
|
- kind: File
|
||||||
image: docker.io/kubeflowkatib/file-metrics-collector:latest
|
image: docker.io/kubeflowkatib/file-metrics-collector:v0.16.0
|
||||||
- kind: TensorFlowEvent
|
- kind: TensorFlowEvent
|
||||||
image: docker.io/kubeflowkatib/tfevent-metrics-collector:latest
|
image: docker.io/kubeflowkatib/tfevent-metrics-collector:v0.16.0
|
||||||
resources:
|
resources:
|
||||||
limits:
|
limits:
|
||||||
memory: 1Gi
|
memory: 1Gi
|
||||||
suggestions:
|
suggestions:
|
||||||
- algorithmName: random
|
- algorithmName: random
|
||||||
image: docker.io/kubeflowkatib/suggestion-hyperopt:latest
|
image: docker.io/kubeflowkatib/suggestion-hyperopt:v0.16.0
|
||||||
- algorithmName: tpe
|
- algorithmName: tpe
|
||||||
image: docker.io/kubeflowkatib/suggestion-hyperopt:latest
|
image: docker.io/kubeflowkatib/suggestion-hyperopt:v0.16.0
|
||||||
- algorithmName: grid
|
- algorithmName: grid
|
||||||
image: docker.io/kubeflowkatib/suggestion-optuna:latest
|
image: docker.io/kubeflowkatib/suggestion-optuna:v0.16.0
|
||||||
- algorithmName: hyperband
|
- algorithmName: hyperband
|
||||||
image: docker.io/kubeflowkatib/suggestion-hyperband:latest
|
image: docker.io/kubeflowkatib/suggestion-hyperband:v0.16.0
|
||||||
- algorithmName: bayesianoptimization
|
- algorithmName: bayesianoptimization
|
||||||
image: docker.io/kubeflowkatib/suggestion-skopt:latest
|
image: docker.io/kubeflowkatib/suggestion-skopt:v0.16.0
|
||||||
- algorithmName: cmaes
|
- algorithmName: cmaes
|
||||||
image: docker.io/kubeflowkatib/suggestion-goptuna:latest
|
image: docker.io/kubeflowkatib/suggestion-goptuna:v0.16.0
|
||||||
- algorithmName: sobol
|
- algorithmName: sobol
|
||||||
image: docker.io/kubeflowkatib/suggestion-goptuna:latest
|
image: docker.io/kubeflowkatib/suggestion-goptuna:v0.16.0
|
||||||
- algorithmName: multivariate-tpe
|
- algorithmName: multivariate-tpe
|
||||||
image: docker.io/kubeflowkatib/suggestion-optuna:latest
|
image: docker.io/kubeflowkatib/suggestion-optuna:v0.16.0
|
||||||
- algorithmName: enas
|
- algorithmName: enas
|
||||||
image: docker.io/kubeflowkatib/suggestion-enas:latest
|
image: docker.io/kubeflowkatib/suggestion-enas:v0.16.0
|
||||||
resources:
|
resources:
|
||||||
limits:
|
limits:
|
||||||
memory: 200Mi
|
memory: 200Mi
|
||||||
- algorithmName: darts
|
- algorithmName: darts
|
||||||
image: docker.io/kubeflowkatib/suggestion-darts:latest
|
image: docker.io/kubeflowkatib/suggestion-darts:v0.16.0
|
||||||
- algorithmName: pbt
|
- algorithmName: pbt
|
||||||
image: docker.io/kubeflowkatib/suggestion-pbt:latest
|
image: docker.io/kubeflowkatib/suggestion-pbt:v0.16.0
|
||||||
persistentVolumeClaimSpec:
|
persistentVolumeClaimSpec:
|
||||||
accessModes:
|
accessModes:
|
||||||
- ReadWriteMany
|
- ReadWriteMany
|
||||||
|
@ -58,4 +58,4 @@ runtime:
|
||||||
storage: 5Gi
|
storage: 5Gi
|
||||||
earlyStoppings:
|
earlyStoppings:
|
||||||
- algorithmName: medianstop
|
- algorithmName: medianstop
|
||||||
image: docker.io/kubeflowkatib/earlystopping-medianstop:latest
|
image: docker.io/kubeflowkatib/earlystopping-medianstop:v0.16.0
|
||||||
|
|
|
@ -18,13 +18,13 @@ resources:
|
||||||
images:
|
images:
|
||||||
- name: docker.io/kubeflowkatib/katib-controller
|
- name: docker.io/kubeflowkatib/katib-controller
|
||||||
newName: docker.io/kubeflowkatib/katib-controller
|
newName: docker.io/kubeflowkatib/katib-controller
|
||||||
newTag: latest
|
newTag: v0.16.0
|
||||||
- name: docker.io/kubeflowkatib/katib-db-manager
|
- name: docker.io/kubeflowkatib/katib-db-manager
|
||||||
newName: docker.io/kubeflowkatib/katib-db-manager
|
newName: docker.io/kubeflowkatib/katib-db-manager
|
||||||
newTag: latest
|
newTag: v0.16.0
|
||||||
- name: docker.io/kubeflowkatib/katib-ui
|
- name: docker.io/kubeflowkatib/katib-ui
|
||||||
newName: docker.io/kubeflowkatib/katib-ui
|
newName: docker.io/kubeflowkatib/katib-ui
|
||||||
newTag: latest
|
newTag: v0.16.0
|
||||||
patchesStrategicMerge:
|
patchesStrategicMerge:
|
||||||
- patches/db-manager.yaml
|
- patches/db-manager.yaml
|
||||||
# Modify katib-mysql-secrets with parameters for the DB.
|
# Modify katib-mysql-secrets with parameters for the DB.
|
||||||
|
@ -32,6 +32,10 @@ secretGenerator:
|
||||||
- name: katib-mysql-secrets
|
- name: katib-mysql-secrets
|
||||||
envs:
|
envs:
|
||||||
- secrets.env
|
- secrets.env
|
||||||
|
# Secret for webhooks certs.
|
||||||
|
- name: katib-webhook-cert
|
||||||
|
options:
|
||||||
|
disableNameSuffixHash: true
|
||||||
configMapGenerator:
|
configMapGenerator:
|
||||||
- name: katib-config
|
- name: katib-config
|
||||||
behavior: create
|
behavior: create
|
||||||
|
|
|
@ -17,40 +17,40 @@ init:
|
||||||
runtime:
|
runtime:
|
||||||
metricsCollectors:
|
metricsCollectors:
|
||||||
- kind: StdOut
|
- kind: StdOut
|
||||||
image: docker.io/kubeflowkatib/file-metrics-collector:latest
|
image: docker.io/kubeflowkatib/file-metrics-collector:v0.16.0
|
||||||
- kind: File
|
- kind: File
|
||||||
image: docker.io/kubeflowkatib/file-metrics-collector:latest
|
image: docker.io/kubeflowkatib/file-metrics-collector:v0.16.0
|
||||||
- kind: TensorFlowEvent
|
- kind: TensorFlowEvent
|
||||||
image: docker.io/kubeflowkatib/tfevent-metrics-collector:latest
|
image: docker.io/kubeflowkatib/tfevent-metrics-collector:v0.16.0
|
||||||
resources:
|
resources:
|
||||||
limits:
|
limits:
|
||||||
memory: 1Gi
|
memory: 1Gi
|
||||||
suggestions:
|
suggestions:
|
||||||
- algorithmName: random
|
- algorithmName: random
|
||||||
image: docker.io/kubeflowkatib/suggestion-hyperopt:latest
|
image: docker.io/kubeflowkatib/suggestion-hyperopt:v0.16.0
|
||||||
- algorithmName: tpe
|
- algorithmName: tpe
|
||||||
image: docker.io/kubeflowkatib/suggestion-hyperopt:latest
|
image: docker.io/kubeflowkatib/suggestion-hyperopt:v0.16.0
|
||||||
- algorithmName: grid
|
- algorithmName: grid
|
||||||
image: docker.io/kubeflowkatib/suggestion-optuna:latest
|
image: docker.io/kubeflowkatib/suggestion-optuna:v0.16.0
|
||||||
- algorithmName: hyperband
|
- algorithmName: hyperband
|
||||||
image: docker.io/kubeflowkatib/suggestion-hyperband:latest
|
image: docker.io/kubeflowkatib/suggestion-hyperband:v0.16.0
|
||||||
- algorithmName: bayesianoptimization
|
- algorithmName: bayesianoptimization
|
||||||
image: docker.io/kubeflowkatib/suggestion-skopt:latest
|
image: docker.io/kubeflowkatib/suggestion-skopt:v0.16.0
|
||||||
- algorithmName: cmaes
|
- algorithmName: cmaes
|
||||||
image: docker.io/kubeflowkatib/suggestion-goptuna:latest
|
image: docker.io/kubeflowkatib/suggestion-goptuna:v0.16.0
|
||||||
- algorithmName: sobol
|
- algorithmName: sobol
|
||||||
image: docker.io/kubeflowkatib/suggestion-goptuna:latest
|
image: docker.io/kubeflowkatib/suggestion-goptuna:v0.16.0
|
||||||
- algorithmName: multivariate-tpe
|
- algorithmName: multivariate-tpe
|
||||||
image: docker.io/kubeflowkatib/suggestion-optuna:latest
|
image: docker.io/kubeflowkatib/suggestion-optuna:v0.16.0
|
||||||
- algorithmName: enas
|
- algorithmName: enas
|
||||||
image: docker.io/kubeflowkatib/suggestion-enas:latest
|
image: docker.io/kubeflowkatib/suggestion-enas:v0.16.0
|
||||||
resources:
|
resources:
|
||||||
limits:
|
limits:
|
||||||
memory: 200Mi
|
memory: 200Mi
|
||||||
- algorithmName: darts
|
- algorithmName: darts
|
||||||
image: docker.io/kubeflowkatib/suggestion-darts:latest
|
image: docker.io/kubeflowkatib/suggestion-darts:v0.16.0
|
||||||
- algorithmName: pbt
|
- algorithmName: pbt
|
||||||
image: docker.io/kubeflowkatib/suggestion-pbt:latest
|
image: docker.io/kubeflowkatib/suggestion-pbt:v0.16.0
|
||||||
persistentVolumeClaimSpec:
|
persistentVolumeClaimSpec:
|
||||||
accessModes:
|
accessModes:
|
||||||
- ReadWriteMany
|
- ReadWriteMany
|
||||||
|
@ -59,4 +59,4 @@ runtime:
|
||||||
storage: 5Gi
|
storage: 5Gi
|
||||||
earlyStoppings:
|
earlyStoppings:
|
||||||
- algorithmName: medianstop
|
- algorithmName: medianstop
|
||||||
image: docker.io/kubeflowkatib/earlystopping-medianstop:latest
|
image: docker.io/kubeflowkatib/earlystopping-medianstop:v0.16.0
|
||||||
|
|
|
@ -14,40 +14,40 @@ init:
|
||||||
runtime:
|
runtime:
|
||||||
metricsCollectors:
|
metricsCollectors:
|
||||||
- kind: StdOut
|
- kind: StdOut
|
||||||
image: docker.io/kubeflowkatib/file-metrics-collector:latest
|
image: docker.io/kubeflowkatib/file-metrics-collector:v0.16.0
|
||||||
- kind: File
|
- kind: File
|
||||||
image: docker.io/kubeflowkatib/file-metrics-collector:latest
|
image: docker.io/kubeflowkatib/file-metrics-collector:v0.16.0
|
||||||
- kind: TensorFlowEvent
|
- kind: TensorFlowEvent
|
||||||
image: docker.io/kubeflowkatib/tfevent-metrics-collector:latest
|
image: docker.io/kubeflowkatib/tfevent-metrics-collector:v0.16.0
|
||||||
resources:
|
resources:
|
||||||
limits:
|
limits:
|
||||||
memory: 1Gi
|
memory: 1Gi
|
||||||
suggestions:
|
suggestions:
|
||||||
- algorithmName: random
|
- algorithmName: random
|
||||||
image: docker.io/kubeflowkatib/suggestion-hyperopt:latest
|
image: docker.io/kubeflowkatib/suggestion-hyperopt:v0.16.0
|
||||||
- algorithmName: tpe
|
- algorithmName: tpe
|
||||||
image: docker.io/kubeflowkatib/suggestion-hyperopt:latest
|
image: docker.io/kubeflowkatib/suggestion-hyperopt:v0.16.0
|
||||||
- algorithmName: grid
|
- algorithmName: grid
|
||||||
image: docker.io/kubeflowkatib/suggestion-optuna:latest
|
image: docker.io/kubeflowkatib/suggestion-optuna:v0.16.0
|
||||||
- algorithmName: hyperband
|
- algorithmName: hyperband
|
||||||
image: docker.io/kubeflowkatib/suggestion-hyperband:latest
|
image: docker.io/kubeflowkatib/suggestion-hyperband:v0.16.0
|
||||||
- algorithmName: bayesianoptimization
|
- algorithmName: bayesianoptimization
|
||||||
image: docker.io/kubeflowkatib/suggestion-skopt:latest
|
image: docker.io/kubeflowkatib/suggestion-skopt:v0.16.0
|
||||||
- algorithmName: cmaes
|
- algorithmName: cmaes
|
||||||
image: docker.io/kubeflowkatib/suggestion-goptuna:latest
|
image: docker.io/kubeflowkatib/suggestion-goptuna:v0.16.0
|
||||||
- algorithmName: sobol
|
- algorithmName: sobol
|
||||||
image: docker.io/kubeflowkatib/suggestion-goptuna:latest
|
image: docker.io/kubeflowkatib/suggestion-goptuna:v0.16.0
|
||||||
- algorithmName: multivariate-tpe
|
- algorithmName: multivariate-tpe
|
||||||
image: docker.io/kubeflowkatib/suggestion-optuna:latest
|
image: docker.io/kubeflowkatib/suggestion-optuna:v0.16.0
|
||||||
- algorithmName: enas
|
- algorithmName: enas
|
||||||
image: docker.io/kubeflowkatib/suggestion-enas:latest
|
image: docker.io/kubeflowkatib/suggestion-enas:v0.16.0
|
||||||
resources:
|
resources:
|
||||||
limits:
|
limits:
|
||||||
memory: 200Mi
|
memory: 200Mi
|
||||||
- algorithmName: darts
|
- algorithmName: darts
|
||||||
image: docker.io/kubeflowkatib/suggestion-darts:latest
|
image: docker.io/kubeflowkatib/suggestion-darts:v0.16.0
|
||||||
- algorithmName: pbt
|
- algorithmName: pbt
|
||||||
image: docker.io/kubeflowkatib/suggestion-pbt:latest
|
image: docker.io/kubeflowkatib/suggestion-pbt:v0.16.0
|
||||||
persistentVolumeClaimSpec:
|
persistentVolumeClaimSpec:
|
||||||
accessModes:
|
accessModes:
|
||||||
- ReadWriteMany
|
- ReadWriteMany
|
||||||
|
@ -56,4 +56,4 @@ runtime:
|
||||||
storage: 5Gi
|
storage: 5Gi
|
||||||
earlyStoppings:
|
earlyStoppings:
|
||||||
- algorithmName: medianstop
|
- algorithmName: medianstop
|
||||||
image: docker.io/kubeflowkatib/earlystopping-medianstop:latest
|
image: docker.io/kubeflowkatib/earlystopping-medianstop:v0.16.0
|
||||||
|
|
|
@ -30,13 +30,13 @@ resources:
|
||||||
images:
|
images:
|
||||||
- name: docker.io/kubeflowkatib/katib-controller
|
- name: docker.io/kubeflowkatib/katib-controller
|
||||||
newName: docker.io/kubeflowkatib/katib-controller
|
newName: docker.io/kubeflowkatib/katib-controller
|
||||||
newTag: latest
|
newTag: v0.16.0
|
||||||
- name: docker.io/kubeflowkatib/katib-db-manager
|
- name: docker.io/kubeflowkatib/katib-db-manager
|
||||||
newName: docker.io/kubeflowkatib/katib-db-manager
|
newName: docker.io/kubeflowkatib/katib-db-manager
|
||||||
newTag: latest
|
newTag: v0.16.0
|
||||||
- name: docker.io/kubeflowkatib/katib-ui
|
- name: docker.io/kubeflowkatib/katib-ui
|
||||||
newName: docker.io/kubeflowkatib/katib-ui
|
newName: docker.io/kubeflowkatib/katib-ui
|
||||||
newTag: latest
|
newTag: v0.16.0
|
||||||
|
|
||||||
patchesJson6902:
|
patchesJson6902:
|
||||||
# Annotate Service to delegate TLS-secret generation to OpenShift service controller
|
# Annotate Service to delegate TLS-secret generation to OpenShift service controller
|
||||||
|
|
|
@ -16,40 +16,40 @@ init:
|
||||||
runtime:
|
runtime:
|
||||||
metricsCollectors:
|
metricsCollectors:
|
||||||
- kind: StdOut
|
- kind: StdOut
|
||||||
image: docker.io/kubeflowkatib/file-metrics-collector:latest
|
image: docker.io/kubeflowkatib/file-metrics-collector:v0.16.0
|
||||||
- kind: File
|
- kind: File
|
||||||
image: docker.io/kubeflowkatib/file-metrics-collector:latest
|
image: docker.io/kubeflowkatib/file-metrics-collector:v0.16.0
|
||||||
- kind: TensorFlowEvent
|
- kind: TensorFlowEvent
|
||||||
image: docker.io/kubeflowkatib/tfevent-metrics-collector:latest
|
image: docker.io/kubeflowkatib/tfevent-metrics-collector:v0.16.0
|
||||||
resources:
|
resources:
|
||||||
limits:
|
limits:
|
||||||
memory: 1Gi
|
memory: 1Gi
|
||||||
suggestions:
|
suggestions:
|
||||||
- algorithmName: random
|
- algorithmName: random
|
||||||
image: docker.io/kubeflowkatib/suggestion-hyperopt:latest
|
image: docker.io/kubeflowkatib/suggestion-hyperopt:v0.16.0
|
||||||
- algorithmName: tpe
|
- algorithmName: tpe
|
||||||
image: docker.io/kubeflowkatib/suggestion-hyperopt:latest
|
image: docker.io/kubeflowkatib/suggestion-hyperopt:v0.16.0
|
||||||
- algorithmName: grid
|
- algorithmName: grid
|
||||||
image: docker.io/kubeflowkatib/suggestion-optuna:latest
|
image: docker.io/kubeflowkatib/suggestion-optuna:v0.16.0
|
||||||
- algorithmName: hyperband
|
- algorithmName: hyperband
|
||||||
image: docker.io/kubeflowkatib/suggestion-hyperband:latest
|
image: docker.io/kubeflowkatib/suggestion-hyperband:v0.16.0
|
||||||
- algorithmName: bayesianoptimization
|
- algorithmName: bayesianoptimization
|
||||||
image: docker.io/kubeflowkatib/suggestion-skopt:latest
|
image: docker.io/kubeflowkatib/suggestion-skopt:v0.16.0
|
||||||
- algorithmName: cmaes
|
- algorithmName: cmaes
|
||||||
image: docker.io/kubeflowkatib/suggestion-goptuna:latest
|
image: docker.io/kubeflowkatib/suggestion-goptuna:v0.16.0
|
||||||
- algorithmName: sobol
|
- algorithmName: sobol
|
||||||
image: docker.io/kubeflowkatib/suggestion-goptuna:latest
|
image: docker.io/kubeflowkatib/suggestion-goptuna:v0.16.0
|
||||||
- algorithmName: multivariate-tpe
|
- algorithmName: multivariate-tpe
|
||||||
image: docker.io/kubeflowkatib/suggestion-optuna:latest
|
image: docker.io/kubeflowkatib/suggestion-optuna:v0.16.0
|
||||||
- algorithmName: enas
|
- algorithmName: enas
|
||||||
image: docker.io/kubeflowkatib/suggestion-enas:latest
|
image: docker.io/kubeflowkatib/suggestion-enas:v0.16.0
|
||||||
resources:
|
resources:
|
||||||
limits:
|
limits:
|
||||||
memory: 200Mi
|
memory: 200Mi
|
||||||
- algorithmName: darts
|
- algorithmName: darts
|
||||||
image: docker.io/kubeflowkatib/suggestion-darts:latest
|
image: docker.io/kubeflowkatib/suggestion-darts:v0.16.0
|
||||||
- algorithmName: pbt
|
- algorithmName: pbt
|
||||||
image: docker.io/kubeflowkatib/suggestion-pbt:latest
|
image: docker.io/kubeflowkatib/suggestion-pbt:v0.16.0
|
||||||
persistentVolumeClaimSpec:
|
persistentVolumeClaimSpec:
|
||||||
accessModes:
|
accessModes:
|
||||||
- ReadWriteMany
|
- ReadWriteMany
|
||||||
|
@ -58,4 +58,4 @@ runtime:
|
||||||
storage: 5Gi
|
storage: 5Gi
|
||||||
earlyStoppings:
|
earlyStoppings:
|
||||||
- algorithmName: medianstop
|
- algorithmName: medianstop
|
||||||
image: docker.io/kubeflowkatib/earlystopping-medianstop:latest
|
image: docker.io/kubeflowkatib/earlystopping-medianstop:v0.16.0
|
||||||
|
|
|
@ -20,13 +20,13 @@ resources:
|
||||||
images:
|
images:
|
||||||
- name: docker.io/kubeflowkatib/katib-controller
|
- name: docker.io/kubeflowkatib/katib-controller
|
||||||
newName: docker.io/kubeflowkatib/katib-controller
|
newName: docker.io/kubeflowkatib/katib-controller
|
||||||
newTag: latest
|
newTag: v0.16.0
|
||||||
- name: docker.io/kubeflowkatib/katib-db-manager
|
- name: docker.io/kubeflowkatib/katib-db-manager
|
||||||
newName: docker.io/kubeflowkatib/katib-db-manager
|
newName: docker.io/kubeflowkatib/katib-db-manager
|
||||||
newTag: latest
|
newTag: v0.16.0
|
||||||
- name: docker.io/kubeflowkatib/katib-ui
|
- name: docker.io/kubeflowkatib/katib-ui
|
||||||
newName: docker.io/kubeflowkatib/katib-ui
|
newName: docker.io/kubeflowkatib/katib-ui
|
||||||
newTag: latest
|
newTag: v0.16.0
|
||||||
patchesJson6902:
|
patchesJson6902:
|
||||||
- target:
|
- target:
|
||||||
group: apps
|
group: apps
|
||||||
|
@ -41,3 +41,8 @@ configMapGenerator:
|
||||||
- katib-config.yaml
|
- katib-config.yaml
|
||||||
options:
|
options:
|
||||||
disableNameSuffixHash: true
|
disableNameSuffixHash: true
|
||||||
|
# Secret for webhooks certs.
|
||||||
|
secretGenerator:
|
||||||
|
- name: katib-webhook-cert
|
||||||
|
options:
|
||||||
|
disableNameSuffixHash: true
|
||||||
|
|
|
@ -16,40 +16,40 @@ init:
|
||||||
runtime:
|
runtime:
|
||||||
metricsCollectors:
|
metricsCollectors:
|
||||||
- kind: StdOut
|
- kind: StdOut
|
||||||
image: docker.io/kubeflowkatib/file-metrics-collector:latest
|
image: docker.io/kubeflowkatib/file-metrics-collector:v0.16.0
|
||||||
- kind: File
|
- kind: File
|
||||||
image: docker.io/kubeflowkatib/file-metrics-collector:latest
|
image: docker.io/kubeflowkatib/file-metrics-collector:v0.16.0
|
||||||
- kind: TensorFlowEvent
|
- kind: TensorFlowEvent
|
||||||
image: docker.io/kubeflowkatib/tfevent-metrics-collector:latest
|
image: docker.io/kubeflowkatib/tfevent-metrics-collector:v0.16.0
|
||||||
resources:
|
resources:
|
||||||
limits:
|
limits:
|
||||||
memory: 1Gi
|
memory: 1Gi
|
||||||
suggestions:
|
suggestions:
|
||||||
- algorithmName: random
|
- algorithmName: random
|
||||||
image: docker.io/kubeflowkatib/suggestion-hyperopt:latest
|
image: docker.io/kubeflowkatib/suggestion-hyperopt:v0.16.0
|
||||||
- algorithmName: tpe
|
- algorithmName: tpe
|
||||||
image: docker.io/kubeflowkatib/suggestion-hyperopt:latest
|
image: docker.io/kubeflowkatib/suggestion-hyperopt:v0.16.0
|
||||||
- algorithmName: grid
|
- algorithmName: grid
|
||||||
image: docker.io/kubeflowkatib/suggestion-optuna:latest
|
image: docker.io/kubeflowkatib/suggestion-optuna:v0.16.0
|
||||||
- algorithmName: hyperband
|
- algorithmName: hyperband
|
||||||
image: docker.io/kubeflowkatib/suggestion-hyperband:latest
|
image: docker.io/kubeflowkatib/suggestion-hyperband:v0.16.0
|
||||||
- algorithmName: bayesianoptimization
|
- algorithmName: bayesianoptimization
|
||||||
image: docker.io/kubeflowkatib/suggestion-skopt:latest
|
image: docker.io/kubeflowkatib/suggestion-skopt:v0.16.0
|
||||||
- algorithmName: cmaes
|
- algorithmName: cmaes
|
||||||
image: docker.io/kubeflowkatib/suggestion-goptuna:latest
|
image: docker.io/kubeflowkatib/suggestion-goptuna:v0.16.0
|
||||||
- algorithmName: sobol
|
- algorithmName: sobol
|
||||||
image: docker.io/kubeflowkatib/suggestion-goptuna:latest
|
image: docker.io/kubeflowkatib/suggestion-goptuna:v0.16.0
|
||||||
- algorithmName: multivariate-tpe
|
- algorithmName: multivariate-tpe
|
||||||
image: docker.io/kubeflowkatib/suggestion-optuna:latest
|
image: docker.io/kubeflowkatib/suggestion-optuna:v0.16.0
|
||||||
- algorithmName: enas
|
- algorithmName: enas
|
||||||
image: docker.io/kubeflowkatib/suggestion-enas:latest
|
image: docker.io/kubeflowkatib/suggestion-enas:v0.16.0
|
||||||
resources:
|
resources:
|
||||||
limits:
|
limits:
|
||||||
memory: 200Mi
|
memory: 200Mi
|
||||||
- algorithmName: darts
|
- algorithmName: darts
|
||||||
image: docker.io/kubeflowkatib/suggestion-darts:latest
|
image: docker.io/kubeflowkatib/suggestion-darts:v0.16.0
|
||||||
- algorithmName: pbt
|
- algorithmName: pbt
|
||||||
image: docker.io/kubeflowkatib/suggestion-pbt:latest
|
image: docker.io/kubeflowkatib/suggestion-pbt:v0.16.0
|
||||||
persistentVolumeClaimSpec:
|
persistentVolumeClaimSpec:
|
||||||
accessModes:
|
accessModes:
|
||||||
- ReadWriteMany
|
- ReadWriteMany
|
||||||
|
@ -58,4 +58,4 @@ runtime:
|
||||||
storage: 5Gi
|
storage: 5Gi
|
||||||
earlyStoppings:
|
earlyStoppings:
|
||||||
- algorithmName: medianstop
|
- algorithmName: medianstop
|
||||||
image: docker.io/kubeflowkatib/earlystopping-medianstop:latest
|
image: docker.io/kubeflowkatib/earlystopping-medianstop:v0.16.0
|
||||||
|
|
|
@ -20,13 +20,13 @@ resources:
|
||||||
images:
|
images:
|
||||||
- name: docker.io/kubeflowkatib/katib-controller
|
- name: docker.io/kubeflowkatib/katib-controller
|
||||||
newName: docker.io/kubeflowkatib/katib-controller
|
newName: docker.io/kubeflowkatib/katib-controller
|
||||||
newTag: latest
|
newTag: v0.16.0
|
||||||
- name: docker.io/kubeflowkatib/katib-db-manager
|
- name: docker.io/kubeflowkatib/katib-db-manager
|
||||||
newName: docker.io/kubeflowkatib/katib-db-manager
|
newName: docker.io/kubeflowkatib/katib-db-manager
|
||||||
newTag: latest
|
newTag: v0.16.0
|
||||||
- name: docker.io/kubeflowkatib/katib-ui
|
- name: docker.io/kubeflowkatib/katib-ui
|
||||||
newName: docker.io/kubeflowkatib/katib-ui
|
newName: docker.io/kubeflowkatib/katib-ui
|
||||||
newTag: latest
|
newTag: v0.16.0
|
||||||
configMapGenerator:
|
configMapGenerator:
|
||||||
- name: katib-config
|
- name: katib-config
|
||||||
behavior: create
|
behavior: create
|
||||||
|
@ -34,3 +34,8 @@ configMapGenerator:
|
||||||
- katib-config.yaml
|
- katib-config.yaml
|
||||||
options:
|
options:
|
||||||
disableNameSuffixHash: true
|
disableNameSuffixHash: true
|
||||||
|
# Secret for webhooks certs.
|
||||||
|
secretGenerator:
|
||||||
|
- name: katib-webhook-cert
|
||||||
|
options:
|
||||||
|
disableNameSuffixHash: true
|
||||||
|
|
|
@ -11,13 +11,13 @@ resources:
|
||||||
images:
|
images:
|
||||||
- name: docker.io/kubeflowkatib/katib-controller
|
- name: docker.io/kubeflowkatib/katib-controller
|
||||||
newName: docker.io/kubeflowkatib/katib-controller
|
newName: docker.io/kubeflowkatib/katib-controller
|
||||||
newTag: latest
|
newTag: v0.16.0
|
||||||
- name: docker.io/kubeflowkatib/katib-db-manager
|
- name: docker.io/kubeflowkatib/katib-db-manager
|
||||||
newName: docker.io/kubeflowkatib/katib-db-manager
|
newName: docker.io/kubeflowkatib/katib-db-manager
|
||||||
newTag: latest
|
newTag: v0.16.0
|
||||||
- name: docker.io/kubeflowkatib/katib-ui
|
- name: docker.io/kubeflowkatib/katib-ui
|
||||||
newName: docker.io/kubeflowkatib/katib-ui
|
newName: docker.io/kubeflowkatib/katib-ui
|
||||||
newTag: latest
|
newTag: v0.16.0
|
||||||
|
|
||||||
patchesStrategicMerge:
|
patchesStrategicMerge:
|
||||||
- patches/remove-namespace.yaml
|
- patches/remove-namespace.yaml
|
||||||
|
|
|
@ -26,12 +26,15 @@ import (
|
||||||
"errors"
|
"errors"
|
||||||
"fmt"
|
"fmt"
|
||||||
"math/big"
|
"math/big"
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
"strings"
|
"strings"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
admissionregistrationv1 "k8s.io/api/admissionregistration/v1"
|
admissionregistrationv1 "k8s.io/api/admissionregistration/v1"
|
||||||
corev1 "k8s.io/api/core/v1"
|
corev1 "k8s.io/api/core/v1"
|
||||||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||||
|
"k8s.io/apimachinery/pkg/util/wait"
|
||||||
"k8s.io/klog"
|
"k8s.io/klog"
|
||||||
"sigs.k8s.io/controller-runtime/pkg/client"
|
"sigs.k8s.io/controller-runtime/pkg/client"
|
||||||
"sigs.k8s.io/controller-runtime/pkg/manager"
|
"sigs.k8s.io/controller-runtime/pkg/manager"
|
||||||
|
@ -53,11 +56,11 @@ type CertGenerator struct {
|
||||||
namespace string
|
namespace string
|
||||||
webhookServiceName string
|
webhookServiceName string
|
||||||
webhookSecretName string
|
webhookSecretName string
|
||||||
|
fullServiceDomain string
|
||||||
kubeClient client.Client
|
kubeClient client.Client
|
||||||
certsReady chan struct{}
|
certsReady chan struct{}
|
||||||
|
|
||||||
certs *certificates
|
certs *certificates
|
||||||
fullServiceDomain string
|
|
||||||
}
|
}
|
||||||
|
|
||||||
var _ manager.Runnable = &CertGenerator{}
|
var _ manager.Runnable = &CertGenerator{}
|
||||||
|
@ -67,11 +70,50 @@ func (c *CertGenerator) Start(ctx context.Context) error {
|
||||||
if err := c.generate(ctx); err != nil {
|
if err := c.generate(ctx); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
klog.Info("Waiting for certs to get ready.")
|
||||||
|
if err := wait.ExponentialBackoffWithContext(ctx, wait.Backoff{
|
||||||
|
Duration: time.Second,
|
||||||
|
Factor: 2,
|
||||||
|
Jitter: 1,
|
||||||
|
Steps: 10,
|
||||||
|
Cap: time.Minute * 5,
|
||||||
|
}, ensureCertMounted(time.Now())); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
// Sending an empty data to a certsReady means it starts to register controllers to the manager.
|
// Sending an empty data to a certsReady means it starts to register controllers to the manager.
|
||||||
c.certsReady <- struct{}{}
|
c.certsReady <- struct{}{}
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ensureCertMounted ensures that the generated certs are mounted inside the container.
|
||||||
|
func ensureCertMounted(start time.Time) func(context.Context) (bool, error) {
|
||||||
|
return func(ctx context.Context) (bool, error) {
|
||||||
|
now := time.Now()
|
||||||
|
outputLog := false
|
||||||
|
if now.Sub(start) >= 15*time.Second {
|
||||||
|
start = now
|
||||||
|
outputLog = true
|
||||||
|
}
|
||||||
|
|
||||||
|
certFile := filepath.Join(consts.CertDir, serverCertName)
|
||||||
|
if _, err := os.Stat(certFile); err != nil {
|
||||||
|
if outputLog {
|
||||||
|
klog.Infof("Public key file %q doesn't exist in the container yet", certFile)
|
||||||
|
}
|
||||||
|
return false, nil
|
||||||
|
}
|
||||||
|
keyFile := filepath.Join(consts.CertDir, serverKeyName)
|
||||||
|
if _, err := os.Stat(keyFile); err != nil {
|
||||||
|
if outputLog {
|
||||||
|
klog.Infof("Private key file %q doesn't exist in the container yet", keyFile)
|
||||||
|
}
|
||||||
|
return false, nil
|
||||||
|
}
|
||||||
|
klog.Info("Succeeded to be mounted certs inside the container.")
|
||||||
|
return true, nil
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func (c *CertGenerator) NeedLeaderElection() bool {
|
func (c *CertGenerator) NeedLeaderElection() bool {
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
|
@ -82,8 +124,13 @@ func AddToManager(mgr manager.Manager, config configv1beta1.CertGeneratorConfig,
|
||||||
namespace: consts.DefaultKatibNamespace,
|
namespace: consts.DefaultKatibNamespace,
|
||||||
webhookServiceName: config.WebhookServiceName,
|
webhookServiceName: config.WebhookServiceName,
|
||||||
webhookSecretName: config.WebhookSecretName,
|
webhookSecretName: config.WebhookSecretName,
|
||||||
kubeClient: mgr.GetClient(),
|
fullServiceDomain: strings.Join([]string{
|
||||||
certsReady: certsReady,
|
config.WebhookServiceName,
|
||||||
|
consts.DefaultKatibNamespace,
|
||||||
|
"svc",
|
||||||
|
}, "."),
|
||||||
|
kubeClient: mgr.GetClient(),
|
||||||
|
certsReady: certsReady,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -99,8 +146,6 @@ func (c *CertGenerator) generate(ctx context.Context) error {
|
||||||
return fmt.Errorf("%w: %v", errCertCheckFail, err)
|
return fmt.Errorf("%w: %v", errCertCheckFail, err)
|
||||||
}
|
}
|
||||||
if !certExist {
|
if !certExist {
|
||||||
c.fullServiceDomain = strings.Join([]string{c.webhookServiceName, c.namespace, "svc"}, ".")
|
|
||||||
|
|
||||||
if err = c.createCert(); err != nil {
|
if err = c.createCert(); err != nil {
|
||||||
return fmt.Errorf("%w: %v", errCreateCertFail, err)
|
return fmt.Errorf("%w: %v", errCreateCertFail, err)
|
||||||
}
|
}
|
||||||
|
|
|
@ -18,8 +18,11 @@ package certgenerator
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
"strings"
|
"strings"
|
||||||
"testing"
|
"testing"
|
||||||
|
"time"
|
||||||
|
|
||||||
"github.com/google/go-cmp/cmp"
|
"github.com/google/go-cmp/cmp"
|
||||||
"github.com/google/go-cmp/cmp/cmpopts"
|
"github.com/google/go-cmp/cmp/cmpopts"
|
||||||
|
@ -31,6 +34,7 @@ import (
|
||||||
"sigs.k8s.io/controller-runtime/pkg/client/fake"
|
"sigs.k8s.io/controller-runtime/pkg/client/fake"
|
||||||
|
|
||||||
configv1beta1 "github.com/kubeflow/katib/pkg/apis/config/v1beta1"
|
configv1beta1 "github.com/kubeflow/katib/pkg/apis/config/v1beta1"
|
||||||
|
"github.com/kubeflow/katib/pkg/controller.v1beta1/consts"
|
||||||
)
|
)
|
||||||
|
|
||||||
func TestGenerate(t *testing.T) {
|
func TestGenerate(t *testing.T) {
|
||||||
|
@ -210,3 +214,61 @@ func buildFakeClient(kubeResources []client.Object) client.Client {
|
||||||
}
|
}
|
||||||
return fakeClientBuilder.Build()
|
return fakeClientBuilder.Build()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestEnsureCertMounted(t *testing.T) {
|
||||||
|
tests := map[string]struct {
|
||||||
|
keyExist bool
|
||||||
|
certExist bool
|
||||||
|
wantExist bool
|
||||||
|
}{
|
||||||
|
"key and cert exist": {
|
||||||
|
keyExist: true,
|
||||||
|
certExist: true,
|
||||||
|
wantExist: true,
|
||||||
|
},
|
||||||
|
"key doesn't exist": {
|
||||||
|
keyExist: false,
|
||||||
|
certExist: true,
|
||||||
|
wantExist: false,
|
||||||
|
},
|
||||||
|
"cert doesn't exist": {
|
||||||
|
keyExist: true,
|
||||||
|
certExist: false,
|
||||||
|
wantExist: false,
|
||||||
|
},
|
||||||
|
"all files doesn't exist": {
|
||||||
|
keyExist: false,
|
||||||
|
certExist: false,
|
||||||
|
wantExist: false,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
for name, tc := range tests {
|
||||||
|
t.Run(name, func(t *testing.T) {
|
||||||
|
if tc.keyExist || tc.certExist {
|
||||||
|
if err := os.MkdirAll(consts.CertDir, 0760); err != nil {
|
||||||
|
t.Fatalf("Failed to set up directory: %v", err)
|
||||||
|
}
|
||||||
|
defer func() {
|
||||||
|
if err := os.RemoveAll(consts.CertDir); err != nil {
|
||||||
|
t.Fatalf("Failed to clean up directory: %v", err)
|
||||||
|
}
|
||||||
|
}()
|
||||||
|
}
|
||||||
|
if tc.keyExist {
|
||||||
|
if _, err := os.Create(filepath.Join(consts.CertDir, serverKeyName)); err != nil {
|
||||||
|
t.Fatalf("Failed to create tls.key: %v", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if tc.certExist {
|
||||||
|
if _, err := os.Create(filepath.Join(consts.CertDir, serverCertName)); err != nil {
|
||||||
|
t.Fatalf("Failed to create tls.crt: %v", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
ensureFunc := ensureCertMounted(time.Now())
|
||||||
|
got, _ := ensureFunc(context.Background())
|
||||||
|
if tc.wantExist != got {
|
||||||
|
t.Errorf("Unexpected value from ensureCertMounted: \n(want: %v, got: %v)\n", tc.wantExist, got)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
|
@ -22,10 +22,11 @@
|
||||||
# https://github.com/kubeflow/katib/blob/master/examples/v1beta1/kubeflow-training-operator/tfjob-mnist-with-summaries.yaml#L16-L22
|
# https://github.com/kubeflow/katib/blob/master/examples/v1beta1/kubeflow-training-operator/tfjob-mnist-with-summaries.yaml#L16-L22
|
||||||
|
|
||||||
import tensorflow as tf
|
import tensorflow as tf
|
||||||
from tensorboard.backend.event_processing.event_accumulator import EventAccumulator
|
from tensorboard.backend.event_processing.event_accumulator import EventAccumulator, TensorEvent
|
||||||
|
from tensorboard.backend.event_processing.tag_types import TENSORS
|
||||||
import os
|
import os
|
||||||
from datetime import datetime
|
|
||||||
import rfc3339
|
import rfc3339
|
||||||
|
from datetime import datetime
|
||||||
import api_pb2
|
import api_pb2
|
||||||
from logging import getLogger, StreamHandler, INFO
|
from logging import getLogger, StreamHandler, INFO
|
||||||
from pkg.metricscollector.v1beta1.common import const
|
from pkg.metricscollector.v1beta1.common import const
|
||||||
|
@ -43,9 +44,9 @@ class TFEventFileParser:
|
||||||
|
|
||||||
def parse_summary(self, tfefile):
|
def parse_summary(self, tfefile):
|
||||||
metric_logs = []
|
metric_logs = []
|
||||||
event_accumulator = EventAccumulator(tfefile, size_guidance={'tensors': 0})
|
event_accumulator = EventAccumulator(tfefile, size_guidance={TENSORS: 0})
|
||||||
event_accumulator.Reload()
|
event_accumulator.Reload()
|
||||||
for tag in event_accumulator.Tags()['tensors']:
|
for tag in event_accumulator.Tags()[TENSORS]:
|
||||||
for m in self.metric_names:
|
for m in self.metric_names:
|
||||||
|
|
||||||
tfefile_parent_dir = os.path.dirname(m) if len(m.split("/")) >= 2 else os.path.dirname(tfefile)
|
tfefile_parent_dir = os.path.dirname(m) if len(m.split("/")) >= 2 else os.path.dirname(tfefile)
|
||||||
|
@ -53,12 +54,12 @@ class TFEventFileParser:
|
||||||
if not tag.startswith(m.split("/")[-1]) or not basedir_name.endswith(tfefile_parent_dir):
|
if not tag.startswith(m.split("/")[-1]) or not basedir_name.endswith(tfefile_parent_dir):
|
||||||
continue
|
continue
|
||||||
|
|
||||||
for wall_time, step, tensor in event_accumulator.Tensors(tag):
|
for tensor in event_accumulator.Tensors(tag):
|
||||||
ml = api_pb2.MetricLog(
|
ml = api_pb2.MetricLog(
|
||||||
time_stamp=rfc3339.rfc3339(datetime.fromtimestamp(wall_time)),
|
time_stamp=rfc3339.rfc3339(datetime.fromtimestamp(tensor.wall_time)),
|
||||||
metric=api_pb2.Metric(
|
metric=api_pb2.Metric(
|
||||||
name=m,
|
name=m,
|
||||||
value=str(tf.make_ndarray(tensor))
|
value=str(tf.make_ndarray(tensor.tensor_proto))
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
metric_logs.append(ml)
|
metric_logs.append(ml)
|
||||||
|
|
|
@ -50,8 +50,8 @@ TRIAL_CONDITION_SUCCEEDED = "Succeeded"
|
||||||
|
|
||||||
# Supported base images for the Katib Trials.
|
# Supported base images for the Katib Trials.
|
||||||
# TODO (andreyvelich): Implement list_base_images function to get each image description.
|
# TODO (andreyvelich): Implement list_base_images function to get each image description.
|
||||||
BASE_IMAGE_TENSORFLOW = "docker.io/tensorflow/tensorflow:2.11.0"
|
BASE_IMAGE_TENSORFLOW = "docker.io/tensorflow/tensorflow:2.13.0"
|
||||||
BASE_IMAGE_TENSORFLOW_GPU = "docker.io/tensorflow/tensorflow:2.11.0-gpu"
|
BASE_IMAGE_TENSORFLOW_GPU = "docker.io/tensorflow/tensorflow:2.13.0-gpu"
|
||||||
BASE_IMAGE_PYTORCH = "docker.io/pytorch/pytorch:1.12.1-cuda11.3-cudnn8-runtime"
|
BASE_IMAGE_PYTORCH = "docker.io/pytorch/pytorch:1.12.1-cuda11.3-cudnn8-runtime"
|
||||||
BASE_IMAGE_MXNET = "docker.io/mxnet/python:1.9.1_native_py3"
|
BASE_IMAGE_MXNET = "docker.io/mxnet/python:1.9.1_native_py3"
|
||||||
|
|
||||||
|
|
|
@ -37,7 +37,7 @@ if os.path.exists(katib_grpc_api_file):
|
||||||
|
|
||||||
setuptools.setup(
|
setuptools.setup(
|
||||||
name="kubeflow-katib",
|
name="kubeflow-katib",
|
||||||
version="0.15.0",
|
version="0.16.0",
|
||||||
author="Kubeflow Authors",
|
author="Kubeflow Authors",
|
||||||
author_email="premnath.vel@gmail.com",
|
author_email="premnath.vel@gmail.com",
|
||||||
license="Apache License Version 2.0",
|
license="Apache License Version 2.0",
|
||||||
|
|
|
@ -66,7 +66,8 @@ cd ../../../../../ && WITH_DATABASE_TYPE=$WITH_DATABASE_TYPE make deploy && cd -
|
||||||
|
|
||||||
# Wait until all Katib pods is running.
|
# Wait until all Katib pods is running.
|
||||||
TIMEOUT=120s
|
TIMEOUT=120s
|
||||||
kubectl wait --for=condition=ready --timeout=${TIMEOUT} -l "katib.kubeflow.org/component in ($WITH_DATABASE_TYPE,controller,db-manager,ui)" -n kubeflow pod ||
|
|
||||||
|
kubectl wait --for=condition=ContainersReady=True --timeout=${TIMEOUT} -l "katib.kubeflow.org/component in ($WITH_DATABASE_TYPE,controller,db-manager,ui)" -n kubeflow pod ||
|
||||||
(kubectl get pods -n kubeflow && kubectl describe pods -n kubeflow && exit 1)
|
(kubectl get pods -n kubeflow && kubectl describe pods -n kubeflow && exit 1)
|
||||||
|
|
||||||
echo "All Katib components are running."
|
echo "All Katib components are running."
|
||||||
|
|
|
@ -52,7 +52,7 @@ spec:
|
||||||
spec:
|
spec:
|
||||||
containers:
|
containers:
|
||||||
- name: training-container
|
- name: training-container
|
||||||
image: docker.io/kubeflowkatib/mxnet-mnist:latest
|
image: docker.io/kubeflowkatib/mxnet-mnist:v0.16.0
|
||||||
command:
|
command:
|
||||||
- "python3"
|
- "python3"
|
||||||
- "/opt/mxnet-mnist/mnist.py"
|
- "/opt/mxnet-mnist/mnist.py"
|
||||||
|
|
|
@ -52,7 +52,7 @@ spec:
|
||||||
spec:
|
spec:
|
||||||
containers:
|
containers:
|
||||||
- name: training-container
|
- name: training-container
|
||||||
image: docker.io/kubeflowkatib/mxnet-mnist:latest
|
image: docker.io/kubeflowkatib/mxnet-mnist:v0.16.0
|
||||||
command:
|
command:
|
||||||
- "python3"
|
- "python3"
|
||||||
- "/opt/mxnet-mnist/mnist.py"
|
- "/opt/mxnet-mnist/mnist.py"
|
||||||
|
|
Loading…
Reference in New Issue