mirror of https://github.com/kubeflow/katib.git
Compare commits
9 Commits
Author | SHA1 | Date |
---|---|---|
|
fc6beec835 | |
|
f299a22672 | |
|
f60c76f3de | |
|
2116fc7340 | |
|
1092dba86b | |
|
95e8ef16e3 | |
|
945ae81623 | |
|
0dfb344d08 | |
|
bc5add92d5 |
|
@ -136,6 +136,11 @@ func main() {
|
|||
ctx := signals.SetupSignalHandler()
|
||||
certsReady := make(chan struct{})
|
||||
defer close(certsReady)
|
||||
|
||||
// The setupControllers will register controllers to the manager
|
||||
// after the certs for the admission webhooks have been generated.
|
||||
go setupControllers(mgr, certsReady, hookServer)
|
||||
|
||||
if initConfig.CertGeneratorConfig.Enable {
|
||||
if err = cert.AddToManager(mgr, initConfig.CertGeneratorConfig, certsReady); err != nil {
|
||||
log.Error(err, "Failed to set up cert-generator")
|
||||
|
@ -144,10 +149,6 @@ func main() {
|
|||
certsReady <- struct{}{}
|
||||
}
|
||||
|
||||
// The setupControllers will register controllers to the manager
|
||||
// after the certs for the admission webhooks have been generated.
|
||||
go setupControllers(mgr, certsReady, hookServer)
|
||||
|
||||
log.Info("Setting up health checker.")
|
||||
if err := mgr.AddReadyzCheck("readyz", hookServer.StartedChecker()); err != nil {
|
||||
log.Error(err, "Unable to add readyz endpoint to the manager")
|
||||
|
|
|
@ -2,4 +2,5 @@ psutil==5.9.4
|
|||
rfc3339>=6.2
|
||||
grpcio>=1.41.1
|
||||
googleapis-common-protos==1.6.0
|
||||
tensorflow==2.11.0
|
||||
tensorflow==2.13.0
|
||||
protobuf<=3.20.3
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
grpcio>=1.41.1
|
||||
googleapis-common-protos==1.6.0
|
||||
cython>=0.29.24
|
||||
tensorflow==2.11.0
|
||||
tensorflow==2.13.0
|
||||
protobuf<=3.20.3
|
||||
|
|
|
@ -76,7 +76,7 @@ spec:
|
|||
- name: num-examples
|
||||
container:
|
||||
name: model-training
|
||||
image: docker.io/kubeflowkatib/mxnet-mnist:latest
|
||||
image: docker.io/kubeflowkatib/mxnet-mnist:v0.16.0
|
||||
command:
|
||||
- "python3"
|
||||
- "/opt/mxnet-mnist/mnist.py"
|
||||
|
|
|
@ -62,7 +62,7 @@ spec:
|
|||
spec:
|
||||
containers:
|
||||
- name: training-container
|
||||
image: docker.io/kubeflowkatib/pytorch-mnist-cpu:latest
|
||||
image: docker.io/kubeflowkatib/pytorch-mnist-cpu:v0.16.0
|
||||
command:
|
||||
- "python3"
|
||||
- "/opt/pytorch-mnist/mnist.py"
|
||||
|
|
|
@ -54,7 +54,7 @@ spec:
|
|||
spec:
|
||||
containers:
|
||||
- name: training-container
|
||||
image: docker.io/kubeflowkatib/mxnet-mnist:latest
|
||||
image: docker.io/kubeflowkatib/mxnet-mnist:v0.16.0
|
||||
command:
|
||||
- "python3"
|
||||
- "/opt/mxnet-mnist/mnist.py"
|
||||
|
|
|
@ -57,7 +57,7 @@ spec:
|
|||
spec:
|
||||
containers:
|
||||
- name: training-container
|
||||
image: docker.io/kubeflowkatib/mxnet-mnist:latest
|
||||
image: docker.io/kubeflowkatib/mxnet-mnist:v0.16.0
|
||||
command:
|
||||
- "python3"
|
||||
- "/opt/mxnet-mnist/mnist.py"
|
||||
|
|
|
@ -57,7 +57,7 @@ spec:
|
|||
spec:
|
||||
containers:
|
||||
- name: training-container
|
||||
image: docker.io/kubeflowkatib/mxnet-mnist:latest
|
||||
image: docker.io/kubeflowkatib/mxnet-mnist:v0.16.0
|
||||
command:
|
||||
- "python3"
|
||||
- "/opt/mxnet-mnist/mnist.py"
|
||||
|
|
|
@ -55,7 +55,7 @@ spec:
|
|||
spec:
|
||||
containers:
|
||||
- name: training-container
|
||||
image: docker.io/kubeflowkatib/mxnet-mnist:latest
|
||||
image: docker.io/kubeflowkatib/mxnet-mnist:v0.16.0
|
||||
command:
|
||||
- "python3"
|
||||
- "/opt/mxnet-mnist/mnist.py"
|
||||
|
|
|
@ -69,7 +69,7 @@ spec:
|
|||
spec:
|
||||
containers:
|
||||
- name: training-container
|
||||
image: docker.io/kubeflowkatib/mxnet-mnist:latest
|
||||
image: docker.io/kubeflowkatib/mxnet-mnist:v0.16.0
|
||||
command:
|
||||
- "python3"
|
||||
- "/opt/mxnet-mnist/mnist.py"
|
||||
|
|
|
@ -54,7 +54,7 @@ spec:
|
|||
spec:
|
||||
containers:
|
||||
- name: training-container
|
||||
image: docker.io/kubeflowkatib/mxnet-mnist:latest
|
||||
image: docker.io/kubeflowkatib/mxnet-mnist:v0.16.0
|
||||
command:
|
||||
- "python3"
|
||||
- "/opt/mxnet-mnist/mnist.py"
|
||||
|
|
|
@ -54,7 +54,7 @@ spec:
|
|||
spec:
|
||||
containers:
|
||||
- name: training-container
|
||||
image: docker.io/kubeflowkatib/mxnet-mnist:latest
|
||||
image: docker.io/kubeflowkatib/mxnet-mnist:v0.16.0
|
||||
command:
|
||||
- "python3"
|
||||
- "/opt/mxnet-mnist/mnist.py"
|
||||
|
|
|
@ -43,7 +43,7 @@ spec:
|
|||
spec:
|
||||
containers:
|
||||
- name: training-container
|
||||
image: docker.io/kubeflowkatib/simple-pbt:latest
|
||||
image: docker.io/kubeflowkatib/simple-pbt:v0.16.0
|
||||
command:
|
||||
- "python3"
|
||||
- "/opt/pbt/pbt_test.py"
|
||||
|
|
|
@ -54,7 +54,7 @@ spec:
|
|||
spec:
|
||||
containers:
|
||||
- name: training-container
|
||||
image: docker.io/kubeflowkatib/mxnet-mnist:latest
|
||||
image: docker.io/kubeflowkatib/mxnet-mnist:v0.16.0
|
||||
command:
|
||||
- "python3"
|
||||
- "/opt/mxnet-mnist/mnist.py"
|
||||
|
|
|
@ -54,7 +54,7 @@ spec:
|
|||
spec:
|
||||
containers:
|
||||
- name: training-container
|
||||
image: docker.io/kubeflowkatib/mxnet-mnist:latest
|
||||
image: docker.io/kubeflowkatib/mxnet-mnist:v0.16.0
|
||||
command:
|
||||
- "python3"
|
||||
- "/opt/mxnet-mnist/mnist.py"
|
||||
|
|
|
@ -46,7 +46,7 @@ spec:
|
|||
spec:
|
||||
containers:
|
||||
- name: pytorch
|
||||
image: docker.io/kubeflowkatib/pytorch-mnist-cpu:latest
|
||||
image: docker.io/kubeflowkatib/pytorch-mnist-cpu:v0.16.0
|
||||
command:
|
||||
- "python3"
|
||||
- "/opt/pytorch-mnist/mnist.py"
|
||||
|
@ -61,7 +61,7 @@ spec:
|
|||
spec:
|
||||
containers:
|
||||
- name: pytorch
|
||||
image: docker.io/kubeflowkatib/pytorch-mnist-cpu:latest
|
||||
image: docker.io/kubeflowkatib/pytorch-mnist-cpu:v0.16.0
|
||||
command:
|
||||
- "python3"
|
||||
- "/opt/pytorch-mnist/mnist.py"
|
||||
|
|
|
@ -56,7 +56,7 @@ spec:
|
|||
spec:
|
||||
containers:
|
||||
- name: tensorflow
|
||||
image: docker.io/kubeflowkatib/tf-mnist-with-summaries:latest
|
||||
image: docker.io/kubeflowkatib/tf-mnist-with-summaries:v0.16.0
|
||||
command:
|
||||
- "python"
|
||||
- "/opt/tf-mnist-with-summaries/mnist.py"
|
||||
|
|
|
@ -67,7 +67,7 @@ spec:
|
|||
spec:
|
||||
containers:
|
||||
- name: training-container
|
||||
image: docker.io/kubeflowkatib/pytorch-mnist-cpu:latest
|
||||
image: docker.io/kubeflowkatib/pytorch-mnist-cpu:v0.16.0
|
||||
command:
|
||||
- "python3"
|
||||
- "/opt/pytorch-mnist/mnist.py"
|
||||
|
|
|
@ -52,7 +52,7 @@ spec:
|
|||
spec:
|
||||
containers:
|
||||
- name: training-container
|
||||
image: docker.io/kubeflowkatib/pytorch-mnist-cpu:latest
|
||||
image: docker.io/kubeflowkatib/pytorch-mnist-cpu:v0.16.0
|
||||
command:
|
||||
- "python3"
|
||||
- "/opt/pytorch-mnist/mnist.py"
|
||||
|
|
|
@ -54,7 +54,7 @@ spec:
|
|||
spec:
|
||||
containers:
|
||||
- name: training-container
|
||||
image: docker.io/kubeflowkatib/pytorch-mnist-cpu:latest
|
||||
image: docker.io/kubeflowkatib/pytorch-mnist-cpu:v0.16.0
|
||||
command:
|
||||
- "python3"
|
||||
- "/opt/pytorch-mnist/mnist.py"
|
||||
|
|
|
@ -59,7 +59,7 @@ spec:
|
|||
spec:
|
||||
containers:
|
||||
- name: training-container
|
||||
image: docker.io/kubeflowkatib/mxnet-mnist:latest
|
||||
image: docker.io/kubeflowkatib/mxnet-mnist:v0.16.0
|
||||
command:
|
||||
- "python3"
|
||||
- "/opt/mxnet-mnist/mnist.py"
|
||||
|
|
|
@ -60,7 +60,7 @@ spec:
|
|||
spec:
|
||||
containers:
|
||||
- name: training-container
|
||||
image: docker.io/kubeflowkatib/darts-cnn-cifar10-cpu:latest
|
||||
image: docker.io/kubeflowkatib/darts-cnn-cifar10-cpu:v0.16.0
|
||||
command:
|
||||
- python3
|
||||
- run_trial.py
|
||||
|
|
|
@ -77,7 +77,7 @@ spec:
|
|||
spec:
|
||||
containers:
|
||||
- name: training-container
|
||||
image: docker.io/kubeflowkatib/darts-cnn-cifar10-gpu:latest
|
||||
image: docker.io/kubeflowkatib/darts-cnn-cifar10-gpu:v0.16.0
|
||||
command:
|
||||
- python3
|
||||
- run_trial.py
|
||||
|
|
|
@ -139,7 +139,7 @@ spec:
|
|||
spec:
|
||||
containers:
|
||||
- name: training-container
|
||||
image: docker.io/kubeflowkatib/enas-cnn-cifar10-cpu:latest
|
||||
image: docker.io/kubeflowkatib/enas-cnn-cifar10-cpu:v0.16.0
|
||||
command:
|
||||
- python3
|
||||
- -u
|
||||
|
|
|
@ -136,7 +136,7 @@ spec:
|
|||
spec:
|
||||
containers:
|
||||
- name: training-container
|
||||
image: docker.io/kubeflowkatib/enas-cnn-cifar10-gpu:latest
|
||||
image: docker.io/kubeflowkatib/enas-cnn-cifar10-gpu:v0.16.0
|
||||
command:
|
||||
- python3
|
||||
- -u
|
||||
|
|
|
@ -55,7 +55,7 @@ spec:
|
|||
spec:
|
||||
containers:
|
||||
- name: training-container
|
||||
image: docker.io/kubeflowkatib/mxnet-mnist:latest
|
||||
image: docker.io/kubeflowkatib/mxnet-mnist:v0.16.0
|
||||
command:
|
||||
- "python3"
|
||||
- "/opt/mxnet-mnist/mnist.py"
|
||||
|
|
|
@ -55,7 +55,7 @@ spec:
|
|||
spec:
|
||||
containers:
|
||||
- name: training-container
|
||||
image: docker.io/kubeflowkatib/mxnet-mnist:latest
|
||||
image: docker.io/kubeflowkatib/mxnet-mnist:v0.16.0
|
||||
command:
|
||||
- "python3"
|
||||
- "/opt/mxnet-mnist/mnist.py"
|
||||
|
|
|
@ -89,7 +89,7 @@ spec:
|
|||
description: Number of training examples
|
||||
steps:
|
||||
- name: model-training
|
||||
image: docker.io/kubeflowkatib/mxnet-mnist:latest
|
||||
image: docker.io/kubeflowkatib/mxnet-mnist:v0.16.0
|
||||
command:
|
||||
- "python3"
|
||||
- "/opt/mxnet-mnist/mnist.py"
|
||||
|
|
|
@ -1,2 +1,2 @@
|
|||
scipy>=1.7.2
|
||||
tensorflow==2.11.0
|
||||
tensorflow==2.13.0
|
||||
|
|
|
@ -1 +1 @@
|
|||
tensorflow==2.11.0
|
||||
tensorflow==2.13.0
|
||||
|
|
|
@ -60,7 +60,7 @@ spec:
|
|||
spec:
|
||||
containers:
|
||||
- name: training-container
|
||||
image: docker.io/kubeflowkatib/mxnet-mnist:latest
|
||||
image: docker.io/kubeflowkatib/mxnet-mnist:v0.16.0
|
||||
command:
|
||||
- "python3"
|
||||
- "/opt/mxnet-mnist/mnist.py"
|
||||
|
|
|
@ -15,6 +15,7 @@ spec:
|
|||
metadata:
|
||||
labels:
|
||||
katib.kubeflow.org/component: controller
|
||||
katib.kubeflow.org/metrics-collector-injection: disabled
|
||||
annotations:
|
||||
prometheus.io/scrape: "true"
|
||||
prometheus.io/port: "8080"
|
||||
|
|
|
@ -15,7 +15,7 @@ data:
|
|||
spec:
|
||||
containers:
|
||||
- name: training-container
|
||||
image: docker.io/kubeflowkatib/mxnet-mnist:latest
|
||||
image: docker.io/kubeflowkatib/mxnet-mnist:v0.16.0
|
||||
command:
|
||||
- "python3"
|
||||
- "/opt/mxnet-mnist/mnist.py"
|
||||
|
@ -33,7 +33,7 @@ data:
|
|||
spec:
|
||||
containers:
|
||||
- name: training-container
|
||||
image: docker.io/kubeflowkatib/enas-cnn-cifar10-cpu:latest
|
||||
image: docker.io/kubeflowkatib/enas-cnn-cifar10-cpu:v0.16.0
|
||||
command:
|
||||
- python3
|
||||
- -u
|
||||
|
@ -54,7 +54,7 @@ data:
|
|||
spec:
|
||||
containers:
|
||||
- name: pytorch
|
||||
image: docker.io/kubeflowkatib/pytorch-mnist-cpu:latest
|
||||
image: docker.io/kubeflowkatib/pytorch-mnist-cpu:v0.16.0
|
||||
command:
|
||||
- "python3"
|
||||
- "/opt/pytorch-mnist/mnist.py"
|
||||
|
@ -68,7 +68,7 @@ data:
|
|||
spec:
|
||||
containers:
|
||||
- name: pytorch
|
||||
image: docker.io/kubeflowkatib/pytorch-mnist-cpu:latest
|
||||
image: docker.io/kubeflowkatib/pytorch-mnist-cpu:v0.16.0
|
||||
command:
|
||||
- "python3"
|
||||
- "/opt/pytorch-mnist/mnist.py"
|
||||
|
|
|
@ -4,4 +4,3 @@ kind: Kustomization
|
|||
|
||||
resources:
|
||||
- webhooks.yaml
|
||||
- secret.yaml
|
||||
|
|
|
@ -1,5 +0,0 @@
|
|||
---
|
||||
apiVersion: v1
|
||||
kind: Secret
|
||||
metadata:
|
||||
name: katib-webhook-cert
|
|
@ -63,6 +63,16 @@ webhooks:
|
|||
namespaceSelector:
|
||||
matchLabels:
|
||||
katib.kubeflow.org/metrics-collector-injection: enabled
|
||||
# Once the AdmissionWebhookMatchConditions feature gate is enabled by default, we should switch to control based on userInfo.
|
||||
# REF:
|
||||
# - AdmissionWebhookMatchConditions: https://kubernetes.io/docs/reference/access-authn-authz/extensible-admission-controllers/#matching-requests-matchconditions
|
||||
# - Tracking issue: https://github.com/kubeflow/katib/issues/2206
|
||||
objectSelector:
|
||||
matchExpressions:
|
||||
- key: katib.kubeflow.org/metrics-collector-injection
|
||||
operator: NotIn
|
||||
values:
|
||||
- disabled
|
||||
rules:
|
||||
- apiGroups:
|
||||
- ""
|
||||
|
|
|
@ -14,40 +14,40 @@ init:
|
|||
runtime:
|
||||
metricsCollectors:
|
||||
- kind: StdOut
|
||||
image: docker.io/kubeflowkatib/file-metrics-collector:latest
|
||||
image: docker.io/kubeflowkatib/file-metrics-collector:v0.16.0
|
||||
- kind: File
|
||||
image: docker.io/kubeflowkatib/file-metrics-collector:latest
|
||||
image: docker.io/kubeflowkatib/file-metrics-collector:v0.16.0
|
||||
- kind: TensorFlowEvent
|
||||
image: docker.io/kubeflowkatib/tfevent-metrics-collector:latest
|
||||
image: docker.io/kubeflowkatib/tfevent-metrics-collector:v0.16.0
|
||||
resources:
|
||||
limits:
|
||||
memory: 1Gi
|
||||
suggestions:
|
||||
- algorithmName: random
|
||||
image: docker.io/kubeflowkatib/suggestion-hyperopt:latest
|
||||
image: docker.io/kubeflowkatib/suggestion-hyperopt:v0.16.0
|
||||
- algorithmName: tpe
|
||||
image: docker.io/kubeflowkatib/suggestion-hyperopt:latest
|
||||
image: docker.io/kubeflowkatib/suggestion-hyperopt:v0.16.0
|
||||
- algorithmName: grid
|
||||
image: docker.io/kubeflowkatib/suggestion-optuna:latest
|
||||
image: docker.io/kubeflowkatib/suggestion-optuna:v0.16.0
|
||||
- algorithmName: hyperband
|
||||
image: docker.io/kubeflowkatib/suggestion-hyperband:latest
|
||||
image: docker.io/kubeflowkatib/suggestion-hyperband:v0.16.0
|
||||
- algorithmName: bayesianoptimization
|
||||
image: docker.io/kubeflowkatib/suggestion-skopt:latest
|
||||
image: docker.io/kubeflowkatib/suggestion-skopt:v0.16.0
|
||||
- algorithmName: cmaes
|
||||
image: docker.io/kubeflowkatib/suggestion-goptuna:latest
|
||||
image: docker.io/kubeflowkatib/suggestion-goptuna:v0.16.0
|
||||
- algorithmName: sobol
|
||||
image: docker.io/kubeflowkatib/suggestion-goptuna:latest
|
||||
image: docker.io/kubeflowkatib/suggestion-goptuna:v0.16.0
|
||||
- algorithmName: multivariate-tpe
|
||||
image: docker.io/kubeflowkatib/suggestion-optuna:latest
|
||||
image: docker.io/kubeflowkatib/suggestion-optuna:v0.16.0
|
||||
- algorithmName: enas
|
||||
image: docker.io/kubeflowkatib/suggestion-enas:latest
|
||||
image: docker.io/kubeflowkatib/suggestion-enas:v0.16.0
|
||||
resources:
|
||||
limits:
|
||||
memory: 200Mi
|
||||
- algorithmName: darts
|
||||
image: docker.io/kubeflowkatib/suggestion-darts:latest
|
||||
image: docker.io/kubeflowkatib/suggestion-darts:v0.16.0
|
||||
- algorithmName: pbt
|
||||
image: docker.io/kubeflowkatib/suggestion-pbt:latest
|
||||
image: docker.io/kubeflowkatib/suggestion-pbt:v0.16.0
|
||||
persistentVolumeClaimSpec:
|
||||
accessModes:
|
||||
- ReadWriteMany
|
||||
|
@ -56,4 +56,4 @@ runtime:
|
|||
storage: 5Gi
|
||||
earlyStoppings:
|
||||
- algorithmName: medianstop
|
||||
image: docker.io/kubeflowkatib/earlystopping-medianstop:latest
|
||||
image: docker.io/kubeflowkatib/earlystopping-medianstop:v0.16.0
|
||||
|
|
|
@ -22,13 +22,13 @@ resources:
|
|||
images:
|
||||
- name: docker.io/kubeflowkatib/katib-controller
|
||||
newName: docker.io/kubeflowkatib/katib-controller
|
||||
newTag: latest
|
||||
newTag: v0.16.0
|
||||
- name: docker.io/kubeflowkatib/katib-db-manager
|
||||
newName: docker.io/kubeflowkatib/katib-db-manager
|
||||
newTag: latest
|
||||
newTag: v0.16.0
|
||||
- name: docker.io/kubeflowkatib/katib-ui
|
||||
newName: docker.io/kubeflowkatib/katib-ui
|
||||
newTag: latest
|
||||
newTag: v0.16.0
|
||||
|
||||
patchesStrategicMerge:
|
||||
- patches/katib-cert-injection.yaml
|
||||
|
|
|
@ -16,40 +16,40 @@ init:
|
|||
runtime:
|
||||
metricsCollectors:
|
||||
- kind: StdOut
|
||||
image: docker.io/kubeflowkatib/file-metrics-collector:latest
|
||||
image: docker.io/kubeflowkatib/file-metrics-collector:v0.16.0
|
||||
- kind: File
|
||||
image: docker.io/kubeflowkatib/file-metrics-collector:latest
|
||||
image: docker.io/kubeflowkatib/file-metrics-collector:v0.16.0
|
||||
- kind: TensorFlowEvent
|
||||
image: docker.io/kubeflowkatib/tfevent-metrics-collector:latest
|
||||
image: docker.io/kubeflowkatib/tfevent-metrics-collector:v0.16.0
|
||||
resources:
|
||||
limits:
|
||||
memory: 1Gi
|
||||
suggestions:
|
||||
- algorithmName: random
|
||||
image: docker.io/kubeflowkatib/suggestion-hyperopt:latest
|
||||
image: docker.io/kubeflowkatib/suggestion-hyperopt:v0.16.0
|
||||
- algorithmName: tpe
|
||||
image: docker.io/kubeflowkatib/suggestion-hyperopt:latest
|
||||
image: docker.io/kubeflowkatib/suggestion-hyperopt:v0.16.0
|
||||
- algorithmName: grid
|
||||
image: docker.io/kubeflowkatib/suggestion-optuna:latest
|
||||
image: docker.io/kubeflowkatib/suggestion-optuna:v0.16.0
|
||||
- algorithmName: hyperband
|
||||
image: docker.io/kubeflowkatib/suggestion-hyperband:latest
|
||||
image: docker.io/kubeflowkatib/suggestion-hyperband:v0.16.0
|
||||
- algorithmName: bayesianoptimization
|
||||
image: docker.io/kubeflowkatib/suggestion-skopt:latest
|
||||
image: docker.io/kubeflowkatib/suggestion-skopt:v0.16.0
|
||||
- algorithmName: cmaes
|
||||
image: docker.io/kubeflowkatib/suggestion-goptuna:latest
|
||||
image: docker.io/kubeflowkatib/suggestion-goptuna:v0.16.0
|
||||
- algorithmName: sobol
|
||||
image: docker.io/kubeflowkatib/suggestion-goptuna:latest
|
||||
image: docker.io/kubeflowkatib/suggestion-goptuna:v0.16.0
|
||||
- algorithmName: multivariate-tpe
|
||||
image: docker.io/kubeflowkatib/suggestion-optuna:latest
|
||||
image: docker.io/kubeflowkatib/suggestion-optuna:v0.16.0
|
||||
- algorithmName: enas
|
||||
image: docker.io/kubeflowkatib/suggestion-enas:latest
|
||||
image: docker.io/kubeflowkatib/suggestion-enas:v0.16.0
|
||||
resources:
|
||||
limits:
|
||||
memory: 200Mi
|
||||
- algorithmName: darts
|
||||
image: docker.io/kubeflowkatib/suggestion-darts:latest
|
||||
image: docker.io/kubeflowkatib/suggestion-darts:v0.16.0
|
||||
- algorithmName: pbt
|
||||
image: docker.io/kubeflowkatib/suggestion-pbt:latest
|
||||
image: docker.io/kubeflowkatib/suggestion-pbt:v0.16.0
|
||||
persistentVolumeClaimSpec:
|
||||
accessModes:
|
||||
- ReadWriteMany
|
||||
|
@ -58,4 +58,4 @@ runtime:
|
|||
storage: 5Gi
|
||||
earlyStoppings:
|
||||
- algorithmName: medianstop
|
||||
image: docker.io/kubeflowkatib/earlystopping-medianstop:latest
|
||||
image: docker.io/kubeflowkatib/earlystopping-medianstop:v0.16.0
|
||||
|
|
|
@ -18,13 +18,13 @@ resources:
|
|||
images:
|
||||
- name: docker.io/kubeflowkatib/katib-controller
|
||||
newName: docker.io/kubeflowkatib/katib-controller
|
||||
newTag: latest
|
||||
newTag: v0.16.0
|
||||
- name: docker.io/kubeflowkatib/katib-db-manager
|
||||
newName: docker.io/kubeflowkatib/katib-db-manager
|
||||
newTag: latest
|
||||
newTag: v0.16.0
|
||||
- name: docker.io/kubeflowkatib/katib-ui
|
||||
newName: docker.io/kubeflowkatib/katib-ui
|
||||
newTag: latest
|
||||
newTag: v0.16.0
|
||||
patchesStrategicMerge:
|
||||
- patches/db-manager.yaml
|
||||
# Modify katib-mysql-secrets with parameters for the DB.
|
||||
|
@ -32,6 +32,10 @@ secretGenerator:
|
|||
- name: katib-mysql-secrets
|
||||
envs:
|
||||
- secrets.env
|
||||
# Secret for webhooks certs.
|
||||
- name: katib-webhook-cert
|
||||
options:
|
||||
disableNameSuffixHash: true
|
||||
configMapGenerator:
|
||||
- name: katib-config
|
||||
behavior: create
|
||||
|
|
|
@ -17,40 +17,40 @@ init:
|
|||
runtime:
|
||||
metricsCollectors:
|
||||
- kind: StdOut
|
||||
image: docker.io/kubeflowkatib/file-metrics-collector:latest
|
||||
image: docker.io/kubeflowkatib/file-metrics-collector:v0.16.0
|
||||
- kind: File
|
||||
image: docker.io/kubeflowkatib/file-metrics-collector:latest
|
||||
image: docker.io/kubeflowkatib/file-metrics-collector:v0.16.0
|
||||
- kind: TensorFlowEvent
|
||||
image: docker.io/kubeflowkatib/tfevent-metrics-collector:latest
|
||||
image: docker.io/kubeflowkatib/tfevent-metrics-collector:v0.16.0
|
||||
resources:
|
||||
limits:
|
||||
memory: 1Gi
|
||||
suggestions:
|
||||
- algorithmName: random
|
||||
image: docker.io/kubeflowkatib/suggestion-hyperopt:latest
|
||||
image: docker.io/kubeflowkatib/suggestion-hyperopt:v0.16.0
|
||||
- algorithmName: tpe
|
||||
image: docker.io/kubeflowkatib/suggestion-hyperopt:latest
|
||||
image: docker.io/kubeflowkatib/suggestion-hyperopt:v0.16.0
|
||||
- algorithmName: grid
|
||||
image: docker.io/kubeflowkatib/suggestion-optuna:latest
|
||||
image: docker.io/kubeflowkatib/suggestion-optuna:v0.16.0
|
||||
- algorithmName: hyperband
|
||||
image: docker.io/kubeflowkatib/suggestion-hyperband:latest
|
||||
image: docker.io/kubeflowkatib/suggestion-hyperband:v0.16.0
|
||||
- algorithmName: bayesianoptimization
|
||||
image: docker.io/kubeflowkatib/suggestion-skopt:latest
|
||||
image: docker.io/kubeflowkatib/suggestion-skopt:v0.16.0
|
||||
- algorithmName: cmaes
|
||||
image: docker.io/kubeflowkatib/suggestion-goptuna:latest
|
||||
image: docker.io/kubeflowkatib/suggestion-goptuna:v0.16.0
|
||||
- algorithmName: sobol
|
||||
image: docker.io/kubeflowkatib/suggestion-goptuna:latest
|
||||
image: docker.io/kubeflowkatib/suggestion-goptuna:v0.16.0
|
||||
- algorithmName: multivariate-tpe
|
||||
image: docker.io/kubeflowkatib/suggestion-optuna:latest
|
||||
image: docker.io/kubeflowkatib/suggestion-optuna:v0.16.0
|
||||
- algorithmName: enas
|
||||
image: docker.io/kubeflowkatib/suggestion-enas:latest
|
||||
image: docker.io/kubeflowkatib/suggestion-enas:v0.16.0
|
||||
resources:
|
||||
limits:
|
||||
memory: 200Mi
|
||||
- algorithmName: darts
|
||||
image: docker.io/kubeflowkatib/suggestion-darts:latest
|
||||
image: docker.io/kubeflowkatib/suggestion-darts:v0.16.0
|
||||
- algorithmName: pbt
|
||||
image: docker.io/kubeflowkatib/suggestion-pbt:latest
|
||||
image: docker.io/kubeflowkatib/suggestion-pbt:v0.16.0
|
||||
persistentVolumeClaimSpec:
|
||||
accessModes:
|
||||
- ReadWriteMany
|
||||
|
@ -59,4 +59,4 @@ runtime:
|
|||
storage: 5Gi
|
||||
earlyStoppings:
|
||||
- algorithmName: medianstop
|
||||
image: docker.io/kubeflowkatib/earlystopping-medianstop:latest
|
||||
image: docker.io/kubeflowkatib/earlystopping-medianstop:v0.16.0
|
||||
|
|
|
@ -14,40 +14,40 @@ init:
|
|||
runtime:
|
||||
metricsCollectors:
|
||||
- kind: StdOut
|
||||
image: docker.io/kubeflowkatib/file-metrics-collector:latest
|
||||
image: docker.io/kubeflowkatib/file-metrics-collector:v0.16.0
|
||||
- kind: File
|
||||
image: docker.io/kubeflowkatib/file-metrics-collector:latest
|
||||
image: docker.io/kubeflowkatib/file-metrics-collector:v0.16.0
|
||||
- kind: TensorFlowEvent
|
||||
image: docker.io/kubeflowkatib/tfevent-metrics-collector:latest
|
||||
image: docker.io/kubeflowkatib/tfevent-metrics-collector:v0.16.0
|
||||
resources:
|
||||
limits:
|
||||
memory: 1Gi
|
||||
suggestions:
|
||||
- algorithmName: random
|
||||
image: docker.io/kubeflowkatib/suggestion-hyperopt:latest
|
||||
image: docker.io/kubeflowkatib/suggestion-hyperopt:v0.16.0
|
||||
- algorithmName: tpe
|
||||
image: docker.io/kubeflowkatib/suggestion-hyperopt:latest
|
||||
image: docker.io/kubeflowkatib/suggestion-hyperopt:v0.16.0
|
||||
- algorithmName: grid
|
||||
image: docker.io/kubeflowkatib/suggestion-optuna:latest
|
||||
image: docker.io/kubeflowkatib/suggestion-optuna:v0.16.0
|
||||
- algorithmName: hyperband
|
||||
image: docker.io/kubeflowkatib/suggestion-hyperband:latest
|
||||
image: docker.io/kubeflowkatib/suggestion-hyperband:v0.16.0
|
||||
- algorithmName: bayesianoptimization
|
||||
image: docker.io/kubeflowkatib/suggestion-skopt:latest
|
||||
image: docker.io/kubeflowkatib/suggestion-skopt:v0.16.0
|
||||
- algorithmName: cmaes
|
||||
image: docker.io/kubeflowkatib/suggestion-goptuna:latest
|
||||
image: docker.io/kubeflowkatib/suggestion-goptuna:v0.16.0
|
||||
- algorithmName: sobol
|
||||
image: docker.io/kubeflowkatib/suggestion-goptuna:latest
|
||||
image: docker.io/kubeflowkatib/suggestion-goptuna:v0.16.0
|
||||
- algorithmName: multivariate-tpe
|
||||
image: docker.io/kubeflowkatib/suggestion-optuna:latest
|
||||
image: docker.io/kubeflowkatib/suggestion-optuna:v0.16.0
|
||||
- algorithmName: enas
|
||||
image: docker.io/kubeflowkatib/suggestion-enas:latest
|
||||
image: docker.io/kubeflowkatib/suggestion-enas:v0.16.0
|
||||
resources:
|
||||
limits:
|
||||
memory: 200Mi
|
||||
- algorithmName: darts
|
||||
image: docker.io/kubeflowkatib/suggestion-darts:latest
|
||||
image: docker.io/kubeflowkatib/suggestion-darts:v0.16.0
|
||||
- algorithmName: pbt
|
||||
image: docker.io/kubeflowkatib/suggestion-pbt:latest
|
||||
image: docker.io/kubeflowkatib/suggestion-pbt:v0.16.0
|
||||
persistentVolumeClaimSpec:
|
||||
accessModes:
|
||||
- ReadWriteMany
|
||||
|
@ -56,4 +56,4 @@ runtime:
|
|||
storage: 5Gi
|
||||
earlyStoppings:
|
||||
- algorithmName: medianstop
|
||||
image: docker.io/kubeflowkatib/earlystopping-medianstop:latest
|
||||
image: docker.io/kubeflowkatib/earlystopping-medianstop:v0.16.0
|
||||
|
|
|
@ -30,13 +30,13 @@ resources:
|
|||
images:
|
||||
- name: docker.io/kubeflowkatib/katib-controller
|
||||
newName: docker.io/kubeflowkatib/katib-controller
|
||||
newTag: latest
|
||||
newTag: v0.16.0
|
||||
- name: docker.io/kubeflowkatib/katib-db-manager
|
||||
newName: docker.io/kubeflowkatib/katib-db-manager
|
||||
newTag: latest
|
||||
newTag: v0.16.0
|
||||
- name: docker.io/kubeflowkatib/katib-ui
|
||||
newName: docker.io/kubeflowkatib/katib-ui
|
||||
newTag: latest
|
||||
newTag: v0.16.0
|
||||
|
||||
patchesJson6902:
|
||||
# Annotate Service to delegate TLS-secret generation to OpenShift service controller
|
||||
|
|
|
@ -16,40 +16,40 @@ init:
|
|||
runtime:
|
||||
metricsCollectors:
|
||||
- kind: StdOut
|
||||
image: docker.io/kubeflowkatib/file-metrics-collector:latest
|
||||
image: docker.io/kubeflowkatib/file-metrics-collector:v0.16.0
|
||||
- kind: File
|
||||
image: docker.io/kubeflowkatib/file-metrics-collector:latest
|
||||
image: docker.io/kubeflowkatib/file-metrics-collector:v0.16.0
|
||||
- kind: TensorFlowEvent
|
||||
image: docker.io/kubeflowkatib/tfevent-metrics-collector:latest
|
||||
image: docker.io/kubeflowkatib/tfevent-metrics-collector:v0.16.0
|
||||
resources:
|
||||
limits:
|
||||
memory: 1Gi
|
||||
suggestions:
|
||||
- algorithmName: random
|
||||
image: docker.io/kubeflowkatib/suggestion-hyperopt:latest
|
||||
image: docker.io/kubeflowkatib/suggestion-hyperopt:v0.16.0
|
||||
- algorithmName: tpe
|
||||
image: docker.io/kubeflowkatib/suggestion-hyperopt:latest
|
||||
image: docker.io/kubeflowkatib/suggestion-hyperopt:v0.16.0
|
||||
- algorithmName: grid
|
||||
image: docker.io/kubeflowkatib/suggestion-optuna:latest
|
||||
image: docker.io/kubeflowkatib/suggestion-optuna:v0.16.0
|
||||
- algorithmName: hyperband
|
||||
image: docker.io/kubeflowkatib/suggestion-hyperband:latest
|
||||
image: docker.io/kubeflowkatib/suggestion-hyperband:v0.16.0
|
||||
- algorithmName: bayesianoptimization
|
||||
image: docker.io/kubeflowkatib/suggestion-skopt:latest
|
||||
image: docker.io/kubeflowkatib/suggestion-skopt:v0.16.0
|
||||
- algorithmName: cmaes
|
||||
image: docker.io/kubeflowkatib/suggestion-goptuna:latest
|
||||
image: docker.io/kubeflowkatib/suggestion-goptuna:v0.16.0
|
||||
- algorithmName: sobol
|
||||
image: docker.io/kubeflowkatib/suggestion-goptuna:latest
|
||||
image: docker.io/kubeflowkatib/suggestion-goptuna:v0.16.0
|
||||
- algorithmName: multivariate-tpe
|
||||
image: docker.io/kubeflowkatib/suggestion-optuna:latest
|
||||
image: docker.io/kubeflowkatib/suggestion-optuna:v0.16.0
|
||||
- algorithmName: enas
|
||||
image: docker.io/kubeflowkatib/suggestion-enas:latest
|
||||
image: docker.io/kubeflowkatib/suggestion-enas:v0.16.0
|
||||
resources:
|
||||
limits:
|
||||
memory: 200Mi
|
||||
- algorithmName: darts
|
||||
image: docker.io/kubeflowkatib/suggestion-darts:latest
|
||||
image: docker.io/kubeflowkatib/suggestion-darts:v0.16.0
|
||||
- algorithmName: pbt
|
||||
image: docker.io/kubeflowkatib/suggestion-pbt:latest
|
||||
image: docker.io/kubeflowkatib/suggestion-pbt:v0.16.0
|
||||
persistentVolumeClaimSpec:
|
||||
accessModes:
|
||||
- ReadWriteMany
|
||||
|
@ -58,4 +58,4 @@ runtime:
|
|||
storage: 5Gi
|
||||
earlyStoppings:
|
||||
- algorithmName: medianstop
|
||||
image: docker.io/kubeflowkatib/earlystopping-medianstop:latest
|
||||
image: docker.io/kubeflowkatib/earlystopping-medianstop:v0.16.0
|
||||
|
|
|
@ -20,13 +20,13 @@ resources:
|
|||
images:
|
||||
- name: docker.io/kubeflowkatib/katib-controller
|
||||
newName: docker.io/kubeflowkatib/katib-controller
|
||||
newTag: latest
|
||||
newTag: v0.16.0
|
||||
- name: docker.io/kubeflowkatib/katib-db-manager
|
||||
newName: docker.io/kubeflowkatib/katib-db-manager
|
||||
newTag: latest
|
||||
newTag: v0.16.0
|
||||
- name: docker.io/kubeflowkatib/katib-ui
|
||||
newName: docker.io/kubeflowkatib/katib-ui
|
||||
newTag: latest
|
||||
newTag: v0.16.0
|
||||
patchesJson6902:
|
||||
- target:
|
||||
group: apps
|
||||
|
@ -41,3 +41,8 @@ configMapGenerator:
|
|||
- katib-config.yaml
|
||||
options:
|
||||
disableNameSuffixHash: true
|
||||
# Secret for webhooks certs.
|
||||
secretGenerator:
|
||||
- name: katib-webhook-cert
|
||||
options:
|
||||
disableNameSuffixHash: true
|
||||
|
|
|
@ -16,40 +16,40 @@ init:
|
|||
runtime:
|
||||
metricsCollectors:
|
||||
- kind: StdOut
|
||||
image: docker.io/kubeflowkatib/file-metrics-collector:latest
|
||||
image: docker.io/kubeflowkatib/file-metrics-collector:v0.16.0
|
||||
- kind: File
|
||||
image: docker.io/kubeflowkatib/file-metrics-collector:latest
|
||||
image: docker.io/kubeflowkatib/file-metrics-collector:v0.16.0
|
||||
- kind: TensorFlowEvent
|
||||
image: docker.io/kubeflowkatib/tfevent-metrics-collector:latest
|
||||
image: docker.io/kubeflowkatib/tfevent-metrics-collector:v0.16.0
|
||||
resources:
|
||||
limits:
|
||||
memory: 1Gi
|
||||
suggestions:
|
||||
- algorithmName: random
|
||||
image: docker.io/kubeflowkatib/suggestion-hyperopt:latest
|
||||
image: docker.io/kubeflowkatib/suggestion-hyperopt:v0.16.0
|
||||
- algorithmName: tpe
|
||||
image: docker.io/kubeflowkatib/suggestion-hyperopt:latest
|
||||
image: docker.io/kubeflowkatib/suggestion-hyperopt:v0.16.0
|
||||
- algorithmName: grid
|
||||
image: docker.io/kubeflowkatib/suggestion-optuna:latest
|
||||
image: docker.io/kubeflowkatib/suggestion-optuna:v0.16.0
|
||||
- algorithmName: hyperband
|
||||
image: docker.io/kubeflowkatib/suggestion-hyperband:latest
|
||||
image: docker.io/kubeflowkatib/suggestion-hyperband:v0.16.0
|
||||
- algorithmName: bayesianoptimization
|
||||
image: docker.io/kubeflowkatib/suggestion-skopt:latest
|
||||
image: docker.io/kubeflowkatib/suggestion-skopt:v0.16.0
|
||||
- algorithmName: cmaes
|
||||
image: docker.io/kubeflowkatib/suggestion-goptuna:latest
|
||||
image: docker.io/kubeflowkatib/suggestion-goptuna:v0.16.0
|
||||
- algorithmName: sobol
|
||||
image: docker.io/kubeflowkatib/suggestion-goptuna:latest
|
||||
image: docker.io/kubeflowkatib/suggestion-goptuna:v0.16.0
|
||||
- algorithmName: multivariate-tpe
|
||||
image: docker.io/kubeflowkatib/suggestion-optuna:latest
|
||||
image: docker.io/kubeflowkatib/suggestion-optuna:v0.16.0
|
||||
- algorithmName: enas
|
||||
image: docker.io/kubeflowkatib/suggestion-enas:latest
|
||||
image: docker.io/kubeflowkatib/suggestion-enas:v0.16.0
|
||||
resources:
|
||||
limits:
|
||||
memory: 200Mi
|
||||
- algorithmName: darts
|
||||
image: docker.io/kubeflowkatib/suggestion-darts:latest
|
||||
image: docker.io/kubeflowkatib/suggestion-darts:v0.16.0
|
||||
- algorithmName: pbt
|
||||
image: docker.io/kubeflowkatib/suggestion-pbt:latest
|
||||
image: docker.io/kubeflowkatib/suggestion-pbt:v0.16.0
|
||||
persistentVolumeClaimSpec:
|
||||
accessModes:
|
||||
- ReadWriteMany
|
||||
|
@ -58,4 +58,4 @@ runtime:
|
|||
storage: 5Gi
|
||||
earlyStoppings:
|
||||
- algorithmName: medianstop
|
||||
image: docker.io/kubeflowkatib/earlystopping-medianstop:latest
|
||||
image: docker.io/kubeflowkatib/earlystopping-medianstop:v0.16.0
|
||||
|
|
|
@ -20,13 +20,13 @@ resources:
|
|||
images:
|
||||
- name: docker.io/kubeflowkatib/katib-controller
|
||||
newName: docker.io/kubeflowkatib/katib-controller
|
||||
newTag: latest
|
||||
newTag: v0.16.0
|
||||
- name: docker.io/kubeflowkatib/katib-db-manager
|
||||
newName: docker.io/kubeflowkatib/katib-db-manager
|
||||
newTag: latest
|
||||
newTag: v0.16.0
|
||||
- name: docker.io/kubeflowkatib/katib-ui
|
||||
newName: docker.io/kubeflowkatib/katib-ui
|
||||
newTag: latest
|
||||
newTag: v0.16.0
|
||||
configMapGenerator:
|
||||
- name: katib-config
|
||||
behavior: create
|
||||
|
@ -34,3 +34,8 @@ configMapGenerator:
|
|||
- katib-config.yaml
|
||||
options:
|
||||
disableNameSuffixHash: true
|
||||
# Secret for webhooks certs.
|
||||
secretGenerator:
|
||||
- name: katib-webhook-cert
|
||||
options:
|
||||
disableNameSuffixHash: true
|
||||
|
|
|
@ -11,13 +11,13 @@ resources:
|
|||
images:
|
||||
- name: docker.io/kubeflowkatib/katib-controller
|
||||
newName: docker.io/kubeflowkatib/katib-controller
|
||||
newTag: latest
|
||||
newTag: v0.16.0
|
||||
- name: docker.io/kubeflowkatib/katib-db-manager
|
||||
newName: docker.io/kubeflowkatib/katib-db-manager
|
||||
newTag: latest
|
||||
newTag: v0.16.0
|
||||
- name: docker.io/kubeflowkatib/katib-ui
|
||||
newName: docker.io/kubeflowkatib/katib-ui
|
||||
newTag: latest
|
||||
newTag: v0.16.0
|
||||
|
||||
patchesStrategicMerge:
|
||||
- patches/remove-namespace.yaml
|
||||
|
|
|
@ -26,12 +26,15 @@ import (
|
|||
"errors"
|
||||
"fmt"
|
||||
"math/big"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
admissionregistrationv1 "k8s.io/api/admissionregistration/v1"
|
||||
corev1 "k8s.io/api/core/v1"
|
||||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||
"k8s.io/apimachinery/pkg/util/wait"
|
||||
"k8s.io/klog"
|
||||
"sigs.k8s.io/controller-runtime/pkg/client"
|
||||
"sigs.k8s.io/controller-runtime/pkg/manager"
|
||||
|
@ -53,11 +56,11 @@ type CertGenerator struct {
|
|||
namespace string
|
||||
webhookServiceName string
|
||||
webhookSecretName string
|
||||
fullServiceDomain string
|
||||
kubeClient client.Client
|
||||
certsReady chan struct{}
|
||||
|
||||
certs *certificates
|
||||
fullServiceDomain string
|
||||
certs *certificates
|
||||
}
|
||||
|
||||
var _ manager.Runnable = &CertGenerator{}
|
||||
|
@ -67,11 +70,50 @@ func (c *CertGenerator) Start(ctx context.Context) error {
|
|||
if err := c.generate(ctx); err != nil {
|
||||
return err
|
||||
}
|
||||
klog.Info("Waiting for certs to get ready.")
|
||||
if err := wait.ExponentialBackoffWithContext(ctx, wait.Backoff{
|
||||
Duration: time.Second,
|
||||
Factor: 2,
|
||||
Jitter: 1,
|
||||
Steps: 10,
|
||||
Cap: time.Minute * 5,
|
||||
}, ensureCertMounted(time.Now())); err != nil {
|
||||
return err
|
||||
}
|
||||
// Sending an empty struct on certsReady signals that the controllers can now be registered to the manager.
|
||||
c.certsReady <- struct{}{}
|
||||
return nil
|
||||
}
|
||||
|
||||
// ensureCertMounted ensures that the generated certs are mounted inside the container.
|
||||
func ensureCertMounted(start time.Time) func(context.Context) (bool, error) {
|
||||
return func(ctx context.Context) (bool, error) {
|
||||
now := time.Now()
|
||||
outputLog := false
|
||||
if now.Sub(start) >= 15*time.Second {
|
||||
start = now
|
||||
outputLog = true
|
||||
}
|
||||
|
||||
certFile := filepath.Join(consts.CertDir, serverCertName)
|
||||
if _, err := os.Stat(certFile); err != nil {
|
||||
if outputLog {
|
||||
klog.Infof("Public key file %q doesn't exist in the container yet", certFile)
|
||||
}
|
||||
return false, nil
|
||||
}
|
||||
keyFile := filepath.Join(consts.CertDir, serverKeyName)
|
||||
if _, err := os.Stat(keyFile); err != nil {
|
||||
if outputLog {
|
||||
klog.Infof("Private key file %q doesn't exist in the container yet", keyFile)
|
||||
}
|
||||
return false, nil
|
||||
}
|
||||
klog.Info("Succeeded to be mounted certs inside the container.")
|
||||
return true, nil
|
||||
}
|
||||
}
|
||||
|
||||
func (c *CertGenerator) NeedLeaderElection() bool {
|
||||
return false
|
||||
}
|
||||
|
@ -82,8 +124,13 @@ func AddToManager(mgr manager.Manager, config configv1beta1.CertGeneratorConfig,
|
|||
namespace: consts.DefaultKatibNamespace,
|
||||
webhookServiceName: config.WebhookServiceName,
|
||||
webhookSecretName: config.WebhookSecretName,
|
||||
kubeClient: mgr.GetClient(),
|
||||
certsReady: certsReady,
|
||||
fullServiceDomain: strings.Join([]string{
|
||||
config.WebhookServiceName,
|
||||
consts.DefaultKatibNamespace,
|
||||
"svc",
|
||||
}, "."),
|
||||
kubeClient: mgr.GetClient(),
|
||||
certsReady: certsReady,
|
||||
})
|
||||
}
|
||||
|
||||
|
@ -99,8 +146,6 @@ func (c *CertGenerator) generate(ctx context.Context) error {
|
|||
return fmt.Errorf("%w: %v", errCertCheckFail, err)
|
||||
}
|
||||
if !certExist {
|
||||
c.fullServiceDomain = strings.Join([]string{c.webhookServiceName, c.namespace, "svc"}, ".")
|
||||
|
||||
if err = c.createCert(); err != nil {
|
||||
return fmt.Errorf("%w: %v", errCreateCertFail, err)
|
||||
}
|
||||
|
|
|
@ -18,8 +18,11 @@ package certgenerator
|
|||
|
||||
import (
|
||||
"context"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/google/go-cmp/cmp"
|
||||
"github.com/google/go-cmp/cmp/cmpopts"
|
||||
|
@ -31,6 +34,7 @@ import (
|
|||
"sigs.k8s.io/controller-runtime/pkg/client/fake"
|
||||
|
||||
configv1beta1 "github.com/kubeflow/katib/pkg/apis/config/v1beta1"
|
||||
"github.com/kubeflow/katib/pkg/controller.v1beta1/consts"
|
||||
)
|
||||
|
||||
func TestGenerate(t *testing.T) {
|
||||
|
@ -210,3 +214,61 @@ func buildFakeClient(kubeResources []client.Object) client.Client {
|
|||
}
|
||||
return fakeClientBuilder.Build()
|
||||
}
|
||||
|
||||
func TestEnsureCertMounted(t *testing.T) {
|
||||
tests := map[string]struct {
|
||||
keyExist bool
|
||||
certExist bool
|
||||
wantExist bool
|
||||
}{
|
||||
"key and cert exist": {
|
||||
keyExist: true,
|
||||
certExist: true,
|
||||
wantExist: true,
|
||||
},
|
||||
"key doesn't exist": {
|
||||
keyExist: false,
|
||||
certExist: true,
|
||||
wantExist: false,
|
||||
},
|
||||
"cert doesn't exist": {
|
||||
keyExist: true,
|
||||
certExist: false,
|
||||
wantExist: false,
|
||||
},
|
||||
"all files doesn't exist": {
|
||||
keyExist: false,
|
||||
certExist: false,
|
||||
wantExist: false,
|
||||
},
|
||||
}
|
||||
for name, tc := range tests {
|
||||
t.Run(name, func(t *testing.T) {
|
||||
if tc.keyExist || tc.certExist {
|
||||
if err := os.MkdirAll(consts.CertDir, 0760); err != nil {
|
||||
t.Fatalf("Failed to set up directory: %v", err)
|
||||
}
|
||||
defer func() {
|
||||
if err := os.RemoveAll(consts.CertDir); err != nil {
|
||||
t.Fatalf("Failed to clean up directory: %v", err)
|
||||
}
|
||||
}()
|
||||
}
|
||||
if tc.keyExist {
|
||||
if _, err := os.Create(filepath.Join(consts.CertDir, serverKeyName)); err != nil {
|
||||
t.Fatalf("Failed to create tls.key: %v", err)
|
||||
}
|
||||
}
|
||||
if tc.certExist {
|
||||
if _, err := os.Create(filepath.Join(consts.CertDir, serverCertName)); err != nil {
|
||||
t.Fatalf("Failed to create tls.crt: %v", err)
|
||||
}
|
||||
}
|
||||
ensureFunc := ensureCertMounted(time.Now())
|
||||
got, _ := ensureFunc(context.Background())
|
||||
if tc.wantExist != got {
|
||||
t.Errorf("Unexpected value from ensureCertMounted: \n(want: %v, got: %v)\n", tc.wantExist, got)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
|
|
@ -22,10 +22,11 @@
|
|||
# https://github.com/kubeflow/katib/blob/master/examples/v1beta1/kubeflow-training-operator/tfjob-mnist-with-summaries.yaml#L16-L22
|
||||
|
||||
import tensorflow as tf
|
||||
from tensorboard.backend.event_processing.event_accumulator import EventAccumulator
|
||||
from tensorboard.backend.event_processing.event_accumulator import EventAccumulator, TensorEvent
|
||||
from tensorboard.backend.event_processing.tag_types import TENSORS
|
||||
import os
|
||||
from datetime import datetime
|
||||
import rfc3339
|
||||
from datetime import datetime
|
||||
import api_pb2
|
||||
from logging import getLogger, StreamHandler, INFO
|
||||
from pkg.metricscollector.v1beta1.common import const
|
||||
|
@ -43,9 +44,9 @@ class TFEventFileParser:
|
|||
|
||||
def parse_summary(self, tfefile):
|
||||
metric_logs = []
|
||||
event_accumulator = EventAccumulator(tfefile, size_guidance={'tensors': 0})
|
||||
event_accumulator = EventAccumulator(tfefile, size_guidance={TENSORS: 0})
|
||||
event_accumulator.Reload()
|
||||
for tag in event_accumulator.Tags()['tensors']:
|
||||
for tag in event_accumulator.Tags()[TENSORS]:
|
||||
for m in self.metric_names:
|
||||
|
||||
tfefile_parent_dir = os.path.dirname(m) if len(m.split("/")) >= 2 else os.path.dirname(tfefile)
|
||||
|
@ -53,12 +54,12 @@ class TFEventFileParser:
|
|||
if not tag.startswith(m.split("/")[-1]) or not basedir_name.endswith(tfefile_parent_dir):
|
||||
continue
|
||||
|
||||
for wall_time, step, tensor in event_accumulator.Tensors(tag):
|
||||
for tensor in event_accumulator.Tensors(tag):
|
||||
ml = api_pb2.MetricLog(
|
||||
time_stamp=rfc3339.rfc3339(datetime.fromtimestamp(wall_time)),
|
||||
time_stamp=rfc3339.rfc3339(datetime.fromtimestamp(tensor.wall_time)),
|
||||
metric=api_pb2.Metric(
|
||||
name=m,
|
||||
value=str(tf.make_ndarray(tensor))
|
||||
value=str(tf.make_ndarray(tensor.tensor_proto))
|
||||
)
|
||||
)
|
||||
metric_logs.append(ml)
|
||||
|
|
|
@ -50,8 +50,8 @@ TRIAL_CONDITION_SUCCEEDED = "Succeeded"
|
|||
|
||||
# Supported base images for the Katib Trials.
|
||||
# TODO (andreyvelich): Implement list_base_images function to get each image description.
|
||||
BASE_IMAGE_TENSORFLOW = "docker.io/tensorflow/tensorflow:2.11.0"
|
||||
BASE_IMAGE_TENSORFLOW_GPU = "docker.io/tensorflow/tensorflow:2.11.0-gpu"
|
||||
BASE_IMAGE_TENSORFLOW = "docker.io/tensorflow/tensorflow:2.13.0"
|
||||
BASE_IMAGE_TENSORFLOW_GPU = "docker.io/tensorflow/tensorflow:2.13.0-gpu"
|
||||
BASE_IMAGE_PYTORCH = "docker.io/pytorch/pytorch:1.12.1-cuda11.3-cudnn8-runtime"
|
||||
BASE_IMAGE_MXNET = "docker.io/mxnet/python:1.9.1_native_py3"
|
||||
|
||||
|
|
|
@ -37,7 +37,7 @@ if os.path.exists(katib_grpc_api_file):
|
|||
|
||||
setuptools.setup(
|
||||
name="kubeflow-katib",
|
||||
version="0.15.0",
|
||||
version="0.16.0",
|
||||
author="Kubeflow Authors",
|
||||
author_email="premnath.vel@gmail.com",
|
||||
license="Apache License Version 2.0",
|
||||
|
|
|
@ -66,7 +66,8 @@ cd ../../../../../ && WITH_DATABASE_TYPE=$WITH_DATABASE_TYPE make deploy && cd -
|
|||
|
||||
# Wait until all Katib pods are running.
|
||||
TIMEOUT=120s
|
||||
kubectl wait --for=condition=ready --timeout=${TIMEOUT} -l "katib.kubeflow.org/component in ($WITH_DATABASE_TYPE,controller,db-manager,ui)" -n kubeflow pod ||
|
||||
|
||||
kubectl wait --for=condition=ContainersReady=True --timeout=${TIMEOUT} -l "katib.kubeflow.org/component in ($WITH_DATABASE_TYPE,controller,db-manager,ui)" -n kubeflow pod ||
|
||||
(kubectl get pods -n kubeflow && kubectl describe pods -n kubeflow && exit 1)
|
||||
|
||||
echo "All Katib components are running."
|
||||
|
|
|
@ -52,7 +52,7 @@ spec:
|
|||
spec:
|
||||
containers:
|
||||
- name: training-container
|
||||
image: docker.io/kubeflowkatib/mxnet-mnist:latest
|
||||
image: docker.io/kubeflowkatib/mxnet-mnist:v0.16.0
|
||||
command:
|
||||
- "python3"
|
||||
- "/opt/mxnet-mnist/mnist.py"
|
||||
|
|
|
@ -52,7 +52,7 @@ spec:
|
|||
spec:
|
||||
containers:
|
||||
- name: training-container
|
||||
image: docker.io/kubeflowkatib/mxnet-mnist:latest
|
||||
image: docker.io/kubeflowkatib/mxnet-mnist:v0.16.0
|
||||
command:
|
||||
- "python3"
|
||||
- "/opt/mxnet-mnist/mnist.py"
|
||||
|
|
Loading…
Reference in New Issue