diff --git a/charts/spark-operator-chart/README.md b/charts/spark-operator-chart/README.md index 5d842bf7..051376df 100644 --- a/charts/spark-operator-chart/README.md +++ b/charts/spark-operator-chart/README.md @@ -170,6 +170,7 @@ See [helm uninstall](https://helm.sh/docs/helm/helm_uninstall) for command docum | prometheus.metrics.portName | string | `"metrics"` | Metrics port name. | | prometheus.metrics.endpoint | string | `"/metrics"` | Metrics serving endpoint. | | prometheus.metrics.prefix | string | `""` | Metrics prefix, will be added to all exported metrics. | +| prometheus.metrics.jobStartLatencyBuckets | string | `"30,60,90,120,150,180,210,240,270,300"` | Job Start Latency histogram buckets. Specified in seconds. | | prometheus.podMonitor.create | bool | `false` | Specifies whether to create pod monitor. Note that prometheus metrics should be enabled as well. | | prometheus.podMonitor.labels | object | `{}` | Pod monitor labels | | prometheus.podMonitor.jobLabel | string | `"spark-operator-podmonitor"` | The label to use to retrieve the job name from | diff --git a/charts/spark-operator-chart/templates/controller/deployment.yaml b/charts/spark-operator-chart/templates/controller/deployment.yaml index 9f5fcb5f..fa5cc151 100644 --- a/charts/spark-operator-chart/templates/controller/deployment.yaml +++ b/charts/spark-operator-chart/templates/controller/deployment.yaml @@ -88,6 +88,7 @@ spec: - --metrics-endpoint={{ .Values.prometheus.metrics.endpoint }} - --metrics-prefix={{ .Values.prometheus.metrics.prefix }} - --metrics-labels=app_type + - --metrics-job-start-latency-buckets={{ .Values.prometheus.metrics.jobStartLatencyBuckets }} {{- end }} {{ if .Values.controller.leaderElection.enable }} - --leader-election=true diff --git a/charts/spark-operator-chart/tests/controller/deployment_test.yaml b/charts/spark-operator-chart/tests/controller/deployment_test.yaml index 5032a90d..02614710 100644 --- a/charts/spark-operator-chart/tests/controller/deployment_test.yaml +++ b/charts/spark-operator-chart/tests/controller/deployment_test.yaml @@ -214,6 +214,7 @@ tests: portName: test-port endpoint: /test-endpoint prefix: test-prefix + jobStartLatencyBuckets: "180,360,420,690" asserts: - contains: path: spec.template.spec.containers[?(@.name=="spark-operator-controller")].args @@ -230,6 +231,9 @@ tests: - contains: path: spec.template.spec.containers[?(@.name=="spark-operator-controller")].args content: --metrics-labels=app_type + - contains: + path: spec.template.spec.containers[?(@.name=="spark-operator-controller")].args + content: --metrics-job-start-latency-buckets=180,360,420,690 - it: Should enable leader election by default asserts: diff --git a/charts/spark-operator-chart/values.yaml b/charts/spark-operator-chart/values.yaml index 3d4cad68..430c1e7e 100644 --- a/charts/spark-operator-chart/values.yaml +++ b/charts/spark-operator-chart/values.yaml @@ -391,6 +391,8 @@ prometheus: endpoint: /metrics # -- Metrics prefix, will be added to all exported metrics. prefix: "" + # -- Job Start Latency histogram buckets. Specified in seconds. + jobStartLatencyBuckets: "30,60,90,120,150,180,210,240,270,300" # Prometheus pod monitor for controller pods podMonitor: