Updating FlinkDeployment interpreter to display error status, improving health interpreter
Signed-off-by: mszacillo <mszacillo@bloomberg.net>
This commit is contained in:
parent
820fd06409
commit
f14e0f920f
|
@ -7,14 +7,27 @@ spec:
|
||||||
apiVersion: flink.apache.org/v1beta1
|
apiVersion: flink.apache.org/v1beta1
|
||||||
kind: FlinkDeployment
|
kind: FlinkDeployment
|
||||||
customizations:
|
customizations:
|
||||||
|
# FlinkDeployment health is interpreted based on the application's state.
|
||||||
|
#
|
||||||
|
# Health Rules:
|
||||||
|
# 1. If the job is in a terminal state [Failed, Finished, Canceled, Suspended] or in the Running state, it is considered healthy.
|
||||||
|
# 2. If the job is in an ephemeral state [Reconciling, Initializing, Created]:
|
||||||
|
# - It is treated as healthy ONLY if there is a published error, i.e. status.error is set or jobManagerDeploymentStatus is 'ERROR' (e.g., user-related issues like an incorrect image path).
|
||||||
|
# - Otherwise, it is treated as unhealthy and may be rescheduled.
|
||||||
|
# 3. Short-lived states [Cancelling, Failing, Restarting] are treated as healthy because they transition directly to a follow-up state:
|
||||||
|
# - Cancelling -> Canceled / Suspended
|
||||||
|
# - Failing -> Failed
|
||||||
|
# - Restarting triggers a restart, bringing the job back to the Created state.
|
||||||
|
#
|
||||||
|
# For more information on the Flink state diagram, refer to the official documentation: https://nightlies.apache.org/flink/flink-docs-release-1.20/docs/internals/job_scheduling/
|
||||||
healthInterpretation:
|
healthInterpretation:
|
||||||
luaScript: >
|
luaScript: >
|
||||||
function InterpretHealth(observedObj)
|
function InterpretHealth(observedObj)
|
||||||
if observedObj.status ~= nil and observedObj.status.jobStatus ~= nil then
|
if observedObj.status ~= nil and observedObj.status.jobStatus ~= nil and observedObj.status.jobStatus.state ~= nil then
|
||||||
if observedObj.status.jobStatus.state ~= 'CREATED' and observedObj.status.jobStatus.state ~= 'RECONCILING' then
|
if observedObj.status.jobStatus.state ~= 'CREATED' and observedObj.status.jobStatus.state ~= 'INITIALIZING' and observedObj.status.jobStatus.state ~= 'RECONCILING' then
|
||||||
return true
|
return true
|
||||||
else
|
else
|
||||||
return observedObj.status.jobManagerDeploymentStatus == 'ERROR'
|
return observedObj.status.error ~= nil or observedObj.status.jobManagerDeploymentStatus == 'ERROR'
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
return false
|
return false
|
||||||
|
@ -91,6 +104,7 @@ spec:
|
||||||
desiredObj.status = {}
|
desiredObj.status = {}
|
||||||
end
|
end
|
||||||
clusterInfo = {}
|
clusterInfo = {}
|
||||||
|
error = ''
|
||||||
jobManagerDeploymentStatus = ''
|
jobManagerDeploymentStatus = ''
|
||||||
jobStatus = {}
|
jobStatus = {}
|
||||||
lifecycleState = ''
|
lifecycleState = ''
|
||||||
|
@ -102,6 +116,7 @@ spec:
|
||||||
currentStatus = statusItems[i].status
|
currentStatus = statusItems[i].status
|
||||||
if currentStatus ~= nil then
|
if currentStatus ~= nil then
|
||||||
clusterInfo = currentStatus.clusterInfo
|
clusterInfo = currentStatus.clusterInfo
|
||||||
|
error = currentStatus.error
|
||||||
jobManagerDeploymentStatus = currentStatus.jobManagerDeploymentStatus
|
jobManagerDeploymentStatus = currentStatus.jobManagerDeploymentStatus
|
||||||
jobStatus = currentStatus.jobStatus
|
jobStatus = currentStatus.jobStatus
|
||||||
observedGeneration = currentStatus.observedGeneration
|
observedGeneration = currentStatus.observedGeneration
|
||||||
|
@ -112,6 +127,7 @@ spec:
|
||||||
end
|
end
|
||||||
|
|
||||||
desiredObj.status.clusterInfo = clusterInfo
|
desiredObj.status.clusterInfo = clusterInfo
|
||||||
|
desiredObj.status.error = error
|
||||||
desiredObj.status.jobManagerDeploymentStatus = jobManagerDeploymentStatus
|
desiredObj.status.jobManagerDeploymentStatus = jobManagerDeploymentStatus
|
||||||
desiredObj.status.jobStatus = jobStatus
|
desiredObj.status.jobStatus = jobStatus
|
||||||
desiredObj.status.lifecycleState = lifecycleState
|
desiredObj.status.lifecycleState = lifecycleState
|
||||||
|
@ -128,6 +144,7 @@ spec:
|
||||||
return status
|
return status
|
||||||
end
|
end
|
||||||
status.clusterInfo = observedObj.status.clusterInfo
|
status.clusterInfo = observedObj.status.clusterInfo
|
||||||
|
status.error = observedObj.status.error
|
||||||
status.jobManagerDeploymentStatus = observedObj.status.jobManagerDeploymentStatus
|
status.jobManagerDeploymentStatus = observedObj.status.jobManagerDeploymentStatus
|
||||||
status.jobStatus = observedObj.status.jobStatus
|
status.jobStatus = observedObj.status.jobStatus
|
||||||
status.observedGeneration = observedObj.status.observedGeneration
|
status.observedGeneration = observedObj.status.observedGeneration
|
||||||
|
|
Loading…
Reference in New Issue