255 lines
8.7 KiB
Python
255 lines
8.7 KiB
Python
# Copyright 2023 The Kubeflow Authors
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
"""Utilities for working with placeholders."""
|
|
import json
|
|
import random
|
|
import re
|
|
from typing import Any, Dict, List, Optional, Union
|
|
|
|
from kfp import dsl
|
|
|
|
|
|
def make_random_id() -> str:
    """Makes a random ID consisting of exactly 8 decimal digits.

    Returns:
        An 8-character string of digits. Values below 10,000,000 are
        left-padded with zeros so the result always has a fixed width.
    """
    # Zero-pad: randint can return values with fewer than 8 digits.
    return f'{random.randint(0, 99999999):08d}'
|
|
|
|
|
|
def replace_placeholders(
    full_command: List[str],
    executor_input_dict: Dict[str, Any],
    pipeline_resource_name: str,
    task_resource_name: str,
    pipeline_root: str,
) -> List[str]:
    """Resolves every placeholder in a command and returns the new command.

    Structural placeholders (Concat / IfPresent) are resolved and flattened
    first; each remaining element then has its individual placeholders
    substituted. Elements that resolve to None are dropped.

    Args:
        full_command: The command elements, possibly containing placeholders.
        executor_input_dict: The executor input used to look up values.
        pipeline_resource_name: Value substituted for the pipeline job name.
        task_resource_name: Value substituted for the pipeline task name.
        pipeline_root: Value substituted for the pipeline root.

    Returns:
        The flat list of resolved command strings.

    Raises:
        ValueError: If an element resolves to something other than None, a
            string, or a list.
    """
    # One random ID per call, shared by every element of the command.
    job_id = make_random_id()
    task_id = make_random_id()
    available_inputs = get_provided_inputs(executor_input_dict)

    struct_resolved = flatten_list([
        resolve_struct_placeholders(item, available_inputs)
        for item in full_command
    ])

    output = []
    for item in struct_resolved:
        resolved_el = resolve_individual_placeholder(
            element=item,
            executor_input_dict=executor_input_dict,
            pipeline_resource_name=pipeline_resource_name,
            task_resource_name=task_resource_name,
            pipeline_root=pipeline_root,
            pipeline_job_id=job_id,
            pipeline_task_id=task_id,
        )
        if resolved_el is None:
            continue
        if isinstance(resolved_el, str):
            output.append(resolved_el)
            continue
        if isinstance(resolved_el, list):
            output.extend(resolved_el)
            continue
        raise ValueError(
            f'Got unknown command element {resolved_el} of type {type(resolved_el)}.'
        )
    return output
|
|
|
|
|
|
def flatten_list(l: List[Union[str, list, None]]) -> List[str]:
    """Iteratively flattens arbitrarily deeply nested lists, filtering out
    elements that are None.

    Args:
        l: A list whose elements are strings, None, or (nested) lists of
            the same. The input is not modified.

    Returns:
        A new flat list of the non-None, non-list elements in their original
        left-to-right order.
    """
    result = []
    # Pending elements are kept reversed so the next one to visit sits at
    # the end of the stack; pop() and extend() are then O(1) amortized
    # instead of shifting the whole list on every step.
    stack = list(reversed(l))
    while stack:
        element = stack.pop()
        if isinstance(element, list):
            stack.extend(reversed(element))
        elif element is not None:
            result.append(element)
    return result
|
|
|
|
|
|
def get_provided_inputs(executor_input_dict: Dict[str, Any]) -> List[str]:
    """Returns the names of the inputs that have a non-None value.

    Args:
        executor_input_dict: The executor input as a dictionary. The
            'parameterValues' and 'artifacts' entries under 'inputs' are
            read; any of these keys may be absent.

    Returns:
        The parameter names followed by the artifact names, restricted to
        entries whose value is not None.
    """
    inputs = executor_input_dict.get('inputs', {})
    params = inputs.get('parameterValues', {})
    artifacts = inputs.get('artifacts', {})
    pkeys = [k for k, v in params.items() if v is not None]
    akeys = [k for k, v in artifacts.items() if v is not None]
    return pkeys + akeys
|
|
|
|
|
|
def get_value_using_path(
    dictionary: Dict[str, Any],
    path: List[Union[str, int]],
) -> Optional[Any]:
    """Walks a nested structure of dicts and lists along a path.

    Args:
        dictionary: The outermost mapping to start from.
        path: Keys (for dicts) and integer indices (for lists) to follow, in
            order. Must not be empty.

    Returns:
        The value found at the end of the path, or None if a key is missing
        or a list index is out of range along the way.

    Raises:
        ValueError: If path is empty.
    """
    if not path:
        raise ValueError('path cannot be empty.')

    list_or_dict = dictionary
    try:
        for p in path:
            list_or_dict = list_or_dict[p]
    except (KeyError, IndexError):
        # An out-of-range list index is the list analogue of a missing key,
        # so both are reported as "no value".
        return None
    return list_or_dict
|
|
|
|
|
|
def convert_placeholder_parts_to_path(parts: List[str]) -> List[str]:
    """Rewrites the parts of a placeholder into an executor-input lookup path.

    The list is modified in place and also returned.

    Rewrites applied:
      * inputs.parameters        -> inputs.parameterValues
      * outputs.parameters.<name>.output_file
                                 -> outputs.parameters.<name>.outputFile
      * <x>.artifacts.<name>.<field>
                                 -> <x>.artifacts.<name>.artifacts.0.<field>,
                                    with a <field> of 'path' replaced by 'uri'

    Args:
        parts: The non-empty pieces of a placeholder, e.g.
            ['inputs', 'parameters', 'text'].

    Returns:
        The same list object, rewritten.
    """
    section, kind = parts[0], parts[1]

    if kind == 'parameters':
        if section == 'inputs':
            parts[1] = 'parameterValues'
        elif section == 'outputs' and parts[3] == 'output_file':
            parts[3] = 'outputFile'
    elif kind == 'artifacts':
        # Reach into the nested artifact object and take the first entry
        # (index 0) of its artifact list.
        parts[3:3] = ['artifacts', 0]

        # for local, path is the uri
        if parts[5] == 'path':
            parts[5] = 'uri'

    return parts
|
|
|
|
|
|
def resolve_io_placeholders(
    executor_input: Dict[str, Any],
    command: str,
) -> str:
    """Substitutes every '{{$.<path>}}' placeholder found in a command string.

    Each placeholder is converted to a lookup path, the value is fetched from
    the executor input, and the placeholder text is replaced by that value.

    Args:
        executor_input: The executor input used to look up values.
        command: A single command element that may contain placeholders.

    Returns:
        The command with all such placeholders replaced.

    Raises:
        ValueError: If a placeholder contains 'json_escape'.
    """
    # e.g., inner = "inputs.parameters[''text'']"
    for inner in re.findall(r'\{\{\$\.(.*?)\}\}', command):
        if 'json_escape' in inner:
            raise ValueError('JSON escape placeholders are not supported.')

        # Splitting on dots, brackets and quotes leaves empty strings
        # behind, e.g. ['inputs', 'parameters', '', 'text', '', ''];
        # drop them to get ['inputs', 'parameters', 'text'].
        tokens = list(filter(None, re.split(r'\.|\[|\]|\'\'|\'', inner)))

        # e.g., lookup_path = ['inputs', 'parameterValues', 'text']
        lookup_path = convert_placeholder_parts_to_path(tokens)
        resolved = get_value_using_path(executor_input, lookup_path)

        # Non-string values are JSON-serialized; a missing value (None)
        # becomes "null" so the placeholder is still resolved.
        replacement = resolved if isinstance(resolved,
                                             str) else json.dumps(resolved)
        command = command.replace('{{$.' + inner + '}}', replacement)

    return command
|
|
|
|
|
|
def resolve_struct_placeholders(
    placeholder: str,
    provided_inputs: List[str],
) -> Union[str, List[Any], None]:
    """Resolves IfPresent and Concat placeholders to an arbitrarily deeply
    nested list of strings, which may contain None.

    Args:
        placeholder: A single command element. It is parsed as JSON only if
            it starts with '{"Concat": ' or '{"IfPresent": '; any other
            string is returned unchanged.
        provided_inputs: Names of the inputs treated as present when
            evaluating IfPresent.

    Returns:
        A string, a (possibly nested) list, or None. None is returned when an
        IfPresent input is absent and no Else is given.

    Raises:
        ValueError: If a list is found where a struct is expected, or if a
            nested struct's first key is neither 'Concat' nor 'IfPresent'.
    """

    # throughout, filter out None for the case where IfPresent False and no else
    def filter_none(l: List[Any]) -> List[Any]:
        return [e for e in l if e is not None]

    def resolve_branch(branch: Any) -> Union[str, List[Any], None]:
        # 'Then' and 'Else' values are resolved by exactly the same rules.
        if isinstance(branch, str):
            return branch
        if isinstance(branch, list):
            return filter_none([recursively_resolve_struct(p) for p in branch])
        if isinstance(branch, dict):
            return recursively_resolve_struct(branch)
        # None (e.g. no 'Else') or any other type resolves to nothing.
        return None

    def recursively_resolve_struct(
        placeholder: Union[str, Dict[str, Any]]
    ) -> Union[str, List[Any], None]:
        if isinstance(placeholder, str):
            return placeholder
        elif isinstance(placeholder, list):
            raise ValueError(
                f"You have an incorrectly nested {dsl.IfPresentPlaceholder!r} with a list provided for 'then' or 'else'."
            )

        first_key = list(placeholder.keys())[0]
        if first_key == 'Concat':
            concat = [
                recursively_resolve_struct(p) for p in placeholder['Concat']
            ]
            return ''.join(filter_none(concat))
        if first_key == 'IfPresent':
            inner_struct = placeholder['IfPresent']
            if inner_struct['InputName'] in provided_inputs:
                return resolve_branch(inner_struct['Then'])
            return resolve_branch(inner_struct.get('Else'))
        raise ValueError(
            f"Unsupported placeholder struct key {first_key!r}; expected 'Concat' or 'IfPresent'."
        )

    if placeholder.startswith(('{"Concat": ', '{"IfPresent": ')):
        des_placeholder = json.loads(placeholder)
        return recursively_resolve_struct(des_placeholder)
    return placeholder
|
|
|
|
|
|
def resolve_individual_placeholder(
    element: str,
    executor_input_dict: Dict[str, Any],
    pipeline_resource_name: str,
    task_resource_name: str,
    pipeline_root: str,
    pipeline_job_id: str,
    pipeline_task_id: str,
) -> str:
    """Replaces placeholders for a single element.

    Constant placeholders are substituted first by literal text match; the
    result is then passed to resolve_io_placeholders for placeholders that
    carry keys.

    Args:
        element: The command element to resolve.
        executor_input_dict: The executor input; its
            ['outputs']['outputFile'] entry and its JSON serialization are
            used as replacement values.
        pipeline_resource_name: Replacement for the pipeline job name.
        task_resource_name: Replacement for the pipeline task name.
        pipeline_root: Replacement for the pipeline root.
        pipeline_job_id: Replacement for the pipeline job ID.
        pipeline_task_id: Replacement for the pipeline task ID.

    Returns:
        The element with its placeholders replaced.
    """
    output_file = executor_input_dict['outputs']['outputFile']
    serialized_executor_input = json.dumps(executor_input_dict)

    # match on literal for constant placeholders
    constant_values = {
        r'{{$.outputs.output_file}}': output_file,
        r'{{$.outputMetadataUri}}': output_file,
        r'{{$}}': serialized_executor_input,
        dsl.PIPELINE_JOB_NAME_PLACEHOLDER: pipeline_resource_name,
        dsl.PIPELINE_JOB_ID_PLACEHOLDER: pipeline_job_id,
        dsl.PIPELINE_TASK_NAME_PLACEHOLDER: task_resource_name,
        dsl.PIPELINE_TASK_ID_PLACEHOLDER: pipeline_task_id,
        dsl.PIPELINE_ROOT_PLACEHOLDER: pipeline_root,
    }
    for literal, replacement in constant_values.items():
        element = element.replace(literal, replacement)

    # match non-constant placeholders (i.e., have key(s))
    return resolve_io_placeholders(executor_input_dict, element)
|