479 lines
17 KiB
Python
479 lines
17 KiB
Python
# Copyright 2022 The Kubeflow Authors
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
"""Contains data structures and functions for handling input and output
|
|
placeholders."""
|
|
|
|
import abc
|
|
import dataclasses
|
|
import json
|
|
from json.decoder import JSONArray # type: ignore
|
|
from json.scanner import py_make_scanner
|
|
import re
|
|
from typing import Any, Dict, List, Optional, Union
|
|
|
|
from kfp.components import base_model
|
|
|
|
|
|
class Placeholder(abc.ABC):
    """Interface that every placeholder type has to satisfy.

    Each method below is abstract; concrete placeholders must supply all
    of them so that downstream code can treat placeholders uniformly.
    """

    @classmethod
    @abc.abstractmethod
    def from_placeholder_string(cls, placeholder_string: str) -> 'Placeholder':
        """Builds an instance of the implementing class from a placeholder
        string.

        Args:
            placeholder_string (str): The placeholder string to convert.

        Returns:
            Placeholder: An instance of the implementing placeholder class.
        """
        raise NotImplementedError

    @classmethod
    @abc.abstractmethod
    def is_match(cls, placeholder_string: str) -> bool:
        """Reports whether a placeholder string belongs to the implementing
        class.

        Args:
            placeholder_string (str): The placeholder string to test.

        Returns:
            bool: True when the string matches the implementing placeholder
                class and can be converted into an instance of it.
        """
        raise NotImplementedError

    @abc.abstractmethod
    def to_placeholder_string(self) -> str:
        """Renders this placeholder object as a placeholder string.

        Returns:
            str: The placeholder string.
        """
        raise NotImplementedError

    @abc.abstractmethod
    def to_dict(self, by_alias: bool = False) -> Dict[str, Any]:
        """Renders this placeholder object as a dictionary.

        Declaring this method abstract means concrete placeholder classes
        need a to_dict implementation; the ones in this module get it by
        also inheriting from kfp.components.base_model.BaseModel.

        Args:
            by_alias (bool, optional): Whether to use the attribute name to
                alias field mapping provided by cls._aliases when converting
                to a dictionary. Defaults to False.

        Returns:
            Dict[str, Any]: Dictionary representation of the object.
        """
        raise NotImplementedError
|
|
|
|
|
|
class RegexPlaceholderSerializationMixin(Placeholder):
    """Mixin for *Placeholder objects that handles the
    serialization/deserialization of the placeholder.

    Subclasses configure the behavior through two class attributes:

    * _FROM_PLACEHOLDER: compiled regex whose named groups correspond to the
      dataclass fields of the subclass.
    * _TO_PLACEHOLDER: str.format template filled with the output of
      to_dict().

    Either attribute may be left as NotImplemented, in which case the
    corresponding operations raise NotImplementedError.
    """
    _FROM_PLACEHOLDER: Union[re.Pattern, type(NotImplemented)] = NotImplemented
    _TO_PLACEHOLDER: Union[str, type(NotImplemented)] = NotImplemented

    @classmethod
    def is_match(cls, placeholder_string: str) -> bool:
        """Determines if the placeholder_string matches the placeholder pattern
        using the _FROM_PLACEHOLDER regex.

        Args:
            placeholder_string (str): The string (often "{{$.inputs/outputs...}}") to check.

        Returns:
            bool: Whether the _FROM_PLACEHOLDER regex matches at the start of
                placeholder_string.

        Raises:
            NotImplementedError: If the class does not define
                _FROM_PLACEHOLDER.
        """
        # NotImplemented is a singleton, so identity is the right comparison.
        # Without this guard an unconfigured class would fail with an opaque
        # AttributeError on NotImplemented.match.
        if cls._FROM_PLACEHOLDER is NotImplemented:
            raise NotImplementedError(
                f'{cls.__name__} does not support placeholder parsing.')
        return cls._FROM_PLACEHOLDER.match(placeholder_string) is not None

    @classmethod
    def from_placeholder_string(
            cls,
            placeholder_string: str) -> 'RegexPlaceholderSerializationMixin':
        """Converts a placeholder string into a placeholder object.

        Args:
            placeholder_string (str): The placeholder.

        Returns:
            RegexPlaceholderSerializationMixin subclass: The placeholder
                object, constructed from the regex's named groups.

        Raises:
            NotImplementedError: If the class does not define
                _FROM_PLACEHOLDER.
            ValueError: If the string does not contain the pattern, or the
                pattern captured more groups than the class has fields.
        """
        if cls._FROM_PLACEHOLDER is NotImplemented:
            raise NotImplementedError(
                f'{cls.__name__} does not support placeholder parsing.')
        matches = cls._FROM_PLACEHOLDER.search(placeholder_string)
        if matches is None:
            raise ValueError(
                f'Could not parse placeholder: {placeholder_string} into {cls.__name__}'
            )
        field_names = [field.name for field in dataclasses.fields(cls)]
        if len(matches.groups()) > len(field_names):
            raise ValueError(
                f'Could not parse placeholder string: {placeholder_string}. Expected no more than {len(field_names)} groups matched for fields {field_names}. Got {len(matches.groups())} matched: {matches.groups()}.'
            )
        # Each dataclass field is looked up as a named group of the regex.
        kwargs = {field_name: matches[field_name] for field_name in field_names}
        return cls(**kwargs)

    def to_placeholder_string(self) -> str:
        """Converts a placeholder object into a placeholder string.

        Returns:
            str: The placeholder string.

        Raises:
            NotImplementedError: If the class does not define
                _TO_PLACEHOLDER.
        """
        if self._TO_PLACEHOLDER is NotImplemented:
            raise NotImplementedError(
                f'{self.__class__.__name__} does not support creating placeholder strings.'
            )

        return self._TO_PLACEHOLDER.format(**self.to_dict())
|
|
|
|
|
|
class ExecutorInputPlaceholder(base_model.BaseModel,
                               RegexPlaceholderSerializationMixin):
    """Placeholder standing for the executor input, written as '{{$}}'."""
    _TO_PLACEHOLDER = '{{$}}'
    _FROM_PLACEHOLDER = re.compile(r'\{\{\$\}\}')

    def to_placeholder_string(self) -> str:
        """Returns the literal executor input placeholder string.

        The template is returned verbatim rather than being passed through
        str.format, which would collapse the doubled braces.

        Returns:
            str: The placeholder string.
        """
        placeholder_string = self._TO_PLACEHOLDER
        return placeholder_string
|
|
|
|
|
|
class InputValuePlaceholder(base_model.BaseModel,
                            RegexPlaceholderSerializationMixin):
    """Placeholder referring to the value of an input parameter.

    Attributes:
        input_name: Name of the input.
    """
    input_name: str
    _aliases = {'input_name': 'inputValue'}
    # Unlike the sibling placeholder patterns this regex has no ^/$ anchors;
    # ConcatPlaceholder.split_cel_concat_string runs finditer over it to
    # locate input value placeholders embedded in longer strings.
    _FROM_PLACEHOLDER = re.compile(
        r"\{\{\$\.inputs\.parameters\[(?:''|'|\")(?P<input_name>.+?)(?:''|'|\")]\}\}"
    )
    # Doubled braces are str.format escapes and render as '{{' / '}}'.
    _TO_PLACEHOLDER = "{{{{$.inputs.parameters['{input_name}']}}}}"
|
|
|
|
|
|
class InputPathPlaceholder(base_model.BaseModel,
                           RegexPlaceholderSerializationMixin):
    """Placeholder referring to the path of an input artifact.

    Attributes:
        input_name: Name of the input.
    """
    input_name: str
    _aliases = {'input_name': 'inputPath'}
    # Anchored: the whole string must be exactly one placeholder.
    _FROM_PLACEHOLDER = re.compile(
        r"^\{\{\$\.inputs\.artifacts\[(?:''|'|\")(?P<input_name>.+?)(?:''|'|\")]\.path\}\}$"
    )
    # Doubled braces are str.format escapes and render as '{{' / '}}'.
    _TO_PLACEHOLDER = "{{{{$.inputs.artifacts['{input_name}'].path}}}}"
|
|
|
|
|
|
class InputUriPlaceholder(base_model.BaseModel,
                          RegexPlaceholderSerializationMixin):
    """Placeholder referring to the uri of an input artifact.

    Attributes:
        input_name: Name of the input.
    """
    input_name: str
    _aliases = {'input_name': 'inputUri'}
    # Anchored: the whole string must be exactly one placeholder.
    _FROM_PLACEHOLDER = re.compile(
        r"^\{\{\$\.inputs\.artifacts\[(?:''|'|\")(?P<input_name>.+?)(?:''|'|\")]\.uri\}\}$"
    )
    # Doubled braces are str.format escapes and render as '{{' / '}}'.
    _TO_PLACEHOLDER = "{{{{$.inputs.artifacts['{input_name}'].uri}}}}"
|
|
|
|
|
|
class OutputParameterPlaceholder(base_model.BaseModel,
                                 RegexPlaceholderSerializationMixin):
    """Placeholder referring to the output file of an output parameter.

    Attributes:
        output_name: Name of the output.
    """
    output_name: str
    # NOTE(review): this alias is the same 'outputPath' used by
    # OutputPathPlaceholder; presumably intentional — confirm.
    _aliases = {'output_name': 'outputPath'}
    # Anchored: the whole string must be exactly one placeholder.
    _FROM_PLACEHOLDER = re.compile(
        r"^\{\{\$\.outputs\.parameters\[(?:''|'|\")(?P<output_name>.+?)(?:''|'|\")]\.output_file\}\}$"
    )
    # Doubled braces are str.format escapes and render as '{{' / '}}'.
    _TO_PLACEHOLDER = "{{{{$.outputs.parameters['{output_name}'].output_file}}}}"
|
|
|
|
|
|
class OutputPathPlaceholder(base_model.BaseModel,
                            RegexPlaceholderSerializationMixin):
    """Placeholder referring to the path of an output artifact.

    Attributes:
        output_name: Name of the output.
    """
    output_name: str
    _aliases = {'output_name': 'outputPath'}
    # Anchored: the whole string must be exactly one placeholder.
    _FROM_PLACEHOLDER = re.compile(
        r"^\{\{\$\.outputs\.artifacts\[(?:''|'|\")(?P<output_name>.+?)(?:''|'|\")]\.path\}\}$"
    )
    # Doubled braces are str.format escapes and render as '{{' / '}}'.
    _TO_PLACEHOLDER = "{{{{$.outputs.artifacts['{output_name}'].path}}}}"
|
|
|
|
|
|
class OutputUriPlaceholder(base_model.BaseModel,
                           RegexPlaceholderSerializationMixin):
    """Placeholder referring to the uri of an output artifact.

    Attributes:
        output_name: Name of the output.
    """
    output_name: str
    _aliases = {'output_name': 'outputUri'}
    # Anchored: the whole string must be exactly one placeholder.
    _FROM_PLACEHOLDER = re.compile(
        r"^\{\{\$\.outputs\.artifacts\[(?:''|'|\")(?P<output_name>.+?)(?:''|'|\")]\.uri\}\}$"
    )
    # Doubled braces are str.format escapes and render as '{{' / '}}'.
    _TO_PLACEHOLDER = "{{{{$.outputs.artifacts['{output_name}'].uri}}}}"
|
|
|
|
|
|
# Anything that may appear as a single command or argument element: a plain
# string or one of the placeholder types. The two container placeholders are
# forward references (strings) because their classes are defined after this
# alias and themselves refer back to it.
CommandLineElement = Union[
    str,
    ExecutorInputPlaceholder,
    InputValuePlaceholder,
    InputPathPlaceholder,
    InputUriPlaceholder,
    OutputParameterPlaceholder,
    OutputPathPlaceholder,
    OutputUriPlaceholder,
    'IfPresentPlaceholder',
    'ConcatPlaceholder',
]
|
|
|
|
|
|
class ConcatPlaceholder(base_model.BaseModel, Placeholder):
    """Placeholder for concatenating multiple strings. May contain other
    placeholders.

    Two textual forms are recognized: the explicit JSON struct
    {"Concat": [...]} and a plain string in which input value placeholders
    are mixed with other text (optionally joined with '+').

    Attributes:
        items: Elements to concatenate.
    """
    items: List[CommandLineElement]

    @classmethod
    def split_cel_concat_string(cls, string: str) -> List[str]:
        """Splits a cel string into a list of strings, which may be normal
        strings or placeholder strings.

        Only input value placeholders (InputValuePlaceholder._FROM_PLACEHOLDER)
        act as split points. Text between, before or after them is kept as
        its own item unless it is empty or exactly the concat character '+'.

        Args:
            string (str): The cel string.

        Returns:
            List[str]: The list of strings. Empty when the string contains
                no input value placeholder.
        """
        concat_char = '+'
        start_ends = [(match.start(0), match.end(0)) for match in
                      InputValuePlaceholder._FROM_PLACEHOLDER.finditer(string)]

        items = []
        if start_ends:
            start = 0
            for match_start, match_end in start_ends:
                leading_string = string[start:match_start]
                if leading_string and leading_string != concat_char:
                    items.append(leading_string)
                items.append(string[match_start:match_end])
                start = match_end
            # After the loop, start is the end of the last placeholder.
            trailing_string = string[start:]
            if trailing_string and trailing_string != concat_char:
                items.append(trailing_string)
        return items

    @classmethod
    def is_match(cls, placeholder_string: str) -> bool:
        """Checks whether the string represents a concatenation.

        Args:
            placeholder_string (str): The placeholder string.

        Returns:
            bool: True for a JSON object with a 'Concat' key, or for a
                non-object string that splits into more than one cel item.
        """
        placeholder_struct = json_load_nested_placeholder_aware(
            placeholder_string)
        # 'Concat' is the explicit struct for concatenation. Only a dict key
        # counts: a substring test on a raw string would misclassify e.g. an
        # input that merely has 'Concat' in its name, and a dict without the
        # key could not be parsed by from_placeholder_string anyway.
        if isinstance(placeholder_struct, dict):
            return 'Concat' in placeholder_struct
        # Other JSON values (lists, numbers, ...) are rejected by
        # from_placeholder_string, so they are not a match either.
        if not isinstance(placeholder_struct, str):
            return False
        # cel splitting handles the cases of {{input}}+{{input}} and {{input}}otherstring
        return len(cls.split_cel_concat_string(placeholder_string)) > 1

    def to_placeholder_struct(self) -> Dict[str, Any]:
        """Converts this placeholder into its {"Concat": [...]} struct.

        Returns:
            Dict[str, Any]: The struct, with each item converted by
                maybe_convert_placeholder_to_placeholder_string.
        """
        return {
            "Concat": [
                maybe_convert_placeholder_to_placeholder_string(item)
                for item in self.items
            ]
        }

    def to_placeholder_string(self) -> str:
        """Converts this placeholder into the JSON text of its struct.

        Returns:
            str: The placeholder string.
        """
        return json.dumps(self.to_placeholder_struct())

    @classmethod
    def from_placeholder_string(cls,
                                placeholder_string: str) -> 'ConcatPlaceholder':
        """Converts a placeholder string into a ConcatPlaceholder.

        Args:
            placeholder_string (str): Either the JSON text of a
                {"Concat": [...]} struct or a cel concat string.

        Returns:
            ConcatPlaceholder: The placeholder object; each item is converted
                by maybe_convert_placeholder_string_to_placeholder.

        Raises:
            KeyError: If the string is a JSON object without a 'Concat' key.
            ValueError: If the string decodes to neither a string nor an
                object.
        """
        placeholder_struct = json_load_nested_placeholder_aware(
            placeholder_string)
        if isinstance(placeholder_struct, str):
            raw_items = cls.split_cel_concat_string(placeholder_struct)
        elif isinstance(placeholder_struct, dict):
            raw_items = placeholder_struct['Concat']
        else:
            raise ValueError(
                f'Could not parse placeholder: {placeholder_string} into {cls.__name__}'
            )

        items = [
            maybe_convert_placeholder_string_to_placeholder(item)
            for item in raw_items
        ]
        return cls(items=items)
|
|
|
|
|
|
class IfPresentPlaceholder(base_model.BaseModel, Placeholder):
    """Placeholder for handling cases where an input may or may not be passed.
    May contain other placeholders.

    Attributes:
        input_name: name of the input/output.
        then: If the input/output specified in input_name is present
            the command-line argument will be replaced at run-time by the
            expanded value of then.
        else_: If the input/output specified in input_name is not present,
            the command-line argument will be replaced at run-time by the
            expanded value of else_.
    """
    input_name: str
    then: List[CommandLineElement]
    else_: Optional[List[CommandLineElement]] = None
    _aliases = {'input_name': 'inputName', 'else_': 'else'}

    @classmethod
    def is_match(cls, string: str) -> bool:
        """Checks whether the string is the JSON text of an IfPresent struct.

        Args:
            string (str): The placeholder string.

        Returns:
            bool: True only if the string is valid JSON that decodes to an
                object with an 'IfPresent' key.
        """
        try:
            struct = json.loads(string)
        except json.decoder.JSONDecodeError:
            return False
        # Restrict to dicts: `in` on a decoded JSON string would be a
        # substring test, and on a number it would raise TypeError.
        return isinstance(struct, dict) and "IfPresent" in struct

    def to_placeholder_struct(self) -> Dict[str, Any]:
        """Converts this placeholder into its {"IfPresent": {...}} struct.

        Returns:
            Dict[str, Any]: The struct. 'Else' is included only when else_
                is truthy. List values have each item converted by
                maybe_convert_placeholder_to_placeholder_string; non-list
                values are passed through unchanged.
        """
        then = [
            maybe_convert_placeholder_to_placeholder_string(item)
            for item in self.then
        ] if isinstance(self.then, list) else self.then
        struct = {"IfPresent": {"InputName": self.input_name, "Then": then}}
        if self.else_:
            otherwise = [
                maybe_convert_placeholder_to_placeholder_string(item)
                for item in self.else_
            ] if isinstance(self.else_, list) else self.else_
            struct["IfPresent"]["Else"] = otherwise
        return struct

    def to_placeholder_string(self) -> str:
        """Converts this placeholder into the JSON text of its struct.

        Returns:
            str: The placeholder string.
        """
        return json.dumps(self.to_placeholder_struct())

    @classmethod
    def from_placeholder_string(
            cls, placeholder_string: str) -> 'IfPresentPlaceholder':
        """Converts the JSON text of an IfPresent struct into a placeholder.

        Args:
            placeholder_string (str): JSON text of the form
                {"IfPresent": {"InputName": ..., "Then": ..., "Else": ...}},
                where "Else" is optional.

        Returns:
            IfPresentPlaceholder: The placeholder object. List values of
                "Then"/"Else" have each item converted by
                maybe_convert_placeholder_string_to_placeholder.
        """
        struct = json_load_nested_placeholder_aware(placeholder_string)
        struct_body = struct['IfPresent']

        then = struct_body['Then']
        then = [
            maybe_convert_placeholder_string_to_placeholder(item)
            for item in then
        ] if isinstance(then, list) else then

        # 'Else' is optional; a missing key yields None, which is passed on.
        else_ = struct_body.get('Else')
        else_ = [
            maybe_convert_placeholder_string_to_placeholder(item)
            for item in else_
        ] if isinstance(else_, list) else else_
        kwargs = {
            'input_name': struct_body['InputName'],
            'then': then,
            'else_': else_
        }
        return cls(**kwargs)

    def transform_else(self) -> None:
        """Use None instead of empty list for optional."""
        # NOTE(review): nothing in this class calls this method; presumably
        # base_model.BaseModel invokes transform_* hooks — confirm.
        self.else_ = None if self.else_ == [] else self.else_
|
|
|
|
|
|
class CustomizedDecoder(json.JSONDecoder):
    """JSON decoder that keeps objects found inside arrays as JSON text.

    Every dict that is a direct element of a decoded array is re-serialized
    with json.dumps, so arrays come back holding strings where the input had
    objects. Objects that are not direct array elements are decoded as usual.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

        def parse_array(*_args, **_kwargs):
            # Delegate the real parsing to the stdlib, then turn any decoded
            # dict element back into its JSON string, updating the list in
            # place.
            parsed, end = JSONArray(*_args, **_kwargs)
            parsed[:] = [
                json.dumps(element) if isinstance(element, dict) else element
                for element in parsed
            ]
            return parsed, end

        self.parse_array = parse_array
        # Rebuild the scanner after overriding parse_array so that the
        # scanner used for decoding is created from the updated decoder.
        self.scan_once = py_make_scanner(self)
|
|
|
|
|
|
def json_load_nested_placeholder_aware(
    placeholder_string: str
) -> Union[str, Dict[str, Union[str, List[str], dict]]]:
    """Decodes a placeholder string as JSON, falling back to the raw string.

    Decoding uses CustomizedDecoder, so objects that are direct elements of
    an array are returned as JSON strings rather than dicts.

    Args:
        placeholder_string (str): The placeholder string.

    Returns:
        The decoded JSON value, or placeholder_string itself when it is not
        valid JSON.
    """
    try:
        decoded = json.loads(placeholder_string, cls=CustomizedDecoder)
    except json.JSONDecodeError:
        decoded = placeholder_string
    return decoded
|
|
|
|
|
|
def maybe_convert_placeholder_string_to_placeholder(
        placeholder_string: str) -> CommandLineElement:
    """Infers if a command is a placeholder and converts it to the correct
    Placeholder object.

    Args:
        placeholder_string (str): The arg or command to possibly convert.

    Returns:
        CommandLineElement: The converted command or original string.
    """
    # Every placeholder form handled here begins with '{'; anything else is
    # an ordinary string.
    if not placeholder_string.startswith('{'):
        return placeholder_string

    # order matters here!
    candidate_types = (
        IfPresentPlaceholder,
        ConcatPlaceholder,
        InputValuePlaceholder,
        InputPathPlaceholder,
        InputUriPlaceholder,
        OutputPathPlaceholder,
        OutputUriPlaceholder,
        OutputParameterPlaceholder,
    )
    # The generator is lazy, so is_match is tried in order and stops at the
    # first type that accepts the string.
    matched_type = next(
        (candidate for candidate in candidate_types
         if candidate.is_match(placeholder_string)),
        None,
    )
    if matched_type is None:
        return placeholder_string
    return matched_type.from_placeholder_string(placeholder_string)
|
|
|
|
|
|
def maybe_convert_placeholder_to_placeholder_string(
        placeholder: CommandLineElement) -> Union[str, Dict[str, Any]]:
    """Converts a placeholder to its serializable form if it's a subclass of
    Placeholder.

    Despite the function name, the result is not always a string:
    placeholders that define to_placeholder_struct (ConcatPlaceholder and
    IfPresentPlaceholder in this module) are returned as their struct
    dictionary, which lets an enclosing struct be serialized with a single
    json.dumps call.

    Args:
        placeholder (CommandLineElement): The placeholder to convert.

    Returns:
        Union[str, Dict[str, Any]]: The struct for placeholders that define
            to_placeholder_struct, the placeholder string for any other
            Placeholder, and the argument itself when it is not a
            Placeholder.
    """
    if not isinstance(placeholder, Placeholder):
        return placeholder
    if hasattr(placeholder, 'to_placeholder_struct'):
        return placeholder.to_placeholder_struct()
    return placeholder.to_placeholder_string()
|