connor-mccarthy 2024-03-14 14:47:40 -07:00
parent 63ae8509e3
commit cab955f7e3
4 changed files with 143 additions and 70 deletions

.python-version (new file, 1 line)

@@ -0,0 +1 @@
3.8.14

cj.py (new file, 52 lines)

@@ -0,0 +1,52 @@
from google.cloud import aiplatform
project = 'managed-pipeline-test'
location = 'us-central1'
client = aiplatform.gapic.JobServiceClient(
client_options={"api_endpoint": f"{location}-aiplatform.googleapis.com"})
# Configure the custom job. The gapic client expects CustomJob fields in
# snake_case, with the worker pool config nested under job_spec; display_name
# is required by the API (the value here is an arbitrary placeholder).
custom_job = {
    "display_name": "kfp-task-as-custom-job",
    "job_spec": {
        "worker_pool_specs": [{
            "machine_spec": {
                "machine_type": "e2-standard-4"
            },
            "replica_count": 1,
            "disk_spec": {
                "boot_disk_type": "pd-ssd",
                "boot_disk_size_gb": 100
            },
            "container_spec": {
                "image_uri": "python:3.7",
                "command": [
                    "sh", "-c",
                    "\nif ! [ -x \"$(command -v pip)\" ]; then\n    python3 -m ensurepip || python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location 'kfp==2.6.0' '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"$0\" \"$@\"\n",
                    "sh", "-ec",
                    "program_path=$(mktemp -d)\n\nprintf \"%s\" \"$0\" > \"$program_path/ephemeral_component.py\"\n_KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path \"$program_path/ephemeral_component.py\" \"$@\"\n",
                    "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import *\n\ndef test_add_1(a: int, b: int) -> int:\n    if a > 2:\n        sys.exit(\"a is greater than 2, invalid\")\n    return a + b\n\n"
                ],
                "args": [
                    "--executor_input",
                    "{\"inputs\":{\"parameterValues\":{\"a\":3,\"b\":2},\"parameters\":{\"a\":{\"intValue\":\"3\"},\"b\":{\"intValue\":\"2\"}}},\"outputs\":{\"outputFile\":\"/gcs/rickyxie-test/186556260430/hello-world-20240314011708/test-add-1_-5950113757618241536/executor_output.json\",\"parameters\":{\"Output\":{\"outputFile\":\"/gcs/rickyxie-test/186556260430/hello-world-20240314011708/test-add-1_-5950113757618241536/Output\"}}}}",
                    "--function_to_execute", "test_add_1"
                ],
                "env": [{
                    "name": "VERTEX_AI_PIPELINES_RUN_LABELS",
                    "value": "{\"vertex-ai-pipelines-run-billing-id\":\"8717761889699889152\"}"
                }]
            }
        }],
        "scheduling": {
            "disable_retries": True
        },
        "service_account": "186556260430-compute@developer.gserviceaccount.com"
    }
}
parent = f"projects/{project}/locations/{location}"
response = client.create_custom_job(parent=parent, custom_job=custom_job)
print("Custom job name:", response.name)

f.py (60 lines changed)

@@ -1,68 +1,26 @@
from kfp import dsl
from kfp import local
from kfp.dsl import Artifact
from kfp.dsl import Output
local.init(runner=local.SubprocessRunner())
project = '<your-project-here>'
source = """
def main():
import sys
import json
import os
executor_input = sys.argv[1]
my_custom_uri = 'gs://{project}/foo/bar/blah.txt'
local_path = my_custom_uri.replace('gs://', '/gcs/')
# write artifact
os.makedirs(os.path.dirname(local_path), exist_ok=True)
with open(local_path, 'w+') as f:
f.write('my custom artifact')
from kfp.dsl import *
from typing import *
# tell Pipelines backend where you wrote it
executor_input_struct = json.loads(executor_input)
artifact_to_override = executor_input_struct['outputs']['artifacts']['a']
executor_output_path = executor_input_struct['outputs']['outputFile']
artifact_to_override['artifacts'][0]['uri'] = my_custom_uri
updated_executor_output = {'artifacts': {'a': artifact_to_override}}
os.makedirs(os.path.dirname(executor_output_path), exist_ok=True)
with open(executor_output_path, 'w+') as f:
json.dump(updated_executor_output, f)
main()
"""
@dsl.container_component
def comp(
a: Output[Artifact],
executor_input: str = dsl.PIPELINE_TASK_EXECUTOR_INPUT_PLACEHOLDER,
):
return dsl.ContainerSpec(
image='python:3.8',
command=['python', '-c'],
args=[source, executor_input],
)
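# Illustrative only (not part of the original file): the executor_input JSON
# that PIPELINE_TASK_EXECUTOR_INPUT_PLACEHOLDER resolves to has roughly this
# shape, inferred from the accesses in `source` above and from the literal
# --executor_input value in cj.py:
#
#     {
#         'outputs': {
#             'artifacts': {
#                 'a': {'artifacts': [{'name': '...', 'uri': 'gs://<bucket>/...'}]}
#             },
#             'outputFile': '/gcs/<bucket>/.../executor_output.json'
#         }
#     }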
@dsl.component
def identity(string: str) -> str:
    # raise deliberately so the task fails; the return below is unreachable
    raise Exception
    return string


@dsl.pipeline
def my_pipeline(string: str = 'string'):
    op1 = identity(string=string)


# my_pipeline()
if __name__ == '__main__':
import datetime
import warnings
import webbrowser
from google.cloud import aiplatform
from kfp import compiler
warnings.filterwarnings('ignore')
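    # The rest of this __main__ block is truncated in the diff view. Based on
    # the imports above, it presumably compiled the pipeline and submitted it
    # to Vertex AI; a minimal sketch of that flow follows (the display name
    # and the console-opening step are assumptions, not the original code):
    compiler.Compiler().compile(my_pipeline, 'pipeline.yaml')
    job = aiplatform.PipelineJob(
        display_name='my-pipeline-' + datetime.datetime.now().strftime('%Y%m%d%H%M%S'),
        template_path='pipeline.yaml',
    )
    job.submit()
    # webbrowser was likely used to open the run in the console, e.g.:
    # webbrowser.open(job._dashboard_uri())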

x.py (100 lines changed)

@@ -1,3 +1,4 @@
import json
from typing import *
from kfp import dsl
@@ -48,8 +49,6 @@ def DataflowFlexTemplateJobOp(
'project': project,
'location': location,
'outputs': {
# backend handles persisting outputs
# TODO: detail how you could add new outputs with this approach
'gcp_resources': gcp_resources
},
'body': {
@@ -130,11 +129,7 @@ def ModelGetOp(
'location': location,
'body': {
'name': f'projects/{project}/locations/{location}/models/{model_name}'
},
'outputs': {
@@ -143,6 +138,57 @@ def ModelGetOp(
})
# 1: return full model
# cons:
# - asymmetrical interface: curated set of inputs, but full blob output
# - breaking change for return
# 2: return select fields
# cons:
# - expressiveness limitations? need to express name/URI/metadata declaratively
# 3: return full
# cons: curated set of inputs, but full blob output
@dsl.container_component
def ModelGetOp(
model: dsl.Output[VertexModel],
model_name: str,
project: str = PROJECT_ID_PLACEHOLDER,
location: str = 'us-central1',
):
# use $response to represent the response variable to which the CEL is applied
name = model.name
    uri = f'https://{location}-aiplatform.googleapis.com/v1/$response.name'
metadata = {'resourceName': '$response.name'}
return dsl.PlatformComponent(
platform='google_cloud',
config={
'task_type':
'http',
'method':
'GET',
'endpoint':
f'https://{location}-aiplatform.googleapis.com/v1/projects/{project}/locations/{location}/models/{model_name}',
            'outputs': {
                'parameters': {
                    'example_param': {
                        'destination': model,
                        'cel': '$response.name',
                    },
                },
                'artifacts': {
                    'model': [{
                        'name': name,
                        'uri': uri,
                        'metadata': metadata,
                    }],
                },
            },
})
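# Illustrative resolution (not in the original): if the GET response has
# name = 'projects/p/locations/us-central1/models/m', the CEL mappings above
# would resolve to roughly:
#   example_param          -> 'projects/p/locations/us-central1/models/m'
#   model artifact uri     -> 'https://us-central1-aiplatform.googleapis.com/v1/projects/p/locations/us-central1/models/m'
#   model artifact metadata -> {'resourceName': 'projects/p/locations/us-central1/models/m'}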
@dsl.platform_component
def DataflowFlexTemplateJobOp(
container_spec_gcs_path: str,
@@ -267,26 +313,42 @@ def DataflowFlexTemplateJobOp(
# no obvious way to instruct the backend to parse the body to create outputs
@kfp.platforms.platform_component
def TuningOp(
    model_template: str,
    finetuning_steps: int,
    inputs_length: int,
    targets_length: int,
    gcp_resources: dsl.OutputPath(str),
    saved_model: dsl.Output[dsl.Artifact],
    project: str,
    accelerator_count: int = 8,
    replica_count: int = 1,
    location: str = 'us-central1',
    accelerator_type: str = 'TPU_V2',
    machine_type: str = 'cloud-tpu',
):
    return kfp.platforms.PlatformComponent(
platform='google_cloud',
config={
'project': project,
'location': location,
            'tuning_op': {
# in practice this will not be a flat struct
'model_template': model_template,
'finetuning_steps': finetuning_steps,
'inputs_length': inputs_length,
'targets_length': targets_length,
'accelerator_count': accelerator_count,
'replica_count': replica_count,
'accelerator_type': accelerator_type,
'machine_type': machine_type,
},
            'outputs': {
                'gcp_resources': gcp_resources,
                'saved_model': saved_model,
            },
            # include version, since it is no longer provided by the GCPC image
            # tag (assumes `import google_cloud_pipeline_components as gcpc`)
            'version': gcpc.__version__,
})
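As a usage sketch, the exploratory TuningOp could be wired into a pipeline like any other component, assuming the kfp.platforms.platform_component decorator composes the way dsl.component does; every argument value below is illustrative:

from kfp import dsl

@dsl.pipeline
def tuning_pipeline(project: str):
    tune_task = TuningOp(
        model_template='my-model-template',
        finetuning_steps=1000,
        inputs_length=1024,
        targets_length=256,
        project=project,
    )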