wip
parent 63ae8509e3
commit cab955f7e3
@@ -0,0 +1 @@
3.8.14
@@ -0,0 +1,52 @@
from google.cloud import aiplatform

project = 'managed-pipeline-test'
location = 'us-central1'

client = aiplatform.gapic.JobServiceClient(
    client_options={"api_endpoint": f"{location}-aiplatform.googleapis.com"})

# Configure the custom job
custom_job = {
    "workerPoolSpecs": [{
        "machineSpec": {
            "machineType": "e2-standard-4"
        },
        "replicaCount": "1",
        "diskSpec": {
            "bootDiskType": "pd-ssd",
            "bootDiskSizeGb": 100
        },
        "containerSpec": {
            "imageUri":
                "python:3.7",
            "command": [
                "sh", "-c",
                "\nif ! [ -x \"$(command -v pip)\" ]; then\n    python3 -m ensurepip || python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1 python3 -m pip install --quiet --no-warn-script-location 'kfp==2.6.0' '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"$0\" \"$@\"\n",
                "sh", "-ec",
                "program_path=$(mktemp -d)\n\nprintf \"%s\" \"$0\" > \"$program_path/ephemeral_component.py\"\n_KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path \"$program_path/ephemeral_component.py\" \"$@\"\n",
                "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import *\n\ndef test_add_1(a: int, b: int) -> int:\n    if a > 2:\n        sys.exit(\"a is greater than 2, invalid\")\n    return a + b\n\n"
            ],
            "args": [
                "--executor_input",
                "{\"inputs\":{\"parameterValues\":{\"a\":3,\"b\":2},\"parameters\":{\"a\":{\"intValue\":\"3\"},\"b\":{\"intValue\":\"2\"}}},\"outputs\":{\"outputFile\":\"/gcs/rickyxie-test/186556260430/hello-world-20240314011708/test-add-1_-5950113757618241536/executor_output.json\",\"parameters\":{\"Output\":{\"outputFile\":\"/gcs/rickyxie-test/186556260430/hello-world-20240314011708/test-add-1_-5950113757618241536/Output\"}}}}",
                "--function_to_execute", "test_add_1"
            ],
            "env": [{
                "name":
                    "VERTEX_AI_PIPELINES_RUN_LABELS",
                "value":
                    "{\"vertex-ai-pipelines-run-billing-id\":\"8717761889699889152\"}"
            }]
        }
    }],
    "scheduling": {
        "disableRetries": True
    },
    "serviceAccount": "186556260430-compute@developer.gserviceaccount.com"
}

parent = f"projects/{project}/locations/{location}"
response = client.create_custom_job(parent=parent, custom_job=custom_job)

print("Custom job name:", response.name)
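The script above only submits the job: create_custom_job returns immediately while the job runs asynchronously. A minimal follow-up sketch that polls the job with the same gapic client until it reaches a terminal state (the 60-second interval is an arbitrary choice):

import time

# re-fetch the job until it reaches a terminal state
job = client.get_custom_job(name=response.name)
while job.state.name not in ('JOB_STATE_SUCCEEDED', 'JOB_STATE_FAILED', 'JOB_STATE_CANCELLED'):
    time.sleep(60)
    job = client.get_custom_job(name=response.name)
print("Final state:", job.state.name)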
f.py
@@ -1,68 +1,26 @@
from kfp import dsl
from kfp import local
from kfp.dsl import Artifact
from kfp.dsl import Output

local.init(runner=local.SubprocessRunner())

project = '<your-project-here>'

source = """
def main():
    import sys
    import json
    import os

    executor_input = sys.argv[1]

    my_custom_uri = 'gs://{project}/foo/bar/blah.txt'
    local_path = my_custom_uri.replace('gs://', '/gcs/')

    # write artifact
    os.makedirs(os.path.dirname(local_path), exist_ok=True)
    with open(local_path, 'w+') as f:
        f.write('my custom artifact')
from kfp.dsl import *
from typing import *


    # tell Pipelines backend where you wrote it
    executor_input_struct = json.loads(executor_input)
    artifact_to_override = executor_input_struct['outputs']['artifacts']['a']
    executor_output_path = executor_input_struct['outputs']['outputFile']
    artifact_to_override['artifacts'][0]['uri'] = my_custom_uri
    updated_executor_output = {'artifacts': {'a': artifact_to_override}}

    os.makedirs(os.path.dirname(executor_output_path), exist_ok=True)
    with open(executor_output_path, 'w+') as f:
        json.dump(updated_executor_output, f)

main()
"""


@dsl.container_component
def comp(
    a: Output[Artifact],
    executor_input: str = dsl.PIPELINE_TASK_EXECUTOR_INPUT_PLACEHOLDER,
):
    return dsl.ContainerSpec(
        image='python:3.8',
        command=['python', '-c'],
        args=[source, executor_input],
    )
@dsl.component
def identity(string: str) -> str:
    raise Exception
    return string


@dsl.pipeline
def my_pipeline():
    comp()
def my_pipeline(string: str = 'string'):
    op1 = identity(string=string)


# my_pipeline()
if __name__ == '__main__':
    import datetime
    import warnings
    import webbrowser

    from google.cloud import aiplatform

    from kfp import compiler

    warnings.filterwarnings('ignore')
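The __main__ block above is truncated in this diff. A minimal, self-contained sketch of the typical next steps it points at (compile my_pipeline to IR YAML, then submit it to Vertex AI Pipelines); the project, bucket, and display name are placeholders:

from google.cloud import aiplatform
from kfp import compiler

# compile the pipeline definition to IR YAML
compiler.Compiler().compile(pipeline_func=my_pipeline, package_path='pipeline.yaml')

# submit the compiled pipeline to Vertex AI Pipelines
aiplatform.init(project='<your-project-here>', location='us-central1',
                staging_bucket='gs://<your-bucket>')
job = aiplatform.PipelineJob(display_name='hello-world', template_path='pipeline.yaml')
job.submit()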
x.py
@@ -1,3 +1,4 @@
import json
from typing import *

from kfp import dsl
@@ -48,8 +49,6 @@ def DataflowFlexTemplateJobOp(
            'project': project,
            'location': location,
            'outputs': {
                # backend handles persisting outputs
                # TODO: detail how you could add new outputs with this approach
                'gcp_resources': gcp_resources
            },
            'body': {
@@ -130,11 +129,7 @@ def ModelGetOp(
            'location': location,
            'body': {
                'name': {
<<<<<<< Updated upstream
                    f'projects/{project}/locations/{location}/models/{model_name}'
=======
                    f'projects/{project}/locations/{location}/models/hotd{model_name}'
>>>>>>> Stashed changes
                }
            },
            'outputs': {
@@ -143,6 +138,57 @@ def ModelGetOp(
        })


# 1: return full model
# cons:
# - asymmetrical interface: curated set of inputs, but full blob output
# - breaking change for return

# 2: return select fields
# cons:
# - expressiveness limitations? need to express name/URI/metadata declaratively

# 3: return full
# cons: curated set of inputs, but full blob output


@dsl.container_component
def ModelGetOp(
    model: dsl.Output[VertexModel],
    model_name: str,
    project: str = PROJECT_ID_PLACEHOLDER,
    location: str = 'us-central1',
):
    # use $response to represent the response variable to which the CEL is applied
    name = model.name
    uri = f'https://{location}-aiplatform.googleapis.com/v1/ + $response.name'
    metadata = {'resourceName': '$response.name'}
    return dsl.PlatformComponent(
        platform='google_cloud',
        config={
            'task_type':
                'http',
            'method':
                'GET',
            'endpoint':
                f'https://{location}-aiplatform.googleapis.com/v1/projects/{project}/locations/{location}/models/{model_name}',
            'outputs': {
                'parameters': {
                    'example_param': {
                        'destination': model,
                        'cel': '$response.name',
                    },
                    'artifacts': {
                        'model': [{
                            'name': name,
                            'uri': uri,
                            'metadata': metadata,
                        }]
                    }
                }
            },
        })


@dsl.platform_component
def DataflowFlexTemplateJobOp(
    container_spec_gcs_path: str,
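To make the '$response' convention above concrete: the backend would apply each CEL expression to the JSON body returned by the Models GET call and write the result into the declared output. A small illustration with a hypothetical response body (not taken from the proposal):

# hypothetical Models.Get response body
response = {'name': 'projects/123/locations/us-central1/models/456', 'displayName': 'my-model'}

# '$response.name' resolves to the model's resource name, which then
# feeds the artifact's name, URI, and metadata declared above
resolved_name = response['name']
resolved_uri = f'https://us-central1-aiplatform.googleapis.com/v1/{resolved_name}'
resolved_metadata = {'resourceName': resolved_name}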
@@ -267,26 +313,42 @@ def DataflowFlexTemplateJobOp(
# no obvious way to instruct the backend to parse the body to create outputs


@dsl.container_component
def ModelGetOp(
    model: dsl.Output[VertexModel],
    model_name: str,
    project: str = PROJECT_ID_PLACEHOLDER,
@kfp.platforms.platform_component
def TuningOp(
    model_template: str,
    finetuning_steps: int,
    inputs_length: int,
    targets_length: int,
    accelerator_count: int = 8,
    replica_count: int = 1,
    gcp_resources: dsl.OutputPath(str),
    saved_model: dsl.Output[dsl.Artifact],
    project: str,
    location: str = 'us-central1',
    accelerator_type: str = 'TPU_V2',
    machine_type: str = 'cloud-tpu',
):
    return dsl.PlatformComponent(
    return kfp.platforms.PlatformComponent(
        platform='google_cloud',
        config={
            'project': project,
            'location': location,
            'model_get_op': {
                'body': {
                    'name': {
                        f'projects/{project}/locations/{location}/models/{model_name}'
                    }
                }
            'tuning_op': {
                # in practice this will not be a flat struct
                'model_template': model_template,
                'finetuning_steps': finetuning_steps,
                'inputs_length': inputs_length,
                'targets_length': targets_length,
                'accelerator_count': accelerator_count,
                'replica_count': replica_count,
                'accelerator_type': accelerator_type,
                'machine_type': machine_type,
            },
            'outputs': {
                'model': model
                'gcp_resources': gcp_resources,
                'saved_model': saved_model,
                'saved_model': saved_model,
            },
            # include version, since it is no longer provided by the GCPC image tag
            'version': gcpc.__version__,
        })
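A usage sketch for the proposed TuningOp, assuming the @kfp.platforms.platform_component decorator above exists; the argument values are placeholders, and gcp_resources / saved_model are produced by the task rather than passed in:

@dsl.pipeline
def tuning_pipeline(project: str = 'my-project'):
    # placeholder values, for illustration only
    tune_task = TuningOp(
        model_template='my-model-template',
        finetuning_steps=1000,
        inputs_length=1024,
        targets_length=256,
        project=project,
    )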