# Copyright 2018 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. import time import logging import json import os import tarfile import zipfile import yaml from datetime import datetime import kfp_server_api from .compiler import compiler from .compiler import _k8s_helper from ._auth import get_auth_token class Client(object): """ API Client for KubeFlow Pipeline. """ # in-cluster DNS name of the pipeline service IN_CLUSTER_DNS_NAME = 'ml-pipeline.kubeflow.svc.cluster.local:8888' def __init__(self, host=None, client_id=None): """Create a new instance of kfp client. Args: host: the host name to use to talk to Kubeflow Pipelines. If not set, the in-cluster service DNS name will be used, which only works if the current environment is a pod in the same cluster (such as a Jupyter instance spawned by Kubeflow's JupyterHub). If you have a different connection to cluster, such as a kubectl proxy connection, then set it to something like "127.0.0.1:8080/pipeline. If you connect to an IAP enabled cluster, set it to https://.endpoints..cloud.goog/pipeline". client_id: The client ID used by Identity-Aware Proxy. """ self._host = host token = None if host and client_id: token = get_auth_token(client_id) config = kfp_server_api.configuration.Configuration() config.host = host if host else Client.IN_CLUSTER_DNS_NAME self._configure_auth(config, token) api_client = kfp_server_api.api_client.ApiClient(config) self._run_api = kfp_server_api.api.run_service_api.RunServiceApi(api_client) self._experiment_api = kfp_server_api.api.experiment_service_api.ExperimentServiceApi(api_client) def _configure_auth(self, config, token): if token: config.api_key['authorization'] = token config.api_key_prefix['authorization'] = 'Bearer' def _is_ipython(self): """Returns whether we are running in notebook.""" try: import IPython ipy = IPython.get_ipython() if ipy is None: return False except ImportError: return False return True def _get_url_prefix(self): if self._host: # User's own connection. if self._host.startswith('http://') or self._host.startswith('https://'): return self._host else: return 'http://' + self._host # In-cluster pod. We could use relative URL. return '/pipeline' def create_experiment(self, name): """Create a new experiment. Args: name: the name of the experiment. Returns: An Experiment object. Most important field is id. """ experiment = None try: experiment = self.get_experiment(experiment_name=name) except: # Ignore error if the experiment does not exist. pass if not experiment: logging.info('Creating experiment {}.'.format(name)) experiment = kfp_server_api.models.ApiExperiment(name=name) experiment = self._experiment_api.create_experiment(body=experiment) if self._is_ipython(): import IPython html = \ ('Experiment link here' % (self._get_url_prefix(), experiment.id)) IPython.display.display(IPython.display.HTML(html)) return experiment def list_experiments(self, page_token='', page_size=10, sort_by=''): """List experiments. Args: page_token: token for starting of the page. page_size: size of the page. sort_by: can be '[field_name]', '[field_name] des'. For example, 'name des'. Returns: A response object including a list of experiments and next page token. """ response = self._experiment_api.list_experiment( page_token=page_token, page_size=page_size, sort_by=sort_by) return response def get_experiment(self, experiment_id=None, experiment_name=None): """Get details of an experiment Either experiment_id or experiment_name is required Args: experiment_id: id of the experiment. (Optional) experiment_name: name of the experiment. (Optional) Returns: A response object including details of a experiment. Throws: Exception if experiment is not found or None of the arguments is provided """ if experiment_id is None and experiment_name is None: raise ValueError('Either experiment_id or experiment_name is required') if experiment_id is not None: return self._experiment_api.get_experiment(id=experiment_id) next_page_token = '' while next_page_token is not None: list_experiments_response = self.list_experiments(page_size=100, page_token=next_page_token) next_page_token = list_experiments_response.next_page_token for experiment in list_experiments_response.experiments: if experiment.name == experiment_name: return self._experiment_api.get_experiment(id=experiment.id) raise ValueError('No experiment is found with name {}.'.format(experiment_name)) def _extract_pipeline_yaml(self, package_file): def _choose_pipeline_yaml_file(file_list) -> str: yaml_files = [file for file in file_list if file.endswith('.yaml')] if len(yaml_files) == 0: raise ValueError('Invalid package. Missing pipeline yaml file in the package.') if 'pipeline.yaml' in yaml_files: return 'pipeline.yaml' else: if len(yaml_files) == 1: return yaml_files[0] raise ValueError('Invalid package. There is no pipeline.yaml file and there are multiple yaml files.') if package_file.endswith('.tar.gz') or package_file.endswith('.tgz'): with tarfile.open(package_file, "r:gz") as tar: file_names = [member.name for member in tar if member.isfile()] pipeline_yaml_file = _choose_pipeline_yaml_file(file_names) with tar.extractfile(tar.getmember(pipeline_yaml_file)) as f: return yaml.safe_load(f) elif package_file.endswith('.zip'): with zipfile.ZipFile(package_file, 'r') as zip: pipeline_yaml_file = _choose_pipeline_yaml_file(zip.namelist()) with zip.open(pipeline_yaml_file) as f: return yaml.safe_load(f) elif package_file.endswith('.yaml') or package_file.endswith('.yml'): with open(package_file, 'r') as f: return yaml.safe_load(f) else: raise ValueError('The package_file '+ package_file + ' should ends with one of the following formats: [.tar.gz, .tgz, .zip, .yaml, .yml]') def run_pipeline(self, experiment_id, job_name, pipeline_package_path=None, params={}, pipeline_id=None): """Run a specified pipeline. Args: experiment_id: The string id of an experiment. job_name: name of the job. pipeline_package_path: local path of the pipeline package(the filename should end with one of the following .tar.gz, .tgz, .zip, .yaml, .yml). params: a dictionary with key (string) as param name and value (string) as as param value. pipeline_id: the string ID of a pipeline. Returns: A run object. Most important field is id. """ pipeline_json_string = None if pipeline_package_path: pipeline_obj = self._extract_pipeline_yaml(pipeline_package_path) pipeline_json_string = json.dumps(pipeline_obj) api_params = [kfp_server_api.ApiParameter(name=_k8s_helper.K8sHelper.sanitize_k8s_name(k), value=str(v)) for k,v in params.items()] key = kfp_server_api.models.ApiResourceKey(id=experiment_id, type=kfp_server_api.models.ApiResourceType.EXPERIMENT) reference = kfp_server_api.models.ApiResourceReference(key, kfp_server_api.models.ApiRelationship.OWNER) spec = kfp_server_api.models.ApiPipelineSpec( pipeline_id=pipeline_id, workflow_manifest=pipeline_json_string, parameters=api_params) run_body = kfp_server_api.models.ApiRun( pipeline_spec=spec, resource_references=[reference], name=job_name) response = self._run_api.create_run(body=run_body) if self._is_ipython(): import IPython html = ('Run link here' % (self._get_url_prefix(), response.run.id)) IPython.display.display(IPython.display.HTML(html)) return response.run def list_runs(self, page_token='', page_size=10, sort_by='', experiment_id=None): """List runs. Args: page_token: token for starting of the page. page_size: size of the page. sort_by: one of 'field_name', 'field_name des'. For example, 'name des'. experiment_id: experiment id to filter upon Returns: A response object including a list of experiments and next page token. """ if experiment_id is not None: response = self._run_api.list_runs(page_token=page_token, page_size=page_size, sort_by=sort_by, resource_reference_key_type=kfp_server_api.models.api_resource_type.ApiResourceType.EXPERIMENT, resource_reference_key_id=experiment_id) else: response = self._run_api.list_runs(page_token=page_token, page_size=page_size, sort_by=sort_by) return response def get_run(self, run_id): """Get run details. Args: id of the run. Returns: A response object including details of a run. Throws: Exception if run is not found. """ return self._run_api.get_run(run_id=run_id) def wait_for_run_completion(self, run_id, timeout): """Wait for a run to complete. Args: run_id: run id, returned from run_pipeline. timeout: timeout in seconds. Returns: A run detail object: Most important fields are run and pipeline_runtime """ status = 'Running:' start_time = datetime.now() while status is None or status.lower() not in ['succeeded', 'failed', 'skipped', 'error']: get_run_response = self._run_api.get_run(run_id=run_id) status = get_run_response.run.status elapsed_time = (datetime.now() - start_time).seconds logging.info('Waiting for the job to complete...') if elapsed_time > timeout: raise TimeoutError('Run timeout') time.sleep(5) return get_run_response def _get_workflow_json(self, run_id): """Get the workflow json. Args: run_id: run id, returned from run_pipeline. Returns: workflow: json workflow """ get_run_response = self._run_api.get_run(run_id=run_id) workflow = get_run_response.pipeline_runtime.workflow_manifest workflow_json = json.loads(workflow) return workflow_json