feat(components): Add Feature Attribution components to _implementation/model_evaluation. Add LLM Eval text generation and text classification pipelines to preview namespace init file
PiperOrigin-RevId: 557226606
parent ff2e002157
commit f454a86177
@@ -19,25 +19,31 @@ from google_cloud_pipeline_components._implementation.model_evaluation.data_sampler.component import evaluation_data_sampler as EvaluationDataSamplerOp
 from google_cloud_pipeline_components._implementation.model_evaluation.dataset_preprocessor.component import dataset_preprocessor_error_analysis as EvaluationDatasetPreprocessorOp
 from google_cloud_pipeline_components._implementation.model_evaluation.error_analysis_annotation.component import error_analysis_annotation as ErrorAnalysisAnnotationOp
 from google_cloud_pipeline_components._implementation.model_evaluation.evaluated_annotation.component import evaluated_annotation as EvaluatedAnnotationOp
+from google_cloud_pipeline_components._implementation.model_evaluation.feature_attribution.feature_attribution_component import feature_attribution as ModelEvaluationFeatureAttributionOp
+from google_cloud_pipeline_components._implementation.model_evaluation.feature_attribution.feature_attribution_graph_component import feature_attribution_graph_component as FeatureAttributionGraphComponentOp
 from google_cloud_pipeline_components._implementation.model_evaluation.feature_extractor.component import feature_extractor_error_analysis as FeatureExtractorOp
 from google_cloud_pipeline_components._implementation.model_evaluation.import_evaluated_annotation.component import evaluated_annotation_import as ModelImportEvaluatedAnnotationOp
 from google_cloud_pipeline_components._implementation.model_evaluation.import_evaluation.component import model_evaluation_import as ModelImportEvaluationOp
 from google_cloud_pipeline_components._implementation.model_evaluation.llm_classification_postprocessor.component import llm_classification_predictions_postprocessor_graph_component as LLMEvaluationClassificationPredictionsPostprocessorOp
 from google_cloud_pipeline_components._implementation.model_evaluation.llm_evaluation.component import model_evaluation_text_generation as LLMEvaluationTextGenerationOp
 from google_cloud_pipeline_components._implementation.model_evaluation.llm_safety_bias.component import llm_safety_bias_metrics as LLMSafetyBiasMetricsOp
+from google_cloud_pipeline_components._implementation.model_evaluation.llm_safety_bias.evaluation_llm_safety_bias_pipeline import evaluation_llm_safety_bias_pipeline
 from google_cloud_pipeline_components._implementation.model_evaluation.target_field_data_remover.component import target_field_data_remover as TargetFieldDataRemoverOp
 
 
 __all__ = [
+    'evaluation_llm_safety_bias_pipeline',
     'EvaluationDataSamplerOp',
     'EvaluationDatasetPreprocessorOp',
     'ErrorAnalysisAnnotationOp',
     'EvaluatedAnnotationOp',
+    'FeatureAttributionGraphComponentOp',
     'FeatureExtractorOp',
     'LLMEvaluationClassificationPredictionsPostprocessorOp',
     'LLMEvaluationTextGenerationOp',
+    'LLMSafetyBiasMetricsOp',
+    'ModelEvaluationFeatureAttributionOp',
     'ModelImportEvaluatedAnnotationOp',
     'ModelImportEvaluationOp',
-    'LLMSafetyBiasMetricsOp',
     'TargetFieldDataRemoverOp',
 ]
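With the package init updated, downstream code can pull the new feature attribution ops and the safety-bias pipeline straight from the `_implementation.model_evaluation` namespace. A minimal sketch of the imports this hunk enables (illustration only, not part of the diff):

    from google_cloud_pipeline_components._implementation.model_evaluation import (
        FeatureAttributionGraphComponentOp,
        ModelEvaluationFeatureAttributionOp,
        evaluation_llm_safety_bias_pipeline,
    )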
@@ -0,0 +1,14 @@
+# Copyright 2023 The Kubeflow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Google Cloud Pipeline Evaluation Feature Attribution Component."""
@@ -0,0 +1,179 @@
+# Copyright 2023 The Kubeflow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from google_cloud_pipeline_components import _placeholders
+from google_cloud_pipeline_components._implementation.model_evaluation import version
+from google_cloud_pipeline_components.types.artifact_types import BQTable
+from kfp.dsl import Artifact
+from kfp.dsl import ConcatPlaceholder
+from kfp.dsl import container_component
+from kfp.dsl import ContainerSpec
+from kfp.dsl import IfPresentPlaceholder
+from kfp.dsl import Input
+from kfp.dsl import Metrics
+from kfp.dsl import Output
+from kfp.dsl import OutputPath
+from kfp.dsl import PIPELINE_JOB_ID_PLACEHOLDER
+from kfp.dsl import PIPELINE_ROOT_PLACEHOLDER
+from kfp.dsl import PIPELINE_TASK_ID_PLACEHOLDER
+
+
+@container_component
+def feature_attribution(
+    gcp_resources: OutputPath(str),
+    feature_attributions: Output[Metrics],
+    problem_type: str,
+    location: str = 'us-central1',
+    predictions_format: str = 'jsonl',
+    predictions_gcs_source: Input[Artifact] = None,
+    predictions_bigquery_source: Input[BQTable] = None,
+    dataflow_service_account: str = '',
+    dataflow_disk_size_gb: int = 50,
+    dataflow_machine_type: str = 'n1-standard-4',
+    dataflow_workers_num: int = 1,
+    dataflow_max_workers_num: int = 5,
+    dataflow_subnetwork: str = '',
+    dataflow_use_public_ips: bool = True,
+    encryption_spec_key_name: str = '',
+    force_runner_mode: str = '',
+    project: str = _placeholders.PROJECT_ID_PLACEHOLDER,
+):
+  # fmt: off
+  """Computes feature attribution on a trained model's batch explanation
+  results.
+
+  Creates a Dataflow job with Apache Beam and TFMA to compute feature
+  attributions. Will compute feature attribution for every target label if
+  possible, which is typically possible for AutoML Classification models.
+
+  Args:
+    location: Location running feature attribution. If not set, defaults to
+      `us-central1`.
+    problem_type: Problem type of the pipeline: one of `classification`,
+      `regression` and `forecasting`.
+    predictions_format: The file format for the batch prediction results.
+      `jsonl`, `csv`, and `bigquery` are the allowed formats, from Vertex
+      Batch Prediction. If not set, defaults to `jsonl`.
+    predictions_gcs_source: An artifact with its URI pointing toward a GCS
+      directory with prediction or explanation files to be used for this
+      evaluation. For prediction results, the files should be named
+      "prediction.results-*" or "predictions_". For explanation results, the
+      files should be named "explanation.results-*".
+    predictions_bigquery_source: BigQuery table with prediction or explanation
+      data to be used for this evaluation. For prediction results, the table
+      column should be named "predicted_*".
+    dataflow_service_account: Service account to run the Dataflow job. If not
+      set, Dataflow will use the default worker service account. For more
+      details, see
+      https://cloud.google.com/dataflow/docs/concepts/security-and-permissions#default_worker_service_account
+    dataflow_disk_size_gb: The disk size (in GB) of the machine executing the
+      evaluation run. If not set, defaults to `50`.
+    dataflow_machine_type: The machine type executing the evaluation run. If
+      not set, defaults to `n1-standard-4`.
+    dataflow_workers_num: The number of workers executing the evaluation run.
+      If not set, defaults to `1`.
+    dataflow_max_workers_num: The max number of workers executing the
+      evaluation run. If not set, defaults to `5`.
+    dataflow_subnetwork: Dataflow's fully qualified subnetwork name, when
+      empty the default subnetwork will be used. More details:
+      https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications
+    dataflow_use_public_ips: Specifies whether Dataflow workers use public IP
+      addresses.
+    encryption_spec_key_name: Customer-managed encryption key for the Dataflow
+      job. If this is set, then all resources created by the Dataflow job will
+      be encrypted with the provided encryption key.
+    force_runner_mode: Flag to choose Beam runner. Valid options are
+      `DirectRunner` and `Dataflow`.
+    project: Project to run the feature attribution container. Defaults to
+      the project in which the PipelineJob is run.
+
+  Returns:
+    feature_attributions: A Metrics artifact containing the computed feature
+      attributions.
+    gcp_resources: Serialized gcp_resources proto tracking the Dataflow
+      job. For more details, see
+      https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.
+  """
+  # fmt: on
+  return ContainerSpec(
+      image=version.EVAL_IMAGE_TAG,
+      command=[
+          'python3',
+          '/main.py',
+      ],
+      args=[
+          '--task',
+          'explanation',
+          '--setup_file',
+          '/setup.py',
+          '--project_id',
+          project,
+          '--location',
+          location,
+          '--problem_type',
+          problem_type,
+          '--root_dir',
+          f'{PIPELINE_ROOT_PLACEHOLDER}/{PIPELINE_JOB_ID_PLACEHOLDER}-{PIPELINE_TASK_ID_PLACEHOLDER}',
+          '--batch_prediction_format',
+          predictions_format,
+          IfPresentPlaceholder(
+              input_name='predictions_gcs_source',
+              then=[
+                  '--batch_prediction_gcs_source',
+                  predictions_gcs_source.uri,
+              ],
+          ),
+          IfPresentPlaceholder(
+              input_name='predictions_bigquery_source',
+              then=[
+                  '--batch_prediction_bigquery_source',
+                  ConcatPlaceholder([
+                      'bq://',
+                      predictions_bigquery_source.metadata['projectId'],
+                      '.',
+                      predictions_bigquery_source.metadata['datasetId'],
+                      '.',
+                      predictions_bigquery_source.metadata['tableId'],
+                  ]),
+              ],
+          ),
+          '--dataflow_job_prefix',
+          f'evaluation-feature-attribution-{PIPELINE_JOB_ID_PLACEHOLDER}-{PIPELINE_TASK_ID_PLACEHOLDER}',
+          '--dataflow_service_account',
+          dataflow_service_account,
+          '--dataflow_disk_size',
+          dataflow_disk_size_gb,
+          '--dataflow_machine_type',
+          dataflow_machine_type,
+          '--dataflow_workers_num',
+          dataflow_workers_num,
+          '--dataflow_max_workers_num',
+          dataflow_max_workers_num,
+          '--dataflow_subnetwork',
+          dataflow_subnetwork,
+          '--dataflow_use_public_ips',
+          dataflow_use_public_ips,
+          '--kms_key_name',
+          encryption_spec_key_name,
+          '--force_runner_mode',
+          force_runner_mode,
+          '--gcs_output_path',
+          feature_attributions.path,
+          '--gcp_resources',
+          gcp_resources,
+          '--executor_input',
+          '{{$}}',
+      ],
+  )
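For context, the new container component behaves like any other KFP component once imported. A minimal usage sketch, assuming a GCS directory of batch-explanation results already exists; the pipeline wrapper and the `explain_gcs_dir` parameter are hypothetical:

    from kfp import dsl

    from google_cloud_pipeline_components._implementation.model_evaluation.feature_attribution.feature_attribution_component import feature_attribution


    @dsl.pipeline(name='feature-attribution-demo')
    def demo_pipeline(explain_gcs_dir: str):
      # Bring the explanation results in as a generic artifact so the
      # component can read them from GCS.
      importer = dsl.importer(
          artifact_uri=explain_gcs_dir,
          artifact_class=dsl.Artifact,
          reimport=False,
      )
      feature_attribution(
          problem_type='classification',
          predictions_format='jsonl',
          predictions_gcs_source=importer.outputs['artifact'],
      )

Only `problem_type` is required; the `gcp_resources` and `feature_attributions` outputs are created automatically by the component runtime.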
@@ -0,0 +1,247 @@
+# Copyright 2023 The Kubeflow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Graph Component for feature attribution evaluation."""
+
+from typing import List, NamedTuple
+
+from google_cloud_pipeline_components import _placeholders
+from google_cloud_pipeline_components._implementation.model_evaluation.data_sampler.component import evaluation_data_sampler as EvaluationDataSamplerOp
+from google_cloud_pipeline_components._implementation.model_evaluation.feature_attribution.feature_attribution_component import feature_attribution as ModelEvaluationFeatureAttributionOp
+from google_cloud_pipeline_components.types.artifact_types import VertexModel
+from google_cloud_pipeline_components.v1.batch_predict_job import ModelBatchPredictOp
+import kfp
+
+
+@kfp.dsl.pipeline(name='feature-attribution-graph-component')
+def feature_attribution_graph_component(  # pylint: disable=dangerous-default-value
+    location: str,
+    prediction_type: str,
+    vertex_model: VertexModel,
+    batch_predict_instances_format: str,
+    batch_predict_gcs_destination_output_uri: str,
+    batch_predict_gcs_source_uris: List[str] = [],  # pylint: disable=g-bare-generic
+    batch_predict_bigquery_source_uri: str = '',
+    batch_predict_predictions_format: str = 'jsonl',
+    batch_predict_bigquery_destination_output_uri: str = '',
+    batch_predict_machine_type: str = 'n1-standard-16',
+    batch_predict_starting_replica_count: int = 5,
+    batch_predict_max_replica_count: int = 10,
+    batch_predict_explanation_metadata: dict = {},  # pylint: disable=g-bare-generic
+    batch_predict_explanation_parameters: dict = {},  # pylint: disable=g-bare-generic
+    batch_predict_explanation_data_sample_size: int = 10000,
+    batch_predict_accelerator_type: str = '',
+    batch_predict_accelerator_count: int = 0,
+    dataflow_machine_type: str = 'n1-standard-4',
+    dataflow_max_num_workers: int = 5,
+    dataflow_disk_size_gb: int = 50,
+    dataflow_service_account: str = '',
+    dataflow_subnetwork: str = '',
+    dataflow_use_public_ips: bool = True,
+    encryption_spec_key_name: str = '',
+    force_runner_mode: str = '',
+    project: str = _placeholders.PROJECT_ID_PLACEHOLDER,
+) -> NamedTuple('outputs', feature_attributions=kfp.dsl.Metrics):
+  """A pipeline to compute feature attributions by sampling data for batch explanations.
+
+  This pipeline guarantees support for AutoML Tabular models that contain a
+  valid explanation_spec.
+
+  Args:
+    location: The GCP region that runs the pipeline components.
+    prediction_type: The type of prediction the model is to produce:
+      "classification", "regression", or "forecasting".
+    vertex_model: The Vertex model artifact used for batch explanation.
+    batch_predict_instances_format: The format in which instances are given,
+      must be one of the Model's supportedInputStorageFormats. For more
+      details about this input config, see
+      https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig.
+    batch_predict_gcs_destination_output_uri: The Google Cloud Storage
+      location of the directory where the output is to be written to. In the
+      given directory a new directory is created. Its name is
+      ``prediction-<model-display-name>-<job-create-time>``, where the
+      timestamp is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it
+      files ``predictions_0001.<extension>``,
+      ``predictions_0002.<extension>``, ..., ``predictions_N.<extension>``
+      are created where ``<extension>`` depends on the chosen
+      ``predictions_format``, and N may equal 0001 and depends on the total
+      number of successfully predicted instances. If the Model has both
+      ``instance`` and ``prediction`` schemata defined, then each such file
+      contains predictions as per the ``predictions_format``. If prediction
+      for any instance failed (partially or completely), then additional
+      ``errors_0001.<extension>``, ``errors_0002.<extension>``, ...,
+      ``errors_N.<extension>`` files are created (N depends on the total
+      number of failed predictions). These files contain the failed
+      instances, as per their schema, followed by an additional ``error``
+      field which as value has ``google.rpc.Status`` containing only ``code``
+      and ``message`` fields. For more details about this output config, see
+      https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.
+    batch_predict_gcs_source_uris: Google Cloud Storage URI(-s) to your
+      instances to run batch prediction on. May contain wildcards. For more
+      information on wildcards, see
+      https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames. For
+      more details about this input config, see
+      https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig.
+    batch_predict_bigquery_source_uri: Google BigQuery URI to your instances
+      to run batch prediction on. May contain wildcards. For more details
+      about this input config, see
+      https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig.
+    batch_predict_predictions_format: The format in which Vertex AI gives the
+      predictions. Must be one of the Model's supportedOutputStorageFormats.
+      For more details about this output config, see
+      https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.
+    batch_predict_bigquery_destination_output_uri: The BigQuery project
+      location where the output is to be written to. In the given project a
+      new dataset is created with name
+      ``prediction_<model-display-name>_<job-create-time>``, where
+      ``<model-display-name>`` is made BigQuery-dataset-name compatible (for
+      example, most special characters become underscores), and the timestamp
+      is in YYYY_MM_DDThh_mm_ss_sssZ "based on ISO-8601" format. In the
+      dataset two tables will be created, ``predictions`` and ``errors``. If
+      the Model has both ``instance`` and ``prediction`` schemata defined,
+      then the tables have columns as follows: The ``predictions`` table
+      contains instances for which the prediction succeeded, it has columns
+      as per a concatenation of the Model's instance and prediction schemata.
+      The ``errors`` table contains rows for which the prediction has failed,
+      it has instance columns, as per the instance schema, followed by a
+      single "errors" column, which as values has ``google.rpc.Status``
+      represented as a STRUCT, and containing only ``code`` and ``message``.
+      For more details about this output config, see
+      https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.
+    batch_predict_machine_type: The type of machine for running batch
+      prediction on dedicated resources. If the Model supports
+      DEDICATED_RESOURCES this config may be provided (and the job will use
+      these resources). If the Model doesn't support AUTOMATIC_RESOURCES,
+      this config must be provided. For more details about the
+      BatchDedicatedResources, see
+      https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#BatchDedicatedResources.
+      For more details about the machine spec, see
+      https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec
+    batch_predict_starting_replica_count: The number of machine replicas used
+      at the start of the batch operation. If not set, Vertex AI decides the
+      starting number, not greater than ``max_replica_count``. Only used if
+      ``machine_type`` is set.
+    batch_predict_max_replica_count: The maximum number of machine replicas
+      the batch operation may be scaled to. Only used if ``machine_type`` is
+      set.
+    batch_predict_explanation_metadata: Explanation metadata configuration
+      for this BatchPredictionJob. Can be specified only if
+      ``generate_explanation`` is set to ``True``. This value overrides the
+      value of ``Model.explanation_metadata``. All fields of
+      ``explanation_metadata`` are optional in the request. If a field of the
+      ``explanation_metadata`` object is not populated, the corresponding
+      field of the ``Model.explanation_metadata`` object is inherited. For
+      more details, see
+      https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#explanationmetadata.
+    batch_predict_explanation_parameters: Parameters to configure explaining
+      for the Model's predictions. Can be specified only if
+      ``generate_explanation`` is set to ``True``. This value overrides the
+      value of ``Model.explanation_parameters``. All fields of
+      ``explanation_parameters`` are optional in the request. If a field of
+      the ``explanation_parameters`` object is not populated, the
+      corresponding field of the ``Model.explanation_parameters`` object is
+      inherited. For more details, see
+      https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#ExplanationParameters.
+    batch_predict_explanation_data_sample_size: Desired size to downsample
+      the input dataset that will then be used for batch explanation.
+    batch_predict_accelerator_type: The type of accelerator(s) that may be
+      attached to the machine as per ``batch_predict_accelerator_count``.
+      Only used if ``batch_predict_machine_type`` is set. For more details
+      about the machine spec, see
+      https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec
+    batch_predict_accelerator_count: The number of accelerators to attach to
+      the ``batch_predict_machine_type``. Only used if
+      ``batch_predict_machine_type`` is set.
+    dataflow_machine_type: The Dataflow machine type for evaluation
+      components.
+    dataflow_max_num_workers: The max number of Dataflow workers for
+      evaluation components.
+    dataflow_disk_size_gb: Dataflow worker's disk size in GB for evaluation
+      components.
+    dataflow_service_account: Custom service account to run Dataflow jobs.
+    dataflow_subnetwork: Dataflow's fully qualified subnetwork name, when
+      empty the default subnetwork will be used. Example:
+      https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications
+    dataflow_use_public_ips: Specifies whether Dataflow workers use public IP
+      addresses.
+    encryption_spec_key_name: Customer-managed encryption key options. If
+      set, resources created by this pipeline will be encrypted with the
+      provided encryption key. Has the form:
+      ``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``.
+      The key needs to be in the same region as where the compute resource is
+      created.
+    force_runner_mode: Indicates the runner mode to force. Valid options are
+      ``Dataflow`` and ``DirectRunner``.
+    project: The GCP project that runs the pipeline components. Defaults to
+      the project in which the PipelineJob is run.
+
+  Returns:
+    A system.Metrics artifact with feature attributions.
+  """
+  outputs = NamedTuple('outputs', feature_attributions=kfp.dsl.Metrics)
+
+  # Sample the input dataset for a quicker batch explanation.
+  data_sampler_task = EvaluationDataSamplerOp(
+      project=project,
+      location=location,
+      gcs_source_uris=batch_predict_gcs_source_uris,
+      bigquery_source_uri=batch_predict_bigquery_source_uri,
+      instances_format=batch_predict_instances_format,
+      sample_size=batch_predict_explanation_data_sample_size,
+      force_runner_mode=force_runner_mode,
+  )
+
+  # Run batch explain.
+  batch_explain_task = ModelBatchPredictOp(
+      project=project,
+      location=location,
+      model=vertex_model,
+      job_display_name='model-registry-batch-explain-evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}',
+      gcs_source_uris=data_sampler_task.outputs['gcs_output_directory'],
+      bigquery_source_input_uri=data_sampler_task.outputs['bigquery_output_table'],
+      instances_format=batch_predict_instances_format,
+      predictions_format=batch_predict_predictions_format,
+      gcs_destination_output_uri_prefix=batch_predict_gcs_destination_output_uri,
+      bigquery_destination_output_uri=batch_predict_bigquery_destination_output_uri,
+      generate_explanation=True,
+      explanation_parameters=batch_predict_explanation_parameters,
+      explanation_metadata=batch_predict_explanation_metadata,
+      machine_type=batch_predict_machine_type,
+      starting_replica_count=batch_predict_starting_replica_count,
+      max_replica_count=batch_predict_max_replica_count,
+      encryption_spec_key_name=encryption_spec_key_name,
+      accelerator_type=batch_predict_accelerator_type,
+      accelerator_count=batch_predict_accelerator_count,
+  )
+
+  # Generate feature attributions from explanations.
+  feature_attribution_task = ModelEvaluationFeatureAttributionOp(
+      project=project,
+      location=location,
+      problem_type=prediction_type,
+      predictions_format=batch_predict_predictions_format,
+      predictions_gcs_source=batch_explain_task.outputs['gcs_output_directory'],
+      predictions_bigquery_source=batch_explain_task.outputs['bigquery_output_table'],
+      dataflow_machine_type=dataflow_machine_type,
+      dataflow_max_workers_num=dataflow_max_num_workers,
+      dataflow_disk_size_gb=dataflow_disk_size_gb,
+      dataflow_service_account=dataflow_service_account,
+      dataflow_subnetwork=dataflow_subnetwork,
+      dataflow_use_public_ips=dataflow_use_public_ips,
+      encryption_spec_key_name=encryption_spec_key_name,
+      force_runner_mode=force_runner_mode,
+  )
+
+  return outputs(
+      feature_attributions=feature_attribution_task.outputs['feature_attributions']
+  )
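Because the graph component is declared with `@kfp.dsl.pipeline`, it can be compiled on its own or nested inside a larger pipeline. A sketch of compiling it directly (the output file name is arbitrary):

    from kfp import compiler

    from google_cloud_pipeline_components._implementation.model_evaluation.feature_attribution.feature_attribution_graph_component import feature_attribution_graph_component

    # Produces a pipeline spec that can be submitted to Vertex AI Pipelines;
    # vertex_model and the other required arguments are supplied at runtime.
    compiler.Compiler().compile(
        pipeline_func=feature_attribution_graph_component,
        package_path='feature_attribution_graph_component.yaml',
    )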
@@ -17,7 +17,7 @@ from typing import NamedTuple
 
 from google_cloud_pipeline_components import _image
 from google_cloud_pipeline_components import _placeholders
-from google_cloud_pipeline_components._implementation.model_evaluation import LLMSafetyBiasMetricsOp
+from google_cloud_pipeline_components._implementation.model_evaluation.llm_safety_bias.component import llm_safety_bias_metrics as LLMSafetyBiasMetricsOp
 from google_cloud_pipeline_components.types.artifact_types import VertexBatchPredictionJob
 from kfp import dsl
 from kfp.dsl import Artifact
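Note that the safety-bias pipeline now imports `llm_safety_bias_metrics` from its component module rather than from the `_implementation.model_evaluation` package init, presumably because that init now imports `evaluation_llm_safety_bias_pipeline` itself and a package-level import would be circular. The same direct-module import pattern appears in the preview graph component hunk below.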
@@ -14,11 +14,15 @@
 """Model evaluation preview components."""
 
 from google_cloud_pipeline_components.preview.model_evaluation.data_bias_component import detect_data_bias as DetectDataBiasOp
+from google_cloud_pipeline_components.preview.model_evaluation.evaluation_llm_classification_pipeline import evaluation_llm_classification_pipeline
+from google_cloud_pipeline_components.preview.model_evaluation.evaluation_llm_text_generation_pipeline import evaluation_llm_text_generation_pipeline
 from google_cloud_pipeline_components.preview.model_evaluation.feature_attribution_component import feature_attribution as ModelEvaluationFeatureAttributionOp
 from google_cloud_pipeline_components.preview.model_evaluation.feature_attribution_graph_component import feature_attribution_graph_component as FeatureAttributionGraphComponentOp
 from google_cloud_pipeline_components.preview.model_evaluation.model_bias_component import detect_model_bias as DetectModelBiasOp
 
 __all__ = [
+    'evaluation_llm_classification_pipeline',
+    'evaluation_llm_text_generation_pipeline',
     'ModelEvaluationFeatureAttributionOp',
     'FeatureAttributionGraphComponentOp',
     'DetectModelBiasOp',
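With the preview init exporting the two LLM evaluation pipelines, users can reference them from the public preview surface rather than a private module path. A sketch of compiling one of them (the output file name is arbitrary):

    from kfp import compiler

    from google_cloud_pipeline_components.preview.model_evaluation import (
        evaluation_llm_classification_pipeline,
        evaluation_llm_text_generation_pipeline,
    )

    # Compile the text generation evaluation pipeline to a submittable spec.
    compiler.Compiler().compile(
        pipeline_func=evaluation_llm_text_generation_pipeline,
        package_path='evaluation_llm_text_generation_pipeline.json',
    )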
@@ -16,7 +16,7 @@ from typing import List, NamedTuple
 
 from google_cloud_pipeline_components import _placeholders
 from google_cloud_pipeline_components._implementation.model_evaluation import EvaluationDataSamplerOp
-from google_cloud_pipeline_components.preview.model_evaluation import ModelEvaluationFeatureAttributionOp
+from google_cloud_pipeline_components.preview.model_evaluation.feature_attribution_component import feature_attribution as ModelEvaluationFeatureAttributionOp
 from google_cloud_pipeline_components.types.artifact_types import VertexModel
 from google_cloud_pipeline_components.v1.batch_predict_job import ModelBatchPredictOp
 import kfp
@@ -16,8 +16,8 @@ from typing import Any, Dict, List, NamedTuple
 
 from google_cloud_pipeline_components import _placeholders
 from google_cloud_pipeline_components._implementation.model import GetVertexModelOp
+from google_cloud_pipeline_components._implementation.model_evaluation import FeatureAttributionGraphComponentOp
 from google_cloud_pipeline_components._implementation.model_evaluation import ModelImportEvaluationOp
-from google_cloud_pipeline_components.preview.model_evaluation import FeatureAttributionGraphComponentOp
 from google_cloud_pipeline_components.types.artifact_types import ClassificationMetrics
 from google_cloud_pipeline_components.types.artifact_types import RegressionMetrics
 from google_cloud_pipeline_components.v1.batch_predict_job import ModelBatchPredictOp
@@ -16,9 +16,9 @@ from typing import Any, Dict, List, NamedTuple
 
 from google_cloud_pipeline_components import _placeholders
 from google_cloud_pipeline_components._implementation.model import GetVertexModelOp
+from google_cloud_pipeline_components._implementation.model_evaluation import FeatureAttributionGraphComponentOp
 from google_cloud_pipeline_components._implementation.model_evaluation import ModelImportEvaluationOp
 from google_cloud_pipeline_components._implementation.model_evaluation import TargetFieldDataRemoverOp
-from google_cloud_pipeline_components.preview.model_evaluation import FeatureAttributionGraphComponentOp
 from google_cloud_pipeline_components.types.artifact_types import ClassificationMetrics
 from google_cloud_pipeline_components.types.artifact_types import RegressionMetrics
 from google_cloud_pipeline_components.v1.batch_predict_job import ModelBatchPredictOp