From 0487f9a8b1d8ab0d96d757bd4b598ffd353ecc81 Mon Sep 17 00:00:00 2001
From: Jason Dai <jsndai@google.com>
Date: Fri, 13 Oct 2023 15:15:56 -0700
Subject: [PATCH] feat(components): Add sliced evaluation metrics support for
 custom and unstructured AutoML models in evaluation pipeline

PiperOrigin-RevId: 573331226
---
 components/google-cloud/RELEASE.md                        | 1 +
 .../evaluation_automl_unstructure_data_pipeline.py        | 8 +++++++-
 2 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/components/google-cloud/RELEASE.md b/components/google-cloud/RELEASE.md
index 9933f01511..91788d695a 100644
--- a/components/google-cloud/RELEASE.md
+++ b/components/google-cloud/RELEASE.md
@@ -3,6 +3,7 @@
 * Support `incremental_train_base_model`, `parent_model`, `is_default_version`, `model_version_aliases`, `model_version_description` in `AutoMLImageTrainingJobRunOp`.
 * Add `preview.automl.vision` and `DataConverterJobOp`.
 * Set display names for `preview.llm` pipelines.
+* Add sliced evaluation metrics support for custom and unstructured AutoML models in evaluation pipeline.
 
 ## Release 2.4.1
 * Disable caching for LLM pipeline tasks that store temporary artifacts.
diff --git a/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/evaluation_automl_unstructure_data_pipeline.py b/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/evaluation_automl_unstructure_data_pipeline.py
index 34fc9ad764..4148df50de 100644
--- a/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/evaluation_automl_unstructure_data_pipeline.py
+++ b/components/google-cloud/google_cloud_pipeline_components/v1/model_evaluation/evaluation_automl_unstructure_data_pipeline.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from typing import List, NamedTuple
+from typing import Any, List, NamedTuple
 
 from google_cloud_pipeline_components import _placeholders
 from google_cloud_pipeline_components._implementation.model import GetVertexModelOp
@@ -43,6 +43,7 @@ def evaluation_automl_unstructure_data_classification_pipeline(  # pylint: disab
     batch_predict_max_replica_count: int = 10,
     batch_predict_accelerator_type: str = '',
     batch_predict_accelerator_count: int = 0,
+    slicing_specs: List[Any] = [],  # pylint: disable=g-bare-generic
     evaluation_prediction_label_column: str = '',
     evaluation_prediction_score_column: str = '',
     evaluation_class_labels: List[str] = [],  # pylint: disable=g-bare-generic
@@ -81,6 +82,7 @@ def evaluation_automl_unstructure_data_classification_pipeline(  # pylint: disab
     batch_predict_max_replica_count: The maximum number of machine replicas the batch operation may be scaled to. Only used if `machine_type` is set.
     batch_predict_accelerator_type: The type of accelerator(s) that may be attached to the machine as per `batch_predict_accelerator_count`. Only used if `batch_predict_machine_type` is set. For more details about the machine spec, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec
     batch_predict_accelerator_count: The number of accelerators to attach to the `batch_predict_machine_type`. Only used if `batch_predict_machine_type` is set.
+    slicing_specs: List of `google.cloud.aiplatform_v1.types.ModelEvaluationSlice.SlicingSpec`. When provided, compute metrics for each defined slice. See sample code at https://cloud.google.com/vertex-ai/docs/pipelines/model-evaluation-component. For more details on configuring slices, see https://cloud.google.com/python/docs/reference/aiplatform/latest/google.cloud.aiplatform_v1.types.ModelEvaluationSlice.
     evaluation_prediction_label_column: The column name of the field containing classes the model is scoring. Formatted to be able to find nested columns, delimited by `.`.
     evaluation_prediction_score_column: The column name of the field containing batch prediction scores. Formatted to be able to find nested columns, delimited by `.`.
     evaluation_class_labels: Required for classification prediction type. The list of class names for the target_field_name, in the same order they appear in a file in batch_predict_gcs_source_uris. For instance, if the target_field_name could be either `1` or `0`, then the class_labels input will be ["1", "0"].
@@ -173,6 +175,7 @@ def evaluation_automl_unstructure_data_classification_pipeline(  # pylint: disab
       encryption_spec_key_name=encryption_spec_key_name,
       force_runner_mode=force_runner_mode,
       model=get_model_task.outputs['model'],
+      slicing_specs=slicing_specs,
   )
 
   # Import the evaluation result to Vertex AI.
@@ -373,6 +376,7 @@ def evaluation_automl_unstructure_data_pipeline(  # pylint: disable=dangerous-de
     batch_predict_max_replica_count: int = 10,
     batch_predict_accelerator_type: str = '',
     batch_predict_accelerator_count: int = 0,
+    slicing_specs: List[Any] = [],  # pylint: disable=g-bare-generic
     evaluation_prediction_label_column: str = '',
     evaluation_prediction_score_column: str = '',
     evaluation_class_labels: List[str] = [],  # pylint: disable=g-bare-generic
@@ -409,6 +413,7 @@ def evaluation_automl_unstructure_data_pipeline(  # pylint: disable=dangerous-de
     batch_predict_max_replica_count: The maximum number of machine replicas the batch operation may be scaled to. Only used if `machine_type` is set.
     batch_predict_accelerator_type: The type of accelerator(s) that may be attached to the machine as per `batch_predict_accelerator_count`. Only used if `batch_predict_machine_type` is set. For more details about the machine spec, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec
     batch_predict_accelerator_count: The number of accelerators to attach to the `batch_predict_machine_type`. Only used if `batch_predict_machine_type` is set.
+    slicing_specs: List of `google.cloud.aiplatform_v1.types.ModelEvaluationSlice.SlicingSpec`. When provided, compute metrics for each defined slice. See sample code at https://cloud.google.com/vertex-ai/docs/pipelines/model-evaluation-component. For more details on configuring slices, see https://cloud.google.com/python/docs/reference/aiplatform/latest/google.cloud.aiplatform_v1.types.ModelEvaluationSlice.
     evaluation_prediction_label_column: The column name of the field containing classes the model is scoring. Formatted to be able to find nested columns, delimited by `.`.
     evaluation_prediction_score_column: The column name of the field containing batch prediction scores. Formatted to be able to find nested columns, delimited by `.`.
     evaluation_class_labels: Required for classification prediction type. The list of class names for the target_field_name, in the same order they appear in a file in batch_predict_gcs_source_uris. For instance, if the target_field_name could be either `1` or `0`, then the class_labels input will be ["1", "0"].
@@ -442,6 +447,7 @@ def evaluation_automl_unstructure_data_pipeline(  # pylint: disable=dangerous-de
         batch_predict_max_replica_count=batch_predict_max_replica_count,
         batch_predict_accelerator_type=batch_predict_accelerator_type,
         batch_predict_accelerator_count=batch_predict_accelerator_count,
+        slicing_specs=slicing_specs,
         evaluation_prediction_label_column=evaluation_prediction_label_column,
         evaluation_prediction_score_column=evaluation_prediction_score_column,
         evaluation_class_labels=evaluation_class_labels,