docs(components): Update Dataproc Serverless component docstrings

PiperOrigin-RevId: 539781907
Googler 2023-06-12 15:44:17 -07:00 committed by Google Cloud Pipeline Components maintainers
parent 66d608928b
commit 9eff7ff357
4 changed files with 73 additions and 61 deletions


@@ -51,26 +51,24 @@ def dataproc_create_pyspark_batch(
Args:
project: Project to run the Dataproc batch workload.
location: Location of the Dataproc batch workload. If
not set, defaults to ``"us-central1"``.
batch_id: The ID to use for the batch, which will become
the final component of the batch's resource name. If none is
specified, a default name will be generated by the component. This
value must be 4-63 characters. Valid characters are ``/[a-z][0-9]-/``.
labels: The labels to associate with this batch. Label
keys must contain 1 to 63 characters, and must conform to RFC 1035.
Label values may be empty, but, if present, must contain 1 to 63
characters, and must conform to RFC 1035. No more than 32 labels can
be associated with a batch. An object containing a list of ``"key":
value`` pairs.
Example: ``{ "name": "wrench", "mass": "1.3kg", "count": "3" }``.
container_image: Optional custom container image for the
job runtime environment. If not specified, a default container image
will be used.
runtime_config_version: Version of the batch runtime.
runtime_config_properties: Runtime configuration for the workload.
service_account: Service account that is used to execute the workload.
network_tags: Tags used for network traffic
control.
kms_key: The Cloud KMS key to use for encryption.
@@ -81,20 +79,21 @@ def dataproc_create_pyspark_batch(
spark_history_dataproc_cluster: The Spark History Server
configuration for the workload.
main_python_file_uri: The HCFS URI of the main Python
file to use as the Spark driver. Must be a ``.py`` file.
python_file_uris: HCFS file URIs of Python files to
pass to the PySpark framework. Supported file types: ``.py``, ``.egg``,
and ``.zip``.
jar_file_uris: HCFS URIs of jar files to add to the
classpath of the Spark driver and tasks.
file_uris: HCFS URIs of files to be placed in the
working directory of each executor.
archive_uris: HCFS URIs of archives to be extracted
into the working directory of each executor. Supported file types:
``.jar``, ``.tar``, ``.tar.gz``, ``.tgz``, and ``.zip``.
args: The arguments to pass to the driver. Do not
include arguments that can be set as batch properties, such as
``--conf``, since a collision can occur that causes an incorrect batch
submission.
Returns:
gcp_resources: Serialized gcp_resources proto tracking the Dataproc batch workload. For more details, see
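Illustrative use of the PySpark batch component documented above (not part of this commit): a minimal sketch assuming the component is surfaced as ``DataprocPySparkBatchOp`` in ``google_cloud_pipeline_components.v1.dataproc`` (the import path may differ by release); project, bucket, and batch IDs are placeholders:

    from kfp import dsl
    from google_cloud_pipeline_components.v1.dataproc import DataprocPySparkBatchOp

    @dsl.pipeline(name="pyspark-batch-example")
    def pyspark_batch_pipeline():
        # Placeholder project/bucket/batch values; properties such as --conf
        # belong in runtime_config_properties, not in args.
        DataprocPySparkBatchOp(
            project="my-project",
            location="us-central1",
            batch_id="example-pyspark-batch",
            main_python_file_uri="gs://my-bucket/jobs/word_count.py",
            args=["gs://my-bucket/data/input.txt"],
            runtime_config_properties={"spark.executor.cores": "4"},
        )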


@@ -51,26 +51,24 @@ def dataproc_create_spark_batch(
Args:
project: Project to run the Dataproc batch workload.
location: Location of the Dataproc batch workload. If
not set, defaults to ``"us-central1"``.
batch_id: The ID to use for the batch, which will become
the final component of the batch's resource name. If none is
specified, a default name will be generated by the component. This
value must be 4-63 characters. Valid characters are ``/[a-z][0-9]-/``.
labels: The labels to associate with this batch. Label
keys must contain 1 to 63 characters, and must conform to RFC 1035.
Label values may be empty, but, if present, must contain 1 to 63
characters, and must conform to RFC 1035. No more than 32 labels can
be associated with a batch. An object containing a list of ``"key":
value`` pairs.
Example: ``{ "name": "wrench", "mass": "1.3kg", "count": "3" }``.
container_image: Optional custom container image for the
job runtime environment. If not specified, a default container image
will be used.
runtime_config_version: Version of the batch runtime.
runtime_config_properties: Runtime configuration for the workload.
service_account: Service account that is used to execute the workload.
network_tags: Tags used for network traffic
control.
kms_key: The Cloud KMS key to use for encryption.
@@ -85,13 +83,17 @@ def dataproc_create_spark_batch(
main_class: The name of the driver main class. The jar
file that contains the class must be in the classpath or specified in
jar_file_uris.
jar_file_uris: HCFS URIs of jar files to add to the classpath of the Spark
driver and tasks.
file_uris: HCFS URIs of files to be placed in the working directory of
each executor.
archive_uris: HCFS URIs of archives to be extracted into the working
directory of each executor. Supported file types:
``.jar``, ``.tar``, ``.tar.gz``, ``.tgz``, and ``.zip``.
args: The arguments to pass to the driver. Do not
include arguments that can be set as batch properties, such as
``--conf``, since a collision can occur that causes an incorrect batch
submission.
Returns:
gcp_resources: Serialized gcp_resources proto tracking the Dataproc batch workload. For more details, see
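For reference, a comparable sketch for the Spark batch component, again assuming it is surfaced as ``DataprocSparkBatchOp`` in ``google_cloud_pipeline_components.v1.dataproc``; the class name and jar URI are placeholders:

    from kfp import dsl
    from google_cloud_pipeline_components.v1.dataproc import DataprocSparkBatchOp

    @dsl.pipeline(name="spark-batch-example")
    def spark_batch_pipeline():
        # main_class must be provided by one of the jars in jar_file_uris.
        DataprocSparkBatchOp(
            project="my-project",
            location="us-central1",
            batch_id="example-spark-batch",
            main_class="com.example.WordCount",
            jar_file_uris=["gs://my-bucket/jars/wordcount.jar"],
            args=["gs://my-bucket/data/input.txt"],
        )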


@@ -48,38 +48,44 @@ def dataproc_create_spark_r_batch(
Args:
project: Project to run the Dataproc batch workload.
location: Location of the Dataproc batch workload. If not set, defaults to
``"us-central1"``.
batch_id: The ID to use for the batch, which will become
the final component of the batch's resource name. If none is
specified, a default name will be generated by the component. This
value must be 4-63 characters. Valid characters are ``/[a-z][0-9]-/``.
labels: The labels to associate with this batch. Label
keys must contain 1 to 63 characters, and must conform to RFC 1035.
Label values may be empty, but, if present, must contain 1 to 63
characters, and must conform to RFC 1035. No more than 32 labels can
be associated with a batch. An object containing a list of ``"key":
value`` pairs.
Example: ``{ "name": "wrench", "mass": "1.3kg", "count": "3" }``.
container_image: Optional custom container image for the
job runtime environment. If not specified, a default container image
will be used.
runtime_config_version: Version of the batch runtime.
runtime_config_properties: Runtime configuration for the workload.
service_account: Service account that is used to execute the workload.
network_tags: Tags used for network traffic control.
kms_key: The Cloud KMS key to use for encryption.
network_uri: Network URI to connect workload to.
subnetwork_uri: Subnetwork URI to connect workload to.
metastore_service: Resource name of an existing Dataproc Metastore
service.
spark_history_dataproc_cluster: The Spark History Server configuration for
the workload.
main_r_file_uri: The HCFS URI of the main R file to use as the driver.
Must be a ``.R`` or ``.r`` file.
file_uris: HCFS URIs of files to be placed in the working directory of
each executor.
archive_uris: HCFS URIs of archives to be extracted into the working
directory of each executor. Supported file types:
``.jar``, ``.tar``, ``.tar.gz``, ``.tgz``, and ``.zip``.
args: The arguments to pass to the driver. Do not
include arguments that can be set as batch properties, such as
``--conf``, since a collision can occur that causes an incorrect batch
submission.
Returns:
gcp_resources: Serialized gcp_resources proto tracking the Dataproc batch workload. For more details, see
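A corresponding sketch for the SparkR batch component, assuming the ``DataprocSparkRBatchOp`` name in ``google_cloud_pipeline_components.v1.dataproc``; the R script and data URIs are placeholders:

    from kfp import dsl
    from google_cloud_pipeline_components.v1.dataproc import DataprocSparkRBatchOp

    @dsl.pipeline(name="spark-r-batch-example")
    def spark_r_batch_pipeline():
        # main_r_file_uri must point at a .R or .r file.
        DataprocSparkRBatchOp(
            project="my-project",
            location="us-central1",
            batch_id="example-spark-r-batch",
            main_r_file_uri="gs://my-bucket/jobs/analysis.R",
            file_uris=["gs://my-bucket/data/input.csv"],
            args=["input.csv"],
        )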


@@ -47,7 +47,7 @@ def dataproc_create_spark_sql_batch(
Args:
project: Project to run the Dataproc batch workload.
location: Location of the Dataproc batch workload. If
not set, defaults to ``"us-central1"``.
batch_id: The ID to use for the batch, which will become
the final component of the batch's resource name. If none is
specified, a default name will be generated by the component. This
@@ -56,26 +56,31 @@ def dataproc_create_spark_sql_batch(
keys must contain 1 to 63 characters, and must conform to RFC 1035.
Label values may be empty, but, if present, must contain 1 to 63
characters, and must conform to RFC 1035. No more than 32 labels can
be associated with a batch. An object containing a list of ``"key":
value`` pairs.
Example: ``{ "name": "wrench", "mass": "1.3kg", "count": "3" }``.
container_image: Optional custom container image for the
job runtime environment. If not specified, a default container image
will be used.
runtime_config_version: Version of the batch runtime.
runtime_config_properties: Runtime configuration for the workload.
service_account: Service account that is used to execute the workload.
network_tags: Tags used for network traffic control.
kms_key: The Cloud KMS key to use for encryption.
network_uri: Network URI to connect workload to.
subnetwork_uri: Subnetwork URI to connect workload to.
metastore_service: Resource name of an existing Dataproc Metastore
service.
spark_history_dataproc_cluster: The Spark History Server configuration for
the workload.
query_file_uri: The HCFS URI of the script that contains Spark SQL queries
to execute.
query_variables: Mapping of query variable names to values (equivalent to
the Spark SQL command: ``SET name="value";``). An object containing a
list of ``"key": value`` pairs.
Example: ``{ "name": "wrench", "mass": "1.3kg", "count": "3" }``.
jar_file_uris: HCFS URIs of jar files to be added to the Spark
``CLASSPATH``.
Returns:
gcp_resources: Serialized gcp_resources proto tracking the Dataproc batch workload. For more details, see
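Finally, a sketch for the Spark SQL batch component, assuming the ``DataprocSparkSqlBatchOp`` name in ``google_cloud_pipeline_components.v1.dataproc``; the script URI, variable name, and jar are placeholders:

    from kfp import dsl
    from google_cloud_pipeline_components.v1.dataproc import DataprocSparkSqlBatchOp

    @dsl.pipeline(name="spark-sql-batch-example")
    def spark_sql_batch_pipeline():
        # Each query_variables entry behaves like SET name="value"; in the script.
        DataprocSparkSqlBatchOp(
            project="my-project",
            location="us-central1",
            batch_id="example-spark-sql-batch",
            query_file_uri="gs://my-bucket/sql/daily_report.sql",
            query_variables={"run_date": "2023-06-12"},
            jar_file_uris=["gs://my-bucket/jars/custom_udfs.jar"],
        )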