# semantic-conventions/model/trace/database.yaml

groups:
  # Baseline attribute group for database client calls: the operation name and
  # the logical server endpoint, each flagged as sampling-relevant.
  - id: trace.db.common.minimal
    type: attribute_group
    extends: attributes.db.client.minimal
    brief: This group defines the attributes used to perform database client calls.
    attributes:
      # TODO: add db.system once https://github.com/open-telemetry/build-tools/issues/192 is possible
      # - ref: db.system
      #   sampling_relevant: true
      - ref: db.operation.name
        sampling_relevant: true
      - ref: server.address
        sampling_relevant: true
      - ref: server.port
        sampling_relevant: true

  # Minimal group plus query text (recommended, subject to the sanitization
  # rules spelled out below) and opt-in query parameters.
  - id: trace.db.common.query
    type: attribute_group
    extends: trace.db.common.minimal
    brief: This group defines the attributes used to perform database client calls.
    attributes:
      - ref: db.query.text
        sampling_relevant: true
        requirement_level:
          recommended: >
            Non-parameterized query text SHOULD NOT be collected by default unless there is sanitization that excludes
            sensitive data, e.g. by redacting all literal values present in the query text.

            Parameterized query text SHOULD be collected by default
            (the query parameter values themselves are opt-in,
            see [`db.query.parameter.<key>`](../../docs/attributes-registry/db.md)).
      # Parameter values are never collected unless explicitly enabled.
      - ref: db.query.parameter
        requirement_level: opt_in

  # Minimal group plus query text, opt-in query parameters and the name of the
  # collection (table, container, ...) the operation targets.
  - id: trace.db.common.query_and_collection
    type: attribute_group
    extends: trace.db.common.minimal
    brief: This group defines the attributes used to perform database client calls.
    attributes:
      - ref: db.query.text
        sampling_relevant: true
        requirement_level:
          recommended: >
            SHOULD be collected by default only if there is sanitization that excludes sensitive information.
            See [Sanitization of `db.query.text`](../../docs/database/database-spans.md#sanitization-of-dbquerytext).
      # Parameter values are never collected unless explicitly enabled.
      - ref: db.query.parameter
        requirement_level: opt_in
      - ref: db.collection.name
        sampling_relevant: true
        requirement_level:
          conditionally_required: >
            If readily available. The collection name MAY be parsed from the query text,
            in which case it SHOULD be the first collection name found in the query.

  # Query-and-collection group plus network peer details, the required
  # `db.system` and the conditionally required `db.namespace`.
  - id: trace.db.common.full
    type: attribute_group
    extends: trace.db.common.query_and_collection
    brief: This group documents attributes that describe database call along with network information.
    attributes:
      - ref: network.peer.address
        brief: Peer address of the database node where the operation was performed.
        requirement_level:
          recommended: If applicable for this database system.
        note: >
          Semantic conventions for individual database systems SHOULD document whether `network.peer.*` attributes are applicable.
          Network peer address and port are useful when the application interacts with individual database nodes directly.

          If a database operation involved multiple network calls (for example retries), the address of the last contacted node SHOULD be used.
      # The port is only meaningful together with the peer address.
      - ref: network.peer.port
        requirement_level:
          recommended: if and only if `network.peer.address` is set.
      - ref: db.system
        sampling_relevant: true
        # TODO: Not adding to the minimal because of https://github.com/open-telemetry/build-tools/issues/192
        requirement_level: required
      - ref: db.namespace
        sampling_relevant: true
        requirement_level:
          conditionally_required: If available.


  # Generic database client span; every attribute comes from the full common group.
  - id: db
    type: span
    span_kind: client
    extends: trace.db.common.full
    brief: This span defines the attributes used to perform database client calls.

  # Microsoft SQL Server specialization of the `db.sql` span; only `db.namespace`
  # is refined here (instance-qualified database name).
  - id: db.mssql
    type: span
    extends: db.sql
    brief: >
      Attributes for Microsoft SQL Server
    attributes:
      - ref: db.namespace
        sampling_relevant: true
        brief: The name of the database, fully qualified within the server address and port.
        # Explicit folded block scalar rather than an implicit multi-line plain
        # scalar: paragraph breaks are preserved the same way, but the text can no
        # longer be mis-parsed if a future edit introduces `: ` or ` #`.
        note: >
          When connecting to a default instance, `db.namespace` SHOULD be set to the name of
          the database. When connecting to a [named instance](https://learn.microsoft.com/sql/connect/jdbc/building-the-connection-url#named-and-multiple-sql-server-instances),
          `db.namespace` SHOULD be set to the combination of instance and database name following the `{instance_name}.{database_name}` pattern.

          For commands that switch the database, this SHOULD be set to the target database (even if the command fails).
        examples: ["instance1.products", "customers"]

  # Cassandra span: keyspace as namespace, table as collection, plus the
  # Cassandra-specific attributes and network peer details.
  - id: db.cassandra
    type: span
    extends: trace.db.common.query_and_collection
    brief: >
      Attributes for Cassandra
    attributes:
      - ref: db.namespace
        sampling_relevant: true
        brief: The Cassandra keyspace name.
        requirement_level:
          conditionally_required: If available.
        note: For commands that switch the keyspace, this SHOULD be set to the target keyspace (even if the command fails).
        examples: ['mykeyspace']
      - ref: db.collection.name
        brief: The name of the Cassandra table that the operation is acting upon.
      # Cassandra-specific attributes, taken as defined in the registry.
      - ref: db.cassandra.page_size
      - ref: db.cassandra.consistency_level
      - ref: db.cassandra.idempotence
      - ref: db.cassandra.speculative_execution_count
      - ref: db.cassandra.coordinator.id
      - ref: db.cassandra.coordinator.dc
      - ref: network.peer.address
        brief: Peer address of the database node where the operation was performed.
        requirement_level: recommended
        note: >
          If a database operation involved multiple network calls (for example retries), the address of the last contacted node SHOULD be used.
      - ref: network.peer.port
        requirement_level:
          recommended: if and only if `network.peer.address` is set.
  # HBase span: HBase namespace as `db.namespace`, table name as `db.collection.name`.
  - id: db.hbase
    type: span
    extends: trace.db.common.minimal
    brief: >
      Attributes for HBase
    attributes:
      - ref: db.namespace
        sampling_relevant: true
        brief: The HBase namespace.
        examples: ["mynamespace"]
        requirement_level:
          conditionally_required: If applicable.
        note: >
          When performing table-related operations, the instrumentations SHOULD extract the namespace from the table name according to
          the [HBase table naming conventions](https://hbase.apache.org/book.html#namespace_creation). If namespace is not provided,
          instrumentation SHOULD set `db.namespace` value to `default`.
      - ref: db.collection.name
        sampling_relevant: true
        brief: The HBase table name.
        examples: ["mytable", "ns:table"]
        requirement_level:
          conditionally_required: If applicable.
        note: >
          If table name includes the namespace, the `db.collection.name` SHOULD be set to the full table name.

  # CouchDB span: the operation name is the HTTP method plus the REST route template.
  - id: db.couchdb
    type: span
    extends: trace.db.common.minimal
    brief: >
      Attributes for CouchDB
    attributes:
      - ref: db.operation.name
        sampling_relevant: true
        brief: >
          The HTTP method + the target REST route.
        examples:
          - 'GET /{db}/{docid}'
        note: >
          In **CouchDB**, `db.operation.name` should be set to the HTTP method +
          the target REST route according to the API reference documentation.
          For example, when retrieving a document, `db.operation.name` would be set to
          (literally, i.e., without replacing the placeholders with concrete values):
          [`GET /{db}/{docid}`](https://docs.couchdb.org/en/stable/api/document/common.html#get--db-docid).
      - ref: db.namespace
        sampling_relevant: true
        note: ''  # overriding the base note
        requirement_level:
          conditionally_required: If available.

  # Redis span: database index as namespace, CLI-style command as query text,
  # plus network peer details.
  - id: db.redis
    type: span
    extends: trace.db.common.query
    brief: >
      Attributes for Redis
    attributes:
      - ref: db.namespace
        sampling_relevant: true
        brief: >
          The index of the database being accessed as used in the [`SELECT` command](https://redis.io/commands/select)
          (captured as a string).
        # Quoted on purpose: the index is captured as a string, not an integer.
        examples: ['0', '1', '15']
        requirement_level:
          conditionally_required: If and only if it can be captured reliably.
        note: >
          The database index for current connection can be changed by the application dynamically. Instrumentations MAY use
          the initial database index provided in the connection string and keep track of the currently selected
          database to capture the `db.namespace`.

          Instrumentations SHOULD NOT set this attribute if capturing it would require additional network calls to Redis.

          For commands that switch the database, this SHOULD be set to the target database (even if the command fails).
      - ref: db.query.text
        sampling_relevant: true
        brief: >
          The full syntax of the Redis CLI command.
        examples:
          - "HMSET myhash field1 'Hello' field2 'World'"
        note: >
          For **Redis**, the value provided for `db.query.text` SHOULD correspond to the syntax of the Redis CLI.
          If, for example, the [`HMSET` command](https://redis.io/commands/hmset) is invoked, `"HMSET myhash field1 'Hello' field2 'World'"` would be a suitable value for `db.query.text`.
      - ref: network.peer.address
        brief: Peer address of the database node where the operation was performed.
        requirement_level: recommended
        note: >
          If a database operation involved multiple network calls (for example retries), the address of the last contacted node SHOULD be used.
      - ref: network.peer.port
        requirement_level:
          recommended: if and only if `network.peer.address` is set.

  # MongoDB span: command name as operation, required collection name, and
  # database name as namespace.
  - id: db.mongodb
    type: span
    extends: trace.db.common.minimal
    brief: >
      Attributes for MongoDB
    attributes:
      - ref: db.operation.name
        sampling_relevant: true
        brief: >
          The name of the command being executed.
        examples: ["findAndModify", "getMore", "update"]
        note: >
          See [MongoDB database commands](https://www.mongodb.com/docs/manual/reference/command/).
      - ref: db.collection.name
        sampling_relevant: true
        requirement_level: required
        brief: The MongoDB collection being accessed within the database stated in `db.namespace`.
      - ref: db.namespace
        sampling_relevant: true
        brief: The MongoDB database name.
        note: ''  # overriding the base note
        requirement_level:
          conditionally_required: If available.

  # Elasticsearch span: HTTP method and full URL are required alongside the
  # endpoint identifier; the search request body is captured as query text.
  - id: db.elasticsearch
    type: span
    extends: trace.db.common.minimal
    brief: >
      Attributes for Elasticsearch
    attributes:
      - ref: http.request.method
        sampling_relevant: true
        requirement_level: required
      - ref: db.operation.name
        requirement_level: required
        note: >
          The `db.operation.name` SHOULD match the endpoint identifier provided in the request
          (see the [Elasticsearch schema](https://raw.githubusercontent.com/elastic/elasticsearch-specification/main/output/schema/schema.json)).
        examples: [ 'search', 'ml.close_job', 'cat.aliases' ]
      - ref: url.full
        sampling_relevant: true
        requirement_level: required
        examples: [ 'https://localhost:9200/index/_search?q=user.id:kimchy' ]
      - ref: db.query.text
        sampling_relevant: true
        requirement_level:
          # Normative keyword written in upper case, matching the equivalent
          # `db.query.text` requirement in the common groups.
          recommended: >
            SHOULD be collected by default for search-type queries and only if there is sanitization that excludes
            sensitive information.
        brief: The request body for a [search-type query](https://www.elastic.co/guide/en/elasticsearch/reference/current/search.html), as a json string.
        # Single-quoted so the backslashes stay literal in the example value.
        examples: [ '"{\"query\":{\"term\":{\"user.id\":\"kimchy\"}}}"' ]
      - ref: db.collection.name
        sampling_relevant: true
        requirement_level: recommended
        brief: The index or data stream against which the query is executed.
        note: >
          The query may target multiple indices or data streams, in which case it SHOULD be a comma separated list of those.
          If the query doesn't target a specific index, this field MUST NOT be set.
        examples: [ 'my_index', 'index1, index2' ]
      - ref: db.namespace
        sampling_relevant: true
        note: >
          When communicating with an Elastic Cloud deployment, this should be collected from the "X-Found-Handling-Cluster" HTTP response header.
        brief: The name of the Elasticsearch cluster which the client connects to.
        requirement_level: recommended
      - ref: db.elasticsearch.node.name
        note: >
          When communicating with an Elastic Cloud deployment, this should be collected from the "X-Found-Handling-Instance" HTTP response header.
        requirement_level: recommended
      - ref: db.elasticsearch.path_parts
        requirement_level:
          conditionally_required: when the url has dynamic values

  # Generic SQL span: SQL command (or stored procedure name) as operation,
  # table as collection, database name as namespace.
  - id: db.sql
    type: span
    extends: trace.db.common.query_and_collection
    brief: >
      Attributes for SQL databases
    attributes:
      - ref: db.operation.name
        examples: ["SELECT", "INSERT", "UPDATE", "DELETE", "CREATE", "mystoredproc"]
        note: >
          This SHOULD be the SQL command such as `SELECT`, `INSERT`, `UPDATE`, `CREATE`, `DROP`.

          In the case of `EXEC`, this SHOULD be the stored procedure name that is being executed.
      - ref: db.collection.name
        brief: The name of the SQL table that the operation is acting upon.
        examples: ["users", "dbo.products"]
      - ref: db.namespace
        requirement_level:
          conditionally_required: If available.
        # Literal block scalar: line breaks inside the note are kept as written.
        note: |
          If a database system has multiple namespace components, they SHOULD be concatenated
          (potentially using database system specific conventions) from most general to most
          specific namespace component, and more specific namespaces SHOULD NOT be captured without
          the more general namespaces, to ensure that "startswith" queries for the more general namespaces will be valid.

          Unless specified by the system-specific semantic convention, the `db.namespace` attribute matches
          the name of the database being accessed.

          The database name can usually be obtained with database driver API such as
          [JDBC `Connection.getCatalog()`](https://docs.oracle.com/javase/8/docs/api/java/sql/Connection.html#getCatalog--)
          or [.NET `SqlConnection.Database`](https://learn.microsoft.com/dotnet/api/system.data.sqlclient.sqlconnection.database).

          Some database drivers don't detect when the current database is changed (for example, with SQL `USE database` statement).
          Instrumentations that parse SQL statements MAY use the database name provided
          in the connection string and keep track of the currently selected database name.

          For commands that switch the database, this SHOULD be set to the target database (even if the command fails).

          If instrumentation cannot reliably determine the current database name, it SHOULD NOT set `db.namespace`.
  # Cosmos DB span: common query/collection attributes plus Cosmos DB specific
  # client, request and response attributes.
  - id: db.cosmosdb
    type: span
    extends: trace.db.common.query_and_collection
    brief: >
      Attributes for Cosmos DB.
    attributes:
      - ref: db.cosmosdb.client_id
      - ref: db.cosmosdb.operation_type
        requirement_level:
          conditionally_required: when performing one of the operations in this list
      # The note below is a folded scalar: the `<br>` markers supply the line breaks,
      # and the two more-indented continuation lines are kept verbatim by YAML.
      - ref: user_agent.original
        brief: 'Full user-agent string is generated by Cosmos DB SDK'
        note: >
          The user-agent value is generated by SDK which is a combination of<br>
          `sdk_version` : Current version of SDK. e.g. 'cosmos-netstandard-sdk/3.23.0'<br>
          `direct_pkg_version` : Direct package version used by Cosmos DB SDK. e.g. '3.23.1'<br>
          `number_of_client_instances` : Number of cosmos client instances created by the application. e.g. '1'<br>
          `type_of_machine_architecture` : Machine architecture. e.g. 'X64'<br>
          `operating_system` : Operating System. e.g. 'Linux 5.4.0-1098-azure 104 18'<br>
          `runtime_framework` : Runtime Framework. e.g. '.NET Core 3.1.32'<br>
          `failover_information` : Generated key to determine if region failover enabled.
             Format Reg-{D (Disabled discovery)}-S(application region)|L(List of preferred regions)|N(None, user did not configure it).
             Default value is "NS".
        # Single-quoted, so each `\|` stays a literal backslash followed by a pipe.
        examples: ['cosmos-netstandard-sdk/3.23.0\|3.23.1\|1\|X64\|Linux 5.4.0-1098-azure 104 18\|.NET Core 3.1.32\|S\|']
      - ref: db.cosmosdb.connection_mode
        requirement_level:
          conditionally_required: if not `direct` (or pick gw as default)
      - ref: db.collection.name
        brief: >
          Cosmos DB container name.
        requirement_level:
          conditionally_required: if available
      - ref: db.cosmosdb.request_content_length
      # Response-derived attributes: only required when the stated condition holds.
      - ref: db.cosmosdb.status_code
        requirement_level:
          conditionally_required: if response was received
      - ref: db.cosmosdb.sub_status_code
        requirement_level:
          conditionally_required: when response was received and contained sub-code.
      - ref: db.cosmosdb.request_charge
        requirement_level:
          conditionally_required: when available
      - ref: db.namespace
        sampling_relevant: true
        requirement_level:
          conditionally_required: If available.
        note: ""  # overriding the base note