From 58b08c9dfbe4516b8ba8bdd73c403dd3c3931d69 Mon Sep 17 00:00:00 2001 From: Steve Gordon Date: Mon, 11 Sep 2023 10:30:14 +0100 Subject: [PATCH] Add `cluster.name` and `node.name` attributes to Elasticsearch semantic conventions. (#285) Co-authored-by: Alexander Wert Co-authored-by: Joao Grassi --- CHANGELOG.md | 2 ++ docs/database/elasticsearch.md | 42 +++++++++++++++++++++------------- model/trace/database.yaml | 19 +++++++++++++++ 3 files changed, 47 insertions(+), 16 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 66573a09c..3fbe4268e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,8 @@ release. ## Unreleased +- Add `cluster.name` and `node.name` attributes to Elasticsearch semantic conventions. + ([#285](https://github.com/open-telemetry/semantic-conventions/pull/285)) - Fix the unit of metric.process.runtime.jvm.system.cpu.load_1m to be {run_queue_item} ([#95](https://github.com/open-telemetry/semantic-conventions/pull/95)) - Update `.count` metric naming convention so that it only applies to UpDownCounters, diff --git a/docs/database/elasticsearch.md b/docs/database/elasticsearch.md index ae329f7e5..43003ddc8 100644 --- a/docs/database/elasticsearch.md +++ b/docs/database/elasticsearch.md @@ -6,7 +6,11 @@ linkTitle: Elasticsearch **Status**: [Experimental][DocumentStatus] -This document defines semantic conventions to apply when creating a span for requests to Elasticsearch. +The Semantic Conventions for [Elasticsearch](https://www.elastic.co/) extend and override the [Database Semantic Conventions](database-spans.md) +that describe common database operations attributes in addition to the Semantic Conventions +described on this page. + +`db.system` MUST be set to `"elasticsearch"`. ## Span Name @@ -30,25 +34,29 @@ in order to map the path part values to their names. **[1]:** when the url has dynamic values -## Span attributes - -`db.system` MUST be set to `"elasticsearch"`. +## Call-level attributes | Attribute | Type | Description | Examples | Requirement Level | |---|---|---|---|---| -| [`db.operation`](database-spans.md) | string | The endpoint identifier for the request. [1] | `search`; `ml.close_job`; `cat.aliases` | Required | -| [`db.statement`](database-spans.md) | string | The request body for a [search-type query](https://www.elastic.co/guide/en/elasticsearch/reference/current/search.html), as a json string. | `"{\"query\":{\"term\":{\"user.id\":\"kimchy\"}}}"` | Recommended: [2] | -| `http.request.method` | string | HTTP request method. [3] | `GET`; `POST`; `HEAD` | Required | -| [`server.address`](../general/attributes.md) | string | Server address - domain name if available without reverse DNS lookup, otherwise IP address or Unix domain socket name. [4] | `example.com` | See below | -| [`server.port`](../general/attributes.md) | int | Server port number [5] | `80`; `8080`; `443` | Recommended | -| [`url.full`](../url/url.md) | string | Absolute URL describing a network resource according to [RFC3986](https://www.rfc-editor.org/rfc/rfc3986) [6] | `https://localhost:9200/index/_search?q=user.id:kimchy` | Required | +| `db.elasticsearch.cluster.name` | string | Represents the identifier of an Elasticsearch cluster. | `e9106fc68e3044f0b1475b04bf4ffd5f` | Recommended: [1] | +| `db.elasticsearch.node.name` | string | Represents the human-readable identifier of the node/instance to which a request was routed. | `instance-0000000001` | Recommended: [2] | +| [`db.operation`](database-spans.md) | string | The endpoint identifier for the request. [3] | `search`; `ml.close_job`; `cat.aliases` | Required | +| [`db.statement`](database-spans.md) | string | The request body for a [search-type query](https://www.elastic.co/guide/en/elasticsearch/reference/current/search.html), as a json string. | `"{\"query\":{\"term\":{\"user.id\":\"kimchy\"}}}"` | Recommended: [4] | +| `http.request.method` | string | HTTP request method. [5] | `GET`; `POST`; `HEAD` | Required | +| [`server.address`](../general/attributes.md) | string | Server address - domain name if available without reverse DNS lookup, otherwise IP address or Unix domain socket name. [6] | `example.com` | See below | +| [`server.port`](../general/attributes.md) | int | Server port number [7] | `80`; `8080`; `443` | Recommended | +| [`url.full`](../url/url.md) | string | Absolute URL describing a network resource according to [RFC3986](https://www.rfc-editor.org/rfc/rfc3986) [8] | `https://localhost:9200/index/_search?q=user.id:kimchy` | Required | -**[1]:** When setting this to an SQL keyword, it is not recommended to attempt any client-side parsing of `db.statement` just to get this property, but it should be set if the operation name is provided by the library being instrumented. If the SQL statement has an ambiguous operation, or performs more than one operation, this value may be omitted. +**[1]:** When communicating with an Elastic Cloud deployment, this should be collected from the "X-Found-Handling-Cluster" HTTP response header. -**[2]:** Should be collected by default for search-type queries and only if there is sanitization that excludes sensitive information. +**[2]:** When communicating with an Elastic Cloud deployment, this should be collected from the "X-Found-Handling-Instance" HTTP response header. -**[3]:** HTTP request method value SHOULD be "known" to the instrumentation. +**[3]:** When setting this to an SQL keyword, it is not recommended to attempt any client-side parsing of `db.statement` just to get this property, but it should be set if the operation name is provided by the library being instrumented. If the SQL statement has an ambiguous operation, or performs more than one operation, this value may be omitted. + +**[4]:** Should be collected by default for search-type queries and only if there is sanitization that excludes sensitive information. + +**[5]:** HTTP request method value SHOULD be "known" to the instrumentation. By default, this convention defines "known" methods as the ones listed in [RFC9110](https://www.rfc-editor.org/rfc/rfc9110.html#name-methods) and the PATCH method defined in [RFC5789](https://www.rfc-editor.org/rfc/rfc5789.html). @@ -63,12 +71,12 @@ HTTP method names are case-sensitive and `http.request.method` attribute value M Instrumentations for specific web frameworks that consider HTTP methods to be case insensitive, SHOULD populate a canonical equivalent. Tracing instrumentations that do so, MUST also set `http.request.method_original` to the original value. -**[4]:** When observed from the client side, and when communicating through an intermediary, `server.address` SHOULD represent +**[6]:** When observed from the client side, and when communicating through an intermediary, `server.address` SHOULD represent the server address behind any intermediaries (e.g. proxies) if it's available. -**[5]:** When observed from the client side, and when communicating through an intermediary, `server.port` SHOULD represent the server port behind any intermediaries (e.g. proxies) if it's available. +**[7]:** When observed from the client side, and when communicating through an intermediary, `server.port` SHOULD represent the server port behind any intermediaries (e.g. proxies) if it's available. -**[6]:** For network calls, URL usually has `scheme://host[:port][path][?query][#fragment]` format, where the fragment is not transmitted over HTTP, but if it is known, it should be included nevertheless. +**[8]:** For network calls, URL usually has `scheme://host[:port][path][?query][#fragment]` format, where the fragment is not transmitted over HTTP, but if it is known, it should be included nevertheless. `url.full` MUST NOT contain credentials passed via URL in form of `https://username:password@www.example.com/`. In such case username and password should be redacted and attribute's value should be `https://REDACTED:REDACTED@www.example.com/`. `url.full` SHOULD capture the absolute URL when it is available (or can be reconstructed) and SHOULD NOT be validated or modified except for sanitizing purposes. @@ -86,5 +94,7 @@ the server address behind any intermediaries (e.g. proxies) if it's available. | `db.operation` | `"search"` | | `url.full` | `"https://elasticsearch.mydomain.com:9200/my-index-000001/_search?from=40&size=20"` | | `db.elasticsearch.path_parts.index` | `"my-index-000001"` | +| `db.elasticsearch.cluster.name` | `"e9106fc68e3044f0b1475b04bf4ffd5f"` | +| `db.elasticsearch.node.name` | `"instance-0000000001"` | [DocumentStatus]: https://github.com/open-telemetry/opentelemetry-specification/tree/v1.22.0/specification/document-status.md diff --git a/model/trace/database.yaml b/model/trace/database.yaml index 73f6005d0..2348a2a0f 100644 --- a/model/trace/database.yaml +++ b/model/trace/database.yaml @@ -413,6 +413,7 @@ groups: note: > For **Redis**, the value provided for `db.statement` SHOULD correspond to the syntax of the Redis CLI. If, for example, the [`HMSET` command](https://redis.io/commands/hmset) is invoked, `"HMSET myhash field1 'Hello' field2 'World'"` would be a suitable value for `db.statement`. + - id: db.mongodb prefix: db.mongodb type: span @@ -453,6 +454,24 @@ groups: examples: [ '"{\"query\":{\"term\":{\"user.id\":\"kimchy\"}}}"' ] - ref: server.address - ref: server.port + - id: cluster.name + type: string + requirement_level: + recommended: > + When communicating with an Elastic Cloud deployment, this should be collected from the "X-Found-Handling-Cluster" HTTP response header. + tag: call-level-tech-specific + brief: > + Represents the identifier of an Elasticsearch cluster. + examples: ["e9106fc68e3044f0b1475b04bf4ffd5f"] + - id: node.name + type: string + requirement_level: + recommended: > + When communicating with an Elastic Cloud deployment, this should be collected from the "X-Found-Handling-Instance" HTTP response header. + tag: call-level-tech-specific + brief: > + Represents the human-readable identifier of the node/instance to which a request was routed. + examples: ["instance-0000000001"] - id: db.sql prefix: 'db.sql'