---
# GenAI metric attribute groups and metric definitions.
groups:
# Base attribute group for GenAI metrics. It is named in the `extends` key of
# `metric_attributes.gen_ai.server` and of several metric groups in this file.
- id: metric_attributes.gen_ai
  type: attribute_group
  brief: 'This group describes GenAI metrics attributes'
  attributes:
  - ref: server.address
    brief: GenAI server address.
    requirement_level: recommended
  # The port's requirement level is a condition on `server.address`, expressed
  # as a nested mapping rather than a plain level string.
  - ref: server.port
    brief: GenAI server port.
    requirement_level:
      conditionally_required: If `server.address` is set.
  - ref: gen_ai.response.model
    requirement_level: recommended
  # The three attributes below are marked `required`.
  - ref: gen_ai.request.model
    requirement_level: required
  - ref: gen_ai.system
    requirement_level: required
  - ref: gen_ai.operation.name
    requirement_level: required
# Server-side attribute group: extends `metric_attributes.gen_ai` and adds a
# conditionally required `error.type`.
- id: metric_attributes.gen_ai.server
  type: attribute_group
  brief: 'This group describes GenAI server metrics attributes'
  extends: metric_attributes.gen_ai
  attributes:
  - ref: error.type
    requirement_level:
      conditionally_required: "if the operation ended in an error"
    # Literal block (`|`): the line breaks in the note are part of the value.
    note: |
      The `error.type` SHOULD match the error code returned by the Generative AI service,
      the canonical name of exception that occurred, or another low-cardinality error identifier.
      Instrumentations SHOULD document the list of errors they report.
# Client-side token usage histogram. Extends `metric_attributes.gen_ai` and
# additionally requires `gen_ai.token.type`.
- id: metric.gen_ai.client.token.usage
  type: metric
  metric_name: gen_ai.client.token.usage
  brief: 'Measures number of input and output tokens used'
  instrument: histogram
  # Quoted: an unquoted leading `{` would be parsed as a YAML flow mapping.
  unit: "{token}"
  stability: experimental
  extends: metric_attributes.gen_ai
  attributes:
  - ref: gen_ai.token.type
    requirement_level: required
# Client-side operation duration histogram, unit "s". Extends
# `metric_attributes.gen_ai` and adds a conditionally required `error.type`.
- id: metric.gen_ai.client.operation.duration
  type: metric
  metric_name: gen_ai.client.operation.duration
  brief: 'GenAI operation duration'
  instrument: histogram
  unit: "s"
  stability: experimental
  extends: metric_attributes.gen_ai
  attributes:
  - ref: error.type
    requirement_level:
      conditionally_required: "if the operation ended in an error"
    # Literal block (`|`): the line breaks in the note are part of the value.
    # This note names "provider or the client library" as the error source,
    # unlike the server attribute group's note, which says "service".
    note: |
      The `error.type` SHOULD match the error code returned by the Generative AI provider or the client library,
      the canonical name of exception that occurred, or another low-cardinality error identifier.
      Instrumentations SHOULD document the list of errors they report.
# Server-side request duration histogram, unit "s". Extends the server
# attribute group `metric_attributes.gen_ai.server`, which declares `error.type`.
- id: metric.gen_ai.server.request.duration
  type: metric
  metric_name: gen_ai.server.request.duration
  brief: 'Generative AI server request duration such as time-to-last byte or last output token'
  instrument: histogram
  unit: "s"
  stability: experimental
  extends: metric_attributes.gen_ai.server
# Server-side per-output-token latency histogram, unit "s".
# Extends the base group `metric_attributes.gen_ai`, not
# `metric_attributes.gen_ai.server`; the brief scopes this metric to
# successful responses.
- id: metric.gen_ai.server.time_per_output_token
  type: metric
  metric_name: gen_ai.server.time_per_output_token
  brief: 'Time per output token generated after the first token for successful responses'
  instrument: histogram
  unit: "s"
  stability: experimental
  extends: metric_attributes.gen_ai
# Server-side time-to-first-token histogram, unit "s".
# Extends the base group `metric_attributes.gen_ai`, not
# `metric_attributes.gen_ai.server`; the brief scopes this metric to
# successful responses.
- id: metric.gen_ai.server.time_to_first_token
  type: metric
  metric_name: gen_ai.server.time_to_first_token
  brief: 'Time to generate first token for successful responses'
  instrument: histogram
  unit: "s"
  stability: experimental
  extends: metric_attributes.gen_ai