# semantic-conventions/model/gen-ai/metrics.yaml

groups:
  # Base attribute set for GenAI metrics. Other groups in this file pull it in
  # through `extends: metric_attributes.gen_ai`.
  - id: metric_attributes.gen_ai
    type: attribute_group
    brief: "This group describes GenAI metrics attributes"
    attributes:
      # Server endpoint: the port is only required when the address is set.
      - ref: server.address
        requirement_level: recommended
        brief: 'GenAI server address.'
      - ref: server.port
        requirement_level:
          conditionally_required: 'If `server.address` is set.'
        brief: 'GenAI server port.'
      # Model identification: response model is recommended, request model
      # is required.
      - ref: gen_ai.response.model
        requirement_level: recommended
      - ref: gen_ai.request.model
        requirement_level: required
      # System and operation name are both required.
      - ref: gen_ai.system
        requirement_level: required
      - ref: gen_ai.operation.name
        requirement_level: required
  # Server-side attribute set: inherits everything from
  # `metric_attributes.gen_ai` and adds `error.type`.
  - id: metric_attributes.gen_ai.server
    extends: metric_attributes.gen_ai
    type: attribute_group
    brief: "This group describes GenAI server metrics attributes"
    attributes:
      - ref: error.type
        # Only required when the operation ended in an error.
        requirement_level:
          conditionally_required: 'if the operation ended in an error'
        note: |
          The `error.type` SHOULD match the error code returned by the Generative AI service,
          the canonical name of exception that occurred, or another low-cardinality error identifier.
          Instrumentations SHOULD document the list of errors they report.
  # OpenAI-specific metric attributes. This is a standalone group: it does not
  # extend `metric_attributes.gen_ai`.
  - id: metric_attributes.gen_ai.openai
    type: attribute_group
    # The brief previously duplicated the server group's text; it now names
    # what this group actually contains.
    brief: 'This group describes OpenAI-specific GenAI metrics attributes'
    attributes:
      - ref: gen_ai.openai.response.service_tier
        requirement_level: recommended
  # Client-side token usage histogram, measured in `{token}`. Inherits the base
  # GenAI attributes and additionally requires `gen_ai.token.type`.
  - id: metric.gen_ai.client.token.usage
    type: metric
    metric_name: gen_ai.client.token.usage
    stability: experimental
    brief: "Measures number of input and output tokens used"
    instrument: histogram
    # Quoted so the braces are not read as a YAML flow mapping.
    unit: '{token}'
    extends: metric_attributes.gen_ai
    attributes:
      - ref: gen_ai.token.type
        requirement_level: required
  # Client-side operation duration histogram, in seconds. Inherits the base
  # GenAI attributes and declares its own `error.type` with a client-oriented
  # note (provider or client library error codes).
  - id: metric.gen_ai.client.operation.duration
    type: metric
    metric_name: gen_ai.client.operation.duration
    stability: experimental
    brief: "GenAI operation duration"
    instrument: histogram
    unit: 's'
    extends: metric_attributes.gen_ai
    attributes:
      - ref: error.type
        # Only required when the operation ended in an error.
        requirement_level:
          conditionally_required: 'if the operation ended in an error'
        note: |
          The `error.type` SHOULD match the error code returned by the Generative AI provider or the client library,
          the canonical name of exception that occurred, or another low-cardinality error identifier.
          Instrumentations SHOULD document the list of errors they report.
  # Server-side request duration histogram, in seconds. Extends the server
  # attribute group, so `error.type` is available on this metric.
  - id: metric.gen_ai.server.request.duration
    type: metric
    metric_name: gen_ai.server.request.duration
    stability: experimental
    brief: "Generative AI server request duration such as time-to-last byte or last output token"
    instrument: histogram
    unit: 's'
    extends: metric_attributes.gen_ai.server
  # Server-side per-output-token latency histogram, in seconds.
  # Extends the base group (not the server group), so `error.type` is not
  # inherited; presumably intentional, since the brief limits this metric to
  # successful responses.
  - id: metric.gen_ai.server.time_per_output_token
    type: metric
    metric_name: gen_ai.server.time_per_output_token
    stability: experimental
    brief: "Time per output token generated after the first token for successful responses"
    instrument: histogram
    unit: 's'
    extends: metric_attributes.gen_ai
  # Server-side time-to-first-token histogram, in seconds.
  # Extends the base group (not the server group), so `error.type` is not
  # inherited; presumably intentional, since the brief limits this metric to
  # successful responses.
  - id: metric.gen_ai.server.time_to_first_token
    type: metric
    metric_name: gen_ai.server.time_to_first_token
    stability: experimental
    brief: "Time to generate first token for successful responses"
    instrument: histogram
    unit: 's'
    extends: metric_attributes.gen_ai