groups:
# Base attribute set for GenAI metrics. Other groups reuse it through
# `extends: metric_attributes.gen_ai`.
- id: metric_attributes.gen_ai
  type: attribute_group
  brief: 'This group describes GenAI metrics attributes'
  attributes:
  - ref: server.address
    brief: GenAI server address.
    requirement_level: recommended
  # The port is only required when `server.address` is also set.
  - ref: server.port
    brief: GenAI server port.
    requirement_level:
      conditionally_required: If `server.address` is set.
  - ref: gen_ai.response.model
    requirement_level: recommended
  - ref: gen_ai.request.model
    requirement_level:
      conditionally_required: If available.
  - ref: gen_ai.system
    requirement_level: required
  - ref: gen_ai.operation.name
    requirement_level: required

# Server-side attribute set: everything from the base group plus
# `error.type`, which is required only when the operation failed.
- id: metric_attributes.gen_ai.server
  type: attribute_group
  brief: 'This group describes GenAI server metrics attributes'
  extends: metric_attributes.gen_ai
  attributes:
  - ref: error.type
    requirement_level:
      conditionally_required: 'if the operation ended in an error'
    note: |
      The `error.type` SHOULD match the error code returned by the Generative AI service,
      the canonical name of exception that occurred, or another low-cardinality error identifier.
      Instrumentations SHOULD document the list of errors they report.
# OpenAI-specific response attributes. This group does not extend the
# base GenAI group; it only lists the two `gen_ai.openai.response.*` refs.
- id: metric_attributes.gen_ai.openai
  type: attribute_group
  brief: 'This group describes OpenAI-specific GenAI metrics attributes'
  attributes:
  - ref: gen_ai.openai.response.service_tier
    requirement_level: recommended
  - ref: gen_ai.openai.response.system_fingerprint
    requirement_level: recommended

# Client-side token usage histogram. `gen_ai.token.type` is required in
# addition to the attributes inherited from the base GenAI group.
- id: metric.gen_ai.client.token.usage
  type: metric
  metric_name: gen_ai.client.token.usage
  brief: 'Measures number of input and output tokens used'
  instrument: histogram
  # Quoted because a leading `{` would otherwise start a YAML flow mapping.
  unit: '{token}'
  stability: development
  extends: metric_attributes.gen_ai
  attributes:
  - ref: gen_ai.token.type
    requirement_level: required

# Client-side operation duration histogram, in seconds. Adds `error.type`
# (required only on failure) to the inherited base GenAI attributes.
- id: metric.gen_ai.client.operation.duration
  type: metric
  metric_name: gen_ai.client.operation.duration
  brief: 'GenAI operation duration'
  instrument: histogram
  unit: 's'
  stability: development
  extends: metric_attributes.gen_ai
  attributes:
  - ref: error.type
    requirement_level:
      conditionally_required: 'if the operation ended in an error'
    note: |
      The `error.type` SHOULD match the error code returned by the Generative AI provider or the client library,
      the canonical name of exception that occurred, or another low-cardinality error identifier.
      Instrumentations SHOULD document the list of errors they report.
# Server-side request duration histogram, in seconds. Uses the server
# attribute group (`metric_attributes.gen_ai.server`).
- id: metric.gen_ai.server.request.duration
  type: metric
  metric_name: gen_ai.server.request.duration
  brief: 'Generative AI server request duration such as time-to-last byte or last output token'
  instrument: histogram
  unit: 's'
  stability: development
  extends: metric_attributes.gen_ai.server

# Per-token generation time histogram, in seconds. Described for successful
# responses and extends the base group rather than the server group.
- id: metric.gen_ai.server.time_per_output_token
  type: metric
  metric_name: gen_ai.server.time_per_output_token
  brief: 'Time per output token generated after the first token for successful responses'
  instrument: histogram
  unit: 's'
  stability: development
  extends: metric_attributes.gen_ai

# Time-to-first-token histogram, in seconds. Described for successful
# responses and extends the base group rather than the server group.
- id: metric.gen_ai.server.time_to_first_token
  type: metric
  metric_name: gen_ai.server.time_to_first_token
  brief: 'Time to generate first token for successful responses'
  instrument: histogram
  unit: 's'
  stability: development
  extends: metric_attributes.gen_ai