# opentelemetry-python-contrib/instrumentation-genai/opentelemetry-instrumentati.../tests/test_chat_metrics.py

import pytest
from opentelemetry.semconv._incubating.attributes import (
    gen_ai_attributes as GenAIAttributes,
)
from opentelemetry.semconv._incubating.attributes import (
    server_attributes as ServerAttributes,
)
from opentelemetry.semconv._incubating.metrics import gen_ai_metrics
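# The metric_reader, openai_client, async_openai_client and
# instrument_with_content fixtures referenced below are assumed to come from
# the package's conftest.py, which is expected to instrument the OpenAI client
# against an in-memory metric reader.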
def assert_all_metric_attributes(data_point):
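    """Assert the GenAI semconv attributes shared by every metric data point in these tests."""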
    assert GenAIAttributes.GEN_AI_OPERATION_NAME in data_point.attributes
    assert (
        data_point.attributes[GenAIAttributes.GEN_AI_OPERATION_NAME]
        == GenAIAttributes.GenAiOperationNameValues.CHAT.value
    )
    assert GenAIAttributes.GEN_AI_SYSTEM in data_point.attributes
    assert (
        data_point.attributes[GenAIAttributes.GEN_AI_SYSTEM]
        == GenAIAttributes.GenAiSystemValues.OPENAI.value
    )
    assert GenAIAttributes.GEN_AI_REQUEST_MODEL in data_point.attributes
    assert (
        data_point.attributes[GenAIAttributes.GEN_AI_REQUEST_MODEL]
        == "gpt-4o-mini"
    )
    assert GenAIAttributes.GEN_AI_RESPONSE_MODEL in data_point.attributes
    assert (
        data_point.attributes[GenAIAttributes.GEN_AI_RESPONSE_MODEL]
        == "gpt-4o-mini-2024-07-18"
    )
    assert "gen_ai.openai.response.system_fingerprint" in data_point.attributes
    assert (
        data_point.attributes["gen_ai.openai.response.system_fingerprint"]
        == "fp_0ba0d124f1"
    )
    assert (
        GenAIAttributes.GEN_AI_OPENAI_RESPONSE_SERVICE_TIER
        in data_point.attributes
    )
    assert (
        data_point.attributes[
            GenAIAttributes.GEN_AI_OPENAI_RESPONSE_SERVICE_TIER
        ]
        == "default"
    )
    assert (
        data_point.attributes[ServerAttributes.SERVER_ADDRESS]
        == "api.openai.com"
    )


@pytest.mark.vcr()
def test_chat_completion_metrics(
    metric_reader, openai_client, instrument_with_content
):
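    """Verify that a chat completion records duration and token usage metrics."""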
    llm_model_value = "gpt-4o-mini"
    messages_value = [{"role": "user", "content": "Say this is a test"}]

    openai_client.chat.completions.create(
        messages=messages_value, model=llm_model_value, stream=False
    )
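    # The HTTP exchange is replayed from a pre-recorded VCR cassette (see the
    # @pytest.mark.vcr() marker above), so the token counts asserted below are
    # deterministic.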
    metrics = metric_reader.get_metrics_data().resource_metrics
    assert len(metrics) == 1

    metric_data = metrics[0].scope_metrics[0].metrics
    assert len(metric_data) == 2
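    # Exactly two metrics are expected from the instrumentation: the operation
    # duration histogram and the token usage histogram.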
    duration_metric = next(
        (
            m
            for m in metric_data
            if m.name == gen_ai_metrics.GEN_AI_CLIENT_OPERATION_DURATION
        ),
        None,
    )
    assert duration_metric is not None
    assert duration_metric.data.data_points[0].sum > 0
    assert_all_metric_attributes(duration_metric.data.data_points[0])

    token_usage_metric = next(
        (
            m
            for m in metric_data
            if m.name == gen_ai_metrics.GEN_AI_CLIENT_TOKEN_USAGE
        ),
        None,
    )
    assert token_usage_metric is not None
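    # Token usage is recorded as separate histogram data points keyed by
    # gen_ai.token_type (input vs. completion), so each is located and
    # asserted individually.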
    input_token_usage = next(
        (
            d
            for d in token_usage_metric.data.data_points
            if d.attributes[GenAIAttributes.GEN_AI_TOKEN_TYPE]
            == GenAIAttributes.GenAiTokenTypeValues.INPUT.value
        ),
        None,
    )
    assert input_token_usage is not None
    assert input_token_usage.sum == 12
    # assert against buckets [1, 4, 16, 64, 256, 1024, 4096, 16384, 65536, 262144, 1048576, 4194304, 16777216, 67108864]
    assert input_token_usage.bucket_counts[2] == 1
    assert_all_metric_attributes(input_token_usage)

    output_token_usage = next(
        (
            d
            for d in token_usage_metric.data.data_points
            if d.attributes[GenAIAttributes.GEN_AI_TOKEN_TYPE]
            == GenAIAttributes.GenAiTokenTypeValues.COMPLETION.value
        ),
        None,
    )
    assert output_token_usage is not None
    assert output_token_usage.sum == 5
    # assert against buckets [1, 4, 16, 64, 256, 1024, 4096, 16384, 65536, 262144, 1048576, 4194304, 16777216, 67108864]
    assert output_token_usage.bucket_counts[2] == 1
    assert_all_metric_attributes(output_token_usage)


@pytest.mark.vcr()
@pytest.mark.asyncio()
async def test_async_chat_completion_metrics(
    metric_reader, async_openai_client, instrument_with_content
):
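    """Verify that an async chat completion records duration and token usage metrics."""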
    llm_model_value = "gpt-4o-mini"
    messages_value = [{"role": "user", "content": "Say this is a test"}]

    await async_openai_client.chat.completions.create(
        messages=messages_value, model=llm_model_value, stream=False
    )

    metrics = metric_reader.get_metrics_data().resource_metrics
    assert len(metrics) == 1

    metric_data = metrics[0].scope_metrics[0].metrics
    assert len(metric_data) == 2

    duration_metric = next(
        (
            m
            for m in metric_data
            if m.name == gen_ai_metrics.GEN_AI_CLIENT_OPERATION_DURATION
        ),
        None,
    )
    assert duration_metric is not None
    assert duration_metric.data.data_points[0].sum > 0
    assert_all_metric_attributes(duration_metric.data.data_points[0])

    token_usage_metric = next(
        (
            m
            for m in metric_data
            if m.name == gen_ai_metrics.GEN_AI_CLIENT_TOKEN_USAGE
        ),
        None,
    )
    assert token_usage_metric is not None

    input_token_usage = next(
        (
            d
            for d in token_usage_metric.data.data_points
            if d.attributes[GenAIAttributes.GEN_AI_TOKEN_TYPE]
            == GenAIAttributes.GenAiTokenTypeValues.INPUT.value
        ),
        None,
    )
    assert input_token_usage is not None
    assert input_token_usage.sum == 12
    assert_all_metric_attributes(input_token_usage)

    output_token_usage = next(
        (
            d
            for d in token_usage_metric.data.data_points
            if d.attributes[GenAIAttributes.GEN_AI_TOKEN_TYPE]
            == GenAIAttributes.GenAiTokenTypeValues.COMPLETION.value
        ),
        None,
    )
    assert output_token_usage is not None
    assert output_token_usage.sum == 12
    assert_all_metric_attributes(output_token_usage)