Add metrics to the Python OpenAI instrumentation (#3180)

Drew Robbins 2025-01-16 07:24:35 +09:00 committed by GitHub
parent 07c97eac38
commit a716949d1c
10 changed files with 763 additions and 9 deletions

View File

@@ -12,6 +12,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Add example to `opentelemetry-instrumentation-openai-v2`
([#3006](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/3006))
- Support for `AsyncOpenAI/AsyncCompletions` ([#2984](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/2984))
- Add metrics ([#3180](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/3180))
## Version 2.0b0 (2024-11-08)

View File

@@ -7,7 +7,8 @@ OpenTelemetry OpenAI Instrumentation
:target: https://pypi.org/project/opentelemetry-instrumentation-openai-v2/
This library allows tracing LLM requests and logging of messages made by the
`OpenAI Python API library <https://pypi.org/project/openai/>`_.
`OpenAI Python API library <https://pypi.org/project/openai/>`_. It also captures
the duration of operations and the number of tokens used as metrics.
Installation
@@ -74,6 +75,48 @@ To uninstrument clients, call the uninstrument method:
# Uninstrument all clients
OpenAIInstrumentor().uninstrument()
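Once a client is instrumented, each chat completion records one duration
measurement and two token usage measurements (one for input and one for
output tokens). A minimal sketch of an instrumented call (the model name and
prompt are illustrative):

.. code-block:: python

    from openai import OpenAI

    from opentelemetry.instrumentation.openai_v2 import OpenAIInstrumentor

    # Instrument before creating clients; the global tracer, event logger,
    # and meter providers are used unless others are passed to instrument().
    OpenAIInstrumentor().instrument()

    client = OpenAI()
    client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[{"role": "user", "content": "Say this is a test"}],
    )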
Bucket Boundaries
-----------------
This section lists the explicit bucket boundaries that the GenAI semantic conventions recommend for the token usage and operation duration metrics, and shows how to apply them with Views.
The bucket boundaries are defined as follows:
- For `gen_ai.client.token.usage`: [1, 4, 16, 64, 256, 1024, 4096, 16384, 65536, 262144, 1048576, 4194304, 16777216, 67108864]
- For `gen_ai.client.operation.duration`: [0.01, 0.02, 0.04, 0.08, 0.16, 0.32, 0.64, 1.28, 2.56, 5.12, 10.24, 20.48, 40.96, 81.92]
To implement these bucket boundaries, you can create Views in your OpenTelemetry SDK setup. Here is an example:
.. code-block:: python

    from opentelemetry.sdk.metrics import MeterProvider
    from opentelemetry.sdk.metrics.export import PeriodicExportingMetricReader
    from opentelemetry.sdk.metrics.view import (
        ExplicitBucketHistogramAggregation,
        View,
    )
    from opentelemetry.exporter.otlp.proto.grpc.metric_exporter import OTLPMetricExporter
    from opentelemetry.metrics import set_meter_provider

    views = [
        View(
            instrument_name="gen_ai.client.token.usage",
            aggregation=ExplicitBucketHistogramAggregation(
                [1, 4, 16, 64, 256, 1024, 4096, 16384, 65536, 262144, 1048576, 4194304, 16777216, 67108864]
            ),
        ),
        View(
            instrument_name="gen_ai.client.operation.duration",
            aggregation=ExplicitBucketHistogramAggregation(
                [0.01, 0.02, 0.04, 0.08, 0.16, 0.32, 0.64, 1.28, 2.56, 5.12, 10.24, 20.48, 40.96, 81.92]
            ),
        ),
    ]

    metric_exporter = OTLPMetricExporter(endpoint="http://localhost:4317")
    metric_reader = PeriodicExportingMetricReader(metric_exporter)
    provider = MeterProvider(metric_readers=[metric_reader], views=views)
    set_meter_provider(provider)
For more details, refer to the `OpenTelemetry GenAI Metrics documentation <https://opentelemetry.io/docs/specs/semconv/gen-ai/gen-ai-metrics/>`_.
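To confirm the Views take effect, you can collect metrics with the SDK's
in-memory reader and inspect the bucket boundaries on the exported data
points. A minimal sketch, reusing the ``views`` list from above:

.. code-block:: python

    from opentelemetry.sdk.metrics import MeterProvider
    from opentelemetry.sdk.metrics.export import InMemoryMetricReader

    reader = InMemoryMetricReader()
    provider = MeterProvider(metric_readers=[reader], views=views)

    # ... instrument and make a chat completion request, then:
    data = reader.get_metrics_data()
    for resource_metrics in data.resource_metrics:
        for scope_metrics in resource_metrics.scope_metrics:
            for metric in scope_metrics.metrics:
                # Histogram data points expose the boundaries that were applied.
                print(metric.name, metric.data.data_points[0].explicit_bounds)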
References
----------
* `OpenTelemetry OpenAI Instrumentation <https://opentelemetry-python-contrib.readthedocs.io/en/latest/instrumentation/openai/openai.html>`_

View File

@@ -49,13 +49,18 @@ from opentelemetry.instrumentation.instrumentor import BaseInstrumentor
from opentelemetry.instrumentation.openai_v2.package import _instruments
from opentelemetry.instrumentation.openai_v2.utils import is_content_enabled
from opentelemetry.instrumentation.utils import unwrap
from opentelemetry.metrics import get_meter
from opentelemetry.semconv.schemas import Schemas
from opentelemetry.trace import get_tracer
from .instruments import Instruments
from .patch import async_chat_completions_create, chat_completions_create
class OpenAIInstrumentor(BaseInstrumentor):
def __init__(self):
self._meter = None
def instrumentation_dependencies(self) -> Collection[str]:
return _instruments
@@ -75,12 +80,21 @@ class OpenAIInstrumentor(BaseInstrumentor):
schema_url=Schemas.V1_28_0.value,
event_logger_provider=event_logger_provider,
)
meter_provider = kwargs.get("meter_provider")
self._meter = get_meter(
__name__,
"",
meter_provider,
schema_url=Schemas.V1_28_0.value,
)
instruments = Instruments(self._meter)
wrap_function_wrapper(
module="openai.resources.chat.completions",
name="Completions.create",
wrapper=chat_completions_create(
tracer, event_logger, is_content_enabled()
tracer, event_logger, instruments, is_content_enabled()
),
)
@@ -88,7 +102,7 @@ class OpenAIInstrumentor(BaseInstrumentor):
module="openai.resources.chat.completions",
name="AsyncCompletions.create",
wrapper=async_chat_completions_create(
tracer, event_logger, is_content_enabled()
tracer, event_logger, instruments, is_content_enabled()
),
)

View File

@@ -0,0 +1,11 @@
from opentelemetry.semconv._incubating.metrics import gen_ai_metrics
class Instruments:
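"""Container for the GenAI client metric instruments defined by the semantic conventions."""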
def __init__(self, meter):
self.operation_duration_histogram = (
gen_ai_metrics.create_gen_ai_client_operation_duration(meter)
)
self.token_usage_histogram = (
gen_ai_metrics.create_gen_ai_client_token_usage(meter)
)

View File

@@ -13,6 +13,7 @@
# limitations under the License.
from timeit import default_timer
from typing import Optional
from openai import Stream
@@ -21,8 +22,12 @@ from opentelemetry._events import Event, EventLogger
from opentelemetry.semconv._incubating.attributes import (
gen_ai_attributes as GenAIAttributes,
)
from opentelemetry.semconv._incubating.attributes import (
server_attributes as ServerAttributes,
)
from opentelemetry.trace import Span, SpanKind, Tracer
from .instruments import Instruments
from .utils import (
choice_to_event,
get_llm_request_attributes,
@@ -34,7 +39,10 @@
def chat_completions_create(
tracer: Tracer, event_logger: EventLogger, capture_content: bool
tracer: Tracer,
event_logger: EventLogger,
instruments: Instruments,
capture_content: bool,
):
"""Wrap the `create` method of the `ChatCompletion` class to trace it."""
@@ -54,6 +62,9 @@
message_to_event(message, capture_content)
)
start = default_timer()
result = None
error_type = None
try:
result = wrapped(*args, **kwargs)
if is_streaming(kwargs):
@@ -69,14 +80,27 @@
return result
except Exception as error:
error_type = type(error).__qualname__
handle_span_exception(span, error)
raise
finally:
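# Metrics are recorded whether the call succeeded or raised; on failure,
# error_type (captured above) is attached to the data points.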
duration = max((default_timer() - start), 0)
_record_metrics(
instruments,
duration,
result,
span_attributes,
error_type,
)
return traced_method
def async_chat_completions_create(
tracer: Tracer, event_logger: EventLogger, capture_content: bool
tracer: Tracer,
event_logger: EventLogger,
instruments: Instruments,
capture_content: bool,
):
"""Wrap the `create` method of the `AsyncChatCompletion` class to trace it."""
@@ -96,6 +120,9 @@
message_to_event(message, capture_content)
)
start = default_timer()
result = None
error_type = None
try:
result = await wrapped(*args, **kwargs)
if is_streaming(kwargs):
@@ -111,12 +138,88 @@
return result
except Exception as error:
error_type = type(error).__qualname__
handle_span_exception(span, error)
raise
finally:
duration = max((default_timer() - start), 0)
_record_metrics(
instruments,
duration,
result,
span_attributes,
error_type,
)
return traced_method
def _record_metrics(
instruments: Instruments,
duration: float,
result,
span_attributes: dict,
error_type: Optional[str],
):
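# Attributes shared by the duration and token usage data points.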
common_attributes = {
GenAIAttributes.GEN_AI_OPERATION_NAME: GenAIAttributes.GenAiOperationNameValues.CHAT.value,
GenAIAttributes.GEN_AI_SYSTEM: GenAIAttributes.GenAiSystemValues.OPENAI.value,
GenAIAttributes.GEN_AI_REQUEST_MODEL: span_attributes[
GenAIAttributes.GEN_AI_REQUEST_MODEL
],
}
if error_type:
common_attributes["error.type"] = error_type
if result and getattr(result, "model", None):
common_attributes[GenAIAttributes.GEN_AI_RESPONSE_MODEL] = result.model
if result and getattr(result, "service_tier", None):
common_attributes[
GenAIAttributes.GEN_AI_OPENAI_RESPONSE_SERVICE_TIER
] = result.service_tier
if result and getattr(result, "system_fingerprint", None):
common_attributes["gen_ai.openai.response.system_fingerprint"] = (
result.system_fingerprint
)
if ServerAttributes.SERVER_ADDRESS in span_attributes:
common_attributes[ServerAttributes.SERVER_ADDRESS] = span_attributes[
ServerAttributes.SERVER_ADDRESS
]
if ServerAttributes.SERVER_PORT in span_attributes:
common_attributes[ServerAttributes.SERVER_PORT] = span_attributes[
ServerAttributes.SERVER_PORT
]
instruments.operation_duration_histogram.record(
duration,
attributes=common_attributes,
)
if result and getattr(result, "usage", None):
input_attributes = {
**common_attributes,
GenAIAttributes.GEN_AI_TOKEN_TYPE: GenAIAttributes.GenAiTokenTypeValues.INPUT.value,
}
instruments.token_usage_histogram.record(
result.usage.prompt_tokens,
attributes=input_attributes,
)
completion_attributes = {
**common_attributes,
GenAIAttributes.GEN_AI_TOKEN_TYPE: GenAIAttributes.GenAiTokenTypeValues.COMPLETION.value,
}
instruments.token_usage_histogram.record(
result.usage.completion_tokens,
attributes=completion_attributes,
)
def _set_response_attributes(
span, result, event_logger: EventLogger, capture_content: bool
):

View File

@@ -0,0 +1,133 @@
interactions:
- request:
body: |-
{
"messages": [
{
"role": "user",
"content": "Say this is a test"
}
],
"model": "gpt-4o-mini",
"stream": false
}
headers:
accept:
- application/json
accept-encoding:
- gzip, deflate
authorization:
- Bearer test_openai_api_key
connection:
- keep-alive
content-length:
- '106'
content-type:
- application/json
host:
- api.openai.com
user-agent:
- AsyncOpenAI/Python 1.26.0
x-stainless-arch:
- arm64
x-stainless-async:
- async:asyncio
x-stainless-lang:
- python
x-stainless-os:
- MacOS
x-stainless-package-version:
- 1.26.0
x-stainless-runtime:
- CPython
x-stainless-runtime-version:
- 3.12.5
method: POST
uri: https://api.openai.com/v1/chat/completions
response:
body:
string: |-
{
"id": "chatcmpl-ASv9R2E7Yhb2e7bj4Xl0qm9s3J42Y",
"object": "chat.completion",
"created": 1731456237,
"model": "gpt-4o-mini-2024-07-18",
"choices": [
{
"index": 0,
"message": {
"role": "assistant",
"content": "This is a test. How can I assist you further?",
"refusal": null
},
"logprobs": null,
"finish_reason": "stop"
}
],
"service_tier": "default",
"usage": {
"prompt_tokens": 12,
"completion_tokens": 12,
"total_tokens": 24,
"prompt_tokens_details": {
"cached_tokens": 0,
"audio_tokens": 0
},
"completion_tokens_details": {
"reasoning_tokens": 0,
"audio_tokens": 0,
"accepted_prediction_tokens": 0,
"rejected_prediction_tokens": 0
}
},
"system_fingerprint": "fp_0ba0d124f1"
}
headers:
CF-Cache-Status:
- DYNAMIC
CF-RAY:
- 8e1a80679a8311a6-MRS
Connection:
- keep-alive
Content-Type:
- application/json
Date:
- Wed, 13 Nov 2024 00:03:58 GMT
Server:
- cloudflare
Set-Cookie: test_set_cookie
Transfer-Encoding:
- chunked
X-Content-Type-Options:
- nosniff
access-control-expose-headers:
- X-Request-ID
alt-svc:
- h3=":443"; ma=86400
content-length:
- '796'
openai-organization: test_openai_org_id
openai-processing-ms:
- '359'
openai-version:
- '2020-10-01'
strict-transport-security:
- max-age=31536000; includeSubDomains; preload
x-ratelimit-limit-requests:
- '30000'
x-ratelimit-limit-tokens:
- '150000000'
x-ratelimit-remaining-requests:
- '29999'
x-ratelimit-remaining-tokens:
- '149999978'
x-ratelimit-reset-requests:
- 2ms
x-ratelimit-reset-tokens:
- 0s
x-request-id:
- req_41ea134c1fc450d4ca4cf8d0c6a7c53a
status:
code: 200
message: OK
version: 1

View File

@@ -0,0 +1,135 @@
interactions:
- request:
body: |-
{
"messages": [
{
"role": "user",
"content": "Say this is a test"
}
],
"model": "gpt-4o-mini",
"stream": false
}
headers:
accept:
- application/json
accept-encoding:
- gzip, deflate
authorization:
- Bearer test_openai_api_key
connection:
- keep-alive
content-length:
- '106'
content-type:
- application/json
host:
- api.openai.com
user-agent:
- OpenAI/Python 1.54.3
x-stainless-arch:
- arm64
x-stainless-async:
- 'false'
x-stainless-lang:
- python
x-stainless-os:
- MacOS
x-stainless-package-version:
- 1.54.3
x-stainless-retry-count:
- '0'
x-stainless-runtime:
- CPython
x-stainless-runtime-version:
- 3.12.6
method: POST
uri: https://api.openai.com/v1/chat/completions
response:
body:
string: |-
{
"id": "chatcmpl-ASYMQRl3A3DXL9FWCK9tnGRcKIO7q",
"object": "chat.completion",
"created": 1731368630,
"model": "gpt-4o-mini-2024-07-18",
"choices": [
{
"index": 0,
"message": {
"role": "assistant",
"content": "This is a test.",
"refusal": null
},
"logprobs": null,
"finish_reason": "stop"
}
],
"service_tier": "default",
"usage": {
"prompt_tokens": 12,
"completion_tokens": 5,
"total_tokens": 17,
"prompt_tokens_details": {
"cached_tokens": 0,
"audio_tokens": 0
},
"completion_tokens_details": {
"reasoning_tokens": 0,
"audio_tokens": 0,
"accepted_prediction_tokens": 0,
"rejected_prediction_tokens": 0
}
},
"system_fingerprint": "fp_0ba0d124f1"
}
headers:
CF-Cache-Status:
- DYNAMIC
CF-RAY:
- 8e122593ff368bc8-SIN
Connection:
- keep-alive
Content-Type:
- application/json
Date:
- Mon, 11 Nov 2024 23:43:50 GMT
Server:
- cloudflare
Set-Cookie: test_set_cookie
Transfer-Encoding:
- chunked
X-Content-Type-Options:
- nosniff
access-control-expose-headers:
- X-Request-ID
alt-svc:
- h3=":443"; ma=86400
content-length:
- '765'
openai-organization: test_openai_org_id
openai-processing-ms:
- '287'
openai-version:
- '2020-10-01'
strict-transport-security:
- max-age=31536000; includeSubDomains; preload
x-ratelimit-limit-requests:
- '10000'
x-ratelimit-limit-tokens:
- '200000'
x-ratelimit-remaining-requests:
- '9999'
x-ratelimit-remaining-tokens:
- '199977'
x-ratelimit-reset-requests:
- 8.64s
x-ratelimit-reset-tokens:
- 6ms
x-request-id:
- req_58cff97afd0e7c0bba910ccf0b044a6f
status:
code: 200
message: OK
version: 1

View File

@@ -17,6 +17,17 @@ from opentelemetry.sdk._logs.export import (
InMemoryLogExporter,
SimpleLogRecordProcessor,
)
from opentelemetry.sdk.metrics import (
Histogram,
MeterProvider,
)
from opentelemetry.sdk.metrics.export import (
InMemoryMetricReader,
)
from opentelemetry.sdk.metrics.view import (
ExplicitBucketHistogramAggregation,
View,
)
from opentelemetry.sdk.trace import TracerProvider
from opentelemetry.sdk.trace.export import SimpleSpanProcessor
from opentelemetry.sdk.trace.export.in_memory_span_exporter import (
@@ -36,6 +47,12 @@ def fixture_log_exporter():
yield exporter
@pytest.fixture(scope="function", name="metric_reader")
def fixture_metric_reader():
reader = InMemoryMetricReader()
yield reader
@pytest.fixture(scope="function", name="tracer_provider")
def fixture_tracer_provider(span_exporter):
provider = TracerProvider()
@@ -52,6 +69,62 @@ def fixture_event_logger_provider(log_exporter):
return event_logger_provider
@pytest.fixture(scope="function", name="meter_provider")
def fixture_meter_provider(metric_reader):
token_usage_histogram_view = View(
instrument_type=Histogram,
instrument_name="gen_ai.client.token.usage",
aggregation=ExplicitBucketHistogramAggregation(
boundaries=[
1,
4,
16,
64,
256,
1024,
4096,
16384,
65536,
262144,
1048576,
4194304,
16777216,
67108864,
]
),
)
duration_histogram_view = View(
instrument_type=Histogram,
instrument_name="gen_ai.client.operation.duration",
aggregation=ExplicitBucketHistogramAggregation(
boundaries=[
0.01,
0.02,
0.04,
0.08,
0.16,
0.32,
0.64,
1.28,
2.56,
5.12,
10.24,
20.48,
40.96,
81.92,
]
),
)
meter_provider = MeterProvider(
metric_readers=[metric_reader],
views=[token_usage_histogram_view, duration_histogram_view],
)
return meter_provider
@pytest.fixture(autouse=True)
def environment():
if not os.getenv("OPENAI_API_KEY"):
@@ -83,7 +156,9 @@ def vcr_config():
@pytest.fixture(scope="function")
def instrument_no_content(tracer_provider, event_logger_provider):
def instrument_no_content(
tracer_provider, event_logger_provider, meter_provider
):
os.environ.update(
{OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT: "False"}
)
@@ -92,6 +167,7 @@ def instrument_no_content(tracer_provider, event_logger_provider):
instrumentor.instrument(
tracer_provider=tracer_provider,
event_logger_provider=event_logger_provider,
meter_provider=meter_provider,
)
yield instrumentor
@@ -100,7 +176,9 @@ def instrument_no_content(tracer_provider, event_logger_provider):
@pytest.fixture(scope="function")
def instrument_with_content(tracer_provider, event_logger_provider):
def instrument_with_content(
tracer_provider, event_logger_provider, meter_provider
):
os.environ.update(
{OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT: "True"}
)
@@ -108,6 +186,7 @@ def instrument_with_content(tracer_provider, event_logger_provider):
instrumentor.instrument(
tracer_provider=tracer_provider,
event_logger_provider=event_logger_provider,
meter_provider=meter_provider,
)
yield instrumentor

View File

@@ -32,6 +32,7 @@ from opentelemetry.semconv._incubating.attributes import (
from opentelemetry.semconv._incubating.attributes import (
server_attributes as ServerAttributes,
)
from opentelemetry.semconv._incubating.metrics import gen_ai_metrics
@pytest.mark.vcr()
@@ -94,7 +95,9 @@ def test_chat_completion_no_content(
assert_message_in_logs(logs[1], "gen_ai.choice", choice_event, spans[0])
def test_chat_completion_bad_endpoint(span_exporter, instrument_no_content):
def test_chat_completion_bad_endpoint(
span_exporter, metric_reader, instrument_no_content
):
llm_model_value = "gpt-4o-mini"
messages_value = [{"role": "user", "content": "Say this is a test"}]
@@ -116,10 +119,31 @@ def test_chat_completion_bad_endpoint(span_exporter, instrument_no_content):
"APIConnectionError" == spans[0].attributes[ErrorAttributes.ERROR_TYPE]
)
metrics = metric_reader.get_metrics_data().resource_metrics
assert len(metrics) == 1
metric_data = metrics[0].scope_metrics[0].metrics
duration_metric = next(
(
m
for m in metric_data
if m.name == gen_ai_metrics.GEN_AI_CLIENT_OPERATION_DURATION
),
None,
)
assert duration_metric is not None
assert duration_metric.data.data_points[0].sum > 0
assert (
duration_metric.data.data_points[0].attributes[
ErrorAttributes.ERROR_TYPE
]
== "APIConnectionError"
)
@pytest.mark.vcr()
def test_chat_completion_404(
span_exporter, openai_client, instrument_no_content
span_exporter, openai_client, metric_reader, instrument_no_content
):
llm_model_value = "this-model-does-not-exist"
messages_value = [{"role": "user", "content": "Say this is a test"}]
@@ -135,6 +159,27 @@
assert_all_attributes(spans[0], llm_model_value)
assert "NotFoundError" == spans[0].attributes[ErrorAttributes.ERROR_TYPE]
metrics = metric_reader.get_metrics_data().resource_metrics
assert len(metrics) == 1
metric_data = metrics[0].scope_metrics[0].metrics
duration_metric = next(
(
m
for m in metric_data
if m.name == gen_ai_metrics.GEN_AI_CLIENT_OPERATION_DURATION
),
None,
)
assert duration_metric is not None
assert duration_metric.data.data_points[0].sum > 0
assert (
duration_metric.data.data_points[0].attributes[
ErrorAttributes.ERROR_TYPE
]
== "NotFoundError"
)
@pytest.mark.vcr()
def test_chat_completion_extra_params(

View File

@@ -0,0 +1,190 @@
import pytest
from opentelemetry.semconv._incubating.attributes import (
gen_ai_attributes as GenAIAttributes,
)
from opentelemetry.semconv._incubating.attributes import (
server_attributes as ServerAttributes,
)
from opentelemetry.semconv._incubating.metrics import gen_ai_metrics
def assert_all_metric_attributes(data_point):
assert GenAIAttributes.GEN_AI_OPERATION_NAME in data_point.attributes
assert (
data_point.attributes[GenAIAttributes.GEN_AI_OPERATION_NAME]
== GenAIAttributes.GenAiOperationNameValues.CHAT.value
)
assert GenAIAttributes.GEN_AI_SYSTEM in data_point.attributes
assert (
data_point.attributes[GenAIAttributes.GEN_AI_SYSTEM]
== GenAIAttributes.GenAiSystemValues.OPENAI.value
)
assert GenAIAttributes.GEN_AI_REQUEST_MODEL in data_point.attributes
assert (
data_point.attributes[GenAIAttributes.GEN_AI_REQUEST_MODEL]
== "gpt-4o-mini"
)
assert GenAIAttributes.GEN_AI_RESPONSE_MODEL in data_point.attributes
assert (
data_point.attributes[GenAIAttributes.GEN_AI_RESPONSE_MODEL]
== "gpt-4o-mini-2024-07-18"
)
assert "gen_ai.openai.response.system_fingerprint" in data_point.attributes
assert (
data_point.attributes["gen_ai.openai.response.system_fingerprint"]
== "fp_0ba0d124f1"
)
assert (
GenAIAttributes.GEN_AI_OPENAI_RESPONSE_SERVICE_TIER
in data_point.attributes
)
assert (
data_point.attributes[
GenAIAttributes.GEN_AI_OPENAI_RESPONSE_SERVICE_TIER
]
== "default"
)
assert (
data_point.attributes[ServerAttributes.SERVER_ADDRESS]
== "api.openai.com"
)
@pytest.mark.vcr()
def test_chat_completion_metrics(
metric_reader, openai_client, instrument_with_content
):
llm_model_value = "gpt-4o-mini"
messages_value = [{"role": "user", "content": "Say this is a test"}]
openai_client.chat.completions.create(
messages=messages_value, model=llm_model_value, stream=False
)
metrics = metric_reader.get_metrics_data().resource_metrics
assert len(metrics) == 1
metric_data = metrics[0].scope_metrics[0].metrics
assert len(metric_data) == 2
duration_metric = next(
(
m
for m in metric_data
if m.name == gen_ai_metrics.GEN_AI_CLIENT_OPERATION_DURATION
),
None,
)
assert duration_metric is not None
assert duration_metric.data.data_points[0].sum > 0
assert_all_metric_attributes(duration_metric.data.data_points[0])
token_usage_metric = next(
(
m
for m in metric_data
if m.name == gen_ai_metrics.GEN_AI_CLIENT_TOKEN_USAGE
),
None,
)
assert token_usage_metric is not None
input_token_usage = next(
(
d
for d in token_usage_metric.data.data_points
if d.attributes[GenAIAttributes.GEN_AI_TOKEN_TYPE]
== GenAIAttributes.GenAiTokenTypeValues.INPUT.value
),
None,
)
assert input_token_usage is not None
assert input_token_usage.sum == 12
# assert against buckets [1, 4, 16, 64, 256, 1024, 4096, 16384, 65536, 262144, 1048576, 4194304, 16777216, 67108864]
assert input_token_usage.bucket_counts[2] == 1
assert_all_metric_attributes(input_token_usage)
output_token_usage = next(
(
d
for d in token_usage_metric.data.data_points
if d.attributes[GenAIAttributes.GEN_AI_TOKEN_TYPE]
== GenAIAttributes.GenAiTokenTypeValues.COMPLETION.value
),
None,
)
assert output_token_usage is not None
assert output_token_usage.sum == 5
# assert against buckets [1, 4, 16, 64, 256, 1024, 4096, 16384, 65536, 262144, 1048576, 4194304, 16777216, 67108864]
assert output_token_usage.bucket_counts[2] == 1
assert_all_metric_attributes(output_token_usage)
@pytest.mark.vcr()
@pytest.mark.asyncio()
async def test_async_chat_completion_metrics(
metric_reader, async_openai_client, instrument_with_content
):
llm_model_value = "gpt-4o-mini"
messages_value = [{"role": "user", "content": "Say this is a test"}]
await async_openai_client.chat.completions.create(
messages=messages_value, model=llm_model_value, stream=False
)
metrics = metric_reader.get_metrics_data().resource_metrics
assert len(metrics) == 1
metric_data = metrics[0].scope_metrics[0].metrics
assert len(metric_data) == 2
duration_metric = next(
(
m
for m in metric_data
if m.name == gen_ai_metrics.GEN_AI_CLIENT_OPERATION_DURATION
),
None,
)
assert duration_metric is not None
assert duration_metric.data.data_points[0].sum > 0
assert_all_metric_attributes(duration_metric.data.data_points[0])
token_usage_metric = next(
(
m
for m in metric_data
if m.name == gen_ai_metrics.GEN_AI_CLIENT_TOKEN_USAGE
),
None,
)
assert token_usage_metric is not None
input_token_usage = next(
(
d
for d in token_usage_metric.data.data_points
if d.attributes[GenAIAttributes.GEN_AI_TOKEN_TYPE]
== GenAIAttributes.GenAiTokenTypeValues.INPUT.value
),
None,
)
assert input_token_usage is not None
assert input_token_usage.sum == 12
assert_all_metric_attributes(input_token_usage)
output_token_usage = next(
(
d
for d in token_usage_metric.data.data_points
if d.attributes[GenAIAttributes.GEN_AI_TOKEN_TYPE]
== GenAIAttributes.GenAiTokenTypeValues.COMPLETION.value
),
None,
)
assert output_token_usage is not None
assert output_token_usage.sum == 12
assert_all_metric_attributes(output_token_usage)