Add metrics to the Python OpenAI instrumentation (#3180)
parent 07c97eac38
commit a716949d1c
CHANGELOG.md:

@@ -12,6 +12,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html)
- Add example to `opentelemetry-instrumentation-openai-v2`
  ([#3006](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/3006))
- Support for `AsyncOpenAI/AsyncCompletions` ([#2984](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/2984))
- Add metrics ([#3180](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/3180))

## Version 2.0b0 (2024-11-08)
README.rst:

@@ -7,7 +7,8 @@ OpenTelemetry OpenAI Instrumentation
   :target: https://pypi.org/project/opentelemetry-instrumentation-openai-v2/

This library allows tracing LLM requests and logging of messages made by the
-`OpenAI Python API library <https://pypi.org/project/openai/>`_.
+`OpenAI Python API library <https://pypi.org/project/openai/>`_. It also captures
+the duration of the operations and the number of tokens used as metrics.

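As a quick orientation, a minimal sketch of turning the instrumentation on (assuming ``OPENAI_API_KEY`` is set; installation and exporter configuration are covered in the sections below):

.. code-block:: python

    from openai import OpenAI
    from opentelemetry.instrumentation.openai_v2 import OpenAIInstrumentor

    # Instrument all OpenAI clients created from this point on; spans, log
    # events, and metrics flow through the globally configured providers.
    OpenAIInstrumentor().instrument()

    client = OpenAI()
    client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[{"role": "user", "content": "Say this is a test"}],
    )
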
Installation

@@ -74,6 +75,48 @@ To uninstrument clients, call the uninstrument method:
    # Uninstrument all clients
    OpenAIInstrumentor().uninstrument()

Bucket Boundaries
-----------------

This section lists the explicit bucket boundaries for the token usage and operation duration metrics, and shows how to create Views that apply them according to the semantic conventions.

The bucket boundaries are defined as follows:

- For ``gen_ai.client.token.usage``: [1, 4, 16, 64, 256, 1024, 4096, 16384, 65536, 262144, 1048576, 4194304, 16777216, 67108864]
- For ``gen_ai.client.operation.duration`` (seconds): [0.01, 0.02, 0.04, 0.08, 0.16, 0.32, 0.64, 1.28, 2.56, 5.12, 10.24, 20.48, 40.96, 81.92]

To apply these bucket boundaries, create Views in your OpenTelemetry SDK setup. Here is an example:

.. code-block:: python

    from opentelemetry.metrics import set_meter_provider
    from opentelemetry.sdk.metrics import MeterProvider
    from opentelemetry.sdk.metrics.export import PeriodicExportingMetricReader
    from opentelemetry.sdk.metrics.view import (
        ExplicitBucketHistogramAggregation,
        View,
    )
    from opentelemetry.exporter.otlp.proto.grpc.metric_exporter import (
        OTLPMetricExporter,
    )

    views = [
        View(
            instrument_name="gen_ai.client.token.usage",
            aggregation=ExplicitBucketHistogramAggregation(
                [1, 4, 16, 64, 256, 1024, 4096, 16384, 65536, 262144, 1048576, 4194304, 16777216, 67108864]
            ),
        ),
        View(
            instrument_name="gen_ai.client.operation.duration",
            aggregation=ExplicitBucketHistogramAggregation(
                [0.01, 0.02, 0.04, 0.08, 0.16, 0.32, 0.64, 1.28, 2.56, 5.12, 10.24, 20.48, 40.96, 81.92]
            ),
        ),
    ]

    metric_exporter = OTLPMetricExporter(endpoint="http://localhost:4317")
    metric_reader = PeriodicExportingMetricReader(metric_exporter)
    provider = MeterProvider(
        metric_readers=[metric_reader],
        views=views,
    )

    set_meter_provider(provider)

For more details, refer to the `OpenTelemetry GenAI Metrics documentation <https://opentelemetry.io/docs/specs/semconv/gen-ai/gen-ai-metrics/>`_.
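
To verify the metrics locally, one option (a sketch, assuming no collector is running) is to swap the OTLP exporter above for the SDK's console exporter, which prints each export batch to stdout:

.. code-block:: python

    from opentelemetry.metrics import set_meter_provider
    from opentelemetry.sdk.metrics import MeterProvider
    from opentelemetry.sdk.metrics.export import (
        ConsoleMetricExporter,
        PeriodicExportingMetricReader,
    )

    # Print the gen_ai.client.* histograms to stdout every five seconds,
    # reusing the `views` list defined in the previous example.
    reader = PeriodicExportingMetricReader(
        ConsoleMetricExporter(), export_interval_millis=5000
    )
    set_meter_provider(MeterProvider(metric_readers=[reader], views=views))
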
References
----------
* `OpenTelemetry OpenAI Instrumentation <https://opentelemetry-python-contrib.readthedocs.io/en/latest/instrumentation/openai/openai.html>`_

openai_v2/__init__.py:

@@ -49,13 +49,18 @@ from opentelemetry.instrumentation.instrumentor import BaseInstrumentor
from opentelemetry.instrumentation.openai_v2.package import _instruments
from opentelemetry.instrumentation.openai_v2.utils import is_content_enabled
from opentelemetry.instrumentation.utils import unwrap
from opentelemetry.metrics import get_meter
from opentelemetry.semconv.schemas import Schemas
from opentelemetry.trace import get_tracer

from .instruments import Instruments
from .patch import async_chat_completions_create, chat_completions_create


class OpenAIInstrumentor(BaseInstrumentor):
    def __init__(self):
        self._meter = None

    def instrumentation_dependencies(self) -> Collection[str]:
        return _instruments

@@ -75,12 +80,21 @@ class OpenAIInstrumentor(BaseInstrumentor):
            schema_url=Schemas.V1_28_0.value,
            event_logger_provider=event_logger_provider,
        )
        meter_provider = kwargs.get("meter_provider")
        self._meter = get_meter(
            __name__,
            "",
            meter_provider,
            schema_url=Schemas.V1_28_0.value,
        )

        instruments = Instruments(self._meter)

        wrap_function_wrapper(
            module="openai.resources.chat.completions",
            name="Completions.create",
            wrapper=chat_completions_create(
-                tracer, event_logger, is_content_enabled()
+                tracer, event_logger, instruments, is_content_enabled()
            ),
        )

@@ -88,7 +102,7 @@ class OpenAIInstrumentor(BaseInstrumentor):
            module="openai.resources.chat.completions",
            name="AsyncCompletions.create",
            wrapper=async_chat_completions_create(
-                tracer, event_logger, is_content_enabled()
+                tracer, event_logger, instruments, is_content_enabled()
            ),
        )

openai_v2/instruments.py (new file):

@@ -0,0 +1,11 @@
from opentelemetry.semconv._incubating.metrics import gen_ai_metrics


class Instruments:
    def __init__(self, meter):
        self.operation_duration_histogram = (
            gen_ai_metrics.create_gen_ai_client_operation_duration(meter)
        )
        self.token_usage_histogram = (
            gen_ai_metrics.create_gen_ai_client_token_usage(meter)
        )
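
For reference, the ``create_gen_ai_client_*`` helpers above come from the incubating semantic-conventions package. A rough sketch of the equivalent manual instrument creation (units and names per the GenAI semantic conventions; treat the exact description strings as assumptions):

.. code-block:: python

    # Hypothetical manual equivalent of the Instruments class above.
    duration_histogram = meter.create_histogram(
        name="gen_ai.client.operation.duration",
        unit="s",
        description="GenAI operation duration",
    )
    token_usage_histogram = meter.create_histogram(
        name="gen_ai.client.token.usage",
        unit="{token}",
        description="Measures number of input and output tokens used",
    )
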
openai_v2/patch.py:

@@ -13,6 +13,7 @@
# limitations under the License.


from timeit import default_timer
from typing import Optional

from openai import Stream

@@ -21,8 +22,12 @@ from opentelemetry._events import Event, EventLogger
from opentelemetry.semconv._incubating.attributes import (
    gen_ai_attributes as GenAIAttributes,
)
from opentelemetry.semconv._incubating.attributes import (
    server_attributes as ServerAttributes,
)
from opentelemetry.trace import Span, SpanKind, Tracer

from .instruments import Instruments
from .utils import (
    choice_to_event,
    get_llm_request_attributes,

@@ -34,7 +39,10 @@ from .utils import (


def chat_completions_create(
-    tracer: Tracer, event_logger: EventLogger, capture_content: bool
+    tracer: Tracer,
+    event_logger: EventLogger,
+    instruments: Instruments,
+    capture_content: bool,
):
    """Wrap the `create` method of the `ChatCompletion` class to trace it."""

@@ -54,6 +62,9 @@ def chat_completions_create(
                        message_to_event(message, capture_content)
                    )

            start = default_timer()
            result = None
            error_type = None
            try:
                result = wrapped(*args, **kwargs)
                if is_streaming(kwargs):

@@ -69,14 +80,27 @@
                return result

            except Exception as error:
                error_type = type(error).__qualname__
                handle_span_exception(span, error)
                raise
            finally:
                duration = max((default_timer() - start), 0)
                _record_metrics(
                    instruments,
                    duration,
                    result,
                    span_attributes,
                    error_type,
                )

    return traced_method


def async_chat_completions_create(
-    tracer: Tracer, event_logger: EventLogger, capture_content: bool
+    tracer: Tracer,
+    event_logger: EventLogger,
+    instruments: Instruments,
+    capture_content: bool,
):
    """Wrap the `create` method of the `AsyncChatCompletion` class to trace it."""

@@ -96,6 +120,9 @@
                        message_to_event(message, capture_content)
                    )

            start = default_timer()
            result = None
            error_type = None
            try:
                result = await wrapped(*args, **kwargs)
                if is_streaming(kwargs):

@@ -111,12 +138,88 @@
                return result

            except Exception as error:
                error_type = type(error).__qualname__
                handle_span_exception(span, error)
                raise
            finally:
                duration = max((default_timer() - start), 0)
                _record_metrics(
                    instruments,
                    duration,
                    result,
                    span_attributes,
                    error_type,
                )

    return traced_method


def _record_metrics(
    instruments: Instruments,
    duration: float,
    result,
    span_attributes: dict,
    error_type: Optional[str],
):
    common_attributes = {
        GenAIAttributes.GEN_AI_OPERATION_NAME: GenAIAttributes.GenAiOperationNameValues.CHAT.value,
        GenAIAttributes.GEN_AI_SYSTEM: GenAIAttributes.GenAiSystemValues.OPENAI.value,
        GenAIAttributes.GEN_AI_REQUEST_MODEL: span_attributes[
            GenAIAttributes.GEN_AI_REQUEST_MODEL
        ],
    }

    if error_type:
        common_attributes["error.type"] = error_type

    if result and getattr(result, "model", None):
        common_attributes[GenAIAttributes.GEN_AI_RESPONSE_MODEL] = result.model

    if result and getattr(result, "service_tier", None):
        common_attributes[
            GenAIAttributes.GEN_AI_OPENAI_RESPONSE_SERVICE_TIER
        ] = result.service_tier

    if result and getattr(result, "system_fingerprint", None):
        common_attributes["gen_ai.openai.response.system_fingerprint"] = (
            result.system_fingerprint
        )

    if ServerAttributes.SERVER_ADDRESS in span_attributes:
        common_attributes[ServerAttributes.SERVER_ADDRESS] = span_attributes[
            ServerAttributes.SERVER_ADDRESS
        ]

    if ServerAttributes.SERVER_PORT in span_attributes:
        common_attributes[ServerAttributes.SERVER_PORT] = span_attributes[
            ServerAttributes.SERVER_PORT
        ]

    instruments.operation_duration_histogram.record(
        duration,
        attributes=common_attributes,
    )

    if result and getattr(result, "usage", None):
        input_attributes = {
            **common_attributes,
            GenAIAttributes.GEN_AI_TOKEN_TYPE: GenAIAttributes.GenAiTokenTypeValues.INPUT.value,
        }
        instruments.token_usage_histogram.record(
            result.usage.prompt_tokens,
            attributes=input_attributes,
        )

        completion_attributes = {
            **common_attributes,
            GenAIAttributes.GEN_AI_TOKEN_TYPE: GenAIAttributes.GenAiTokenTypeValues.COMPLETION.value,
        }
        instruments.token_usage_histogram.record(
            result.usage.completion_tokens,
            attributes=completion_attributes,
        )


def _set_response_attributes(
    span, result, event_logger: EventLogger, capture_content: bool
):

VCR cassette for the async chat completion metrics test (new file):

@@ -0,0 +1,133 @@
interactions:
- request:
    body: |-
      {
        "messages": [
          {
            "role": "user",
            "content": "Say this is a test"
          }
        ],
        "model": "gpt-4o-mini",
        "stream": false
      }
    headers:
      accept:
      - application/json
      accept-encoding:
      - gzip, deflate
      authorization:
      - Bearer test_openai_api_key
      connection:
      - keep-alive
      content-length:
      - '106'
      content-type:
      - application/json
      host:
      - api.openai.com
      user-agent:
      - AsyncOpenAI/Python 1.26.0
      x-stainless-arch:
      - arm64
      x-stainless-async:
      - async:asyncio
      x-stainless-lang:
      - python
      x-stainless-os:
      - MacOS
      x-stainless-package-version:
      - 1.26.0
      x-stainless-runtime:
      - CPython
      x-stainless-runtime-version:
      - 3.12.5
    method: POST
    uri: https://api.openai.com/v1/chat/completions
  response:
    body:
      string: |-
        {
          "id": "chatcmpl-ASv9R2E7Yhb2e7bj4Xl0qm9s3J42Y",
          "object": "chat.completion",
          "created": 1731456237,
          "model": "gpt-4o-mini-2024-07-18",
          "choices": [
            {
              "index": 0,
              "message": {
                "role": "assistant",
                "content": "This is a test. How can I assist you further?",
                "refusal": null
              },
              "logprobs": null,
              "finish_reason": "stop"
            }
          ],
          "service_tier": "default",
          "usage": {
            "prompt_tokens": 12,
            "completion_tokens": 12,
            "total_tokens": 24,
            "prompt_tokens_details": {
              "cached_tokens": 0,
              "audio_tokens": 0
            },
            "completion_tokens_details": {
              "reasoning_tokens": 0,
              "audio_tokens": 0,
              "accepted_prediction_tokens": 0,
              "rejected_prediction_tokens": 0
            }
          },
          "system_fingerprint": "fp_0ba0d124f1"
        }
    headers:
      CF-Cache-Status:
      - DYNAMIC
      CF-RAY:
      - 8e1a80679a8311a6-MRS
      Connection:
      - keep-alive
      Content-Type:
      - application/json
      Date:
      - Wed, 13 Nov 2024 00:03:58 GMT
      Server:
      - cloudflare
      Set-Cookie: test_set_cookie
      Transfer-Encoding:
      - chunked
      X-Content-Type-Options:
      - nosniff
      access-control-expose-headers:
      - X-Request-ID
      alt-svc:
      - h3=":443"; ma=86400
      content-length:
      - '796'
      openai-organization: test_openai_org_id
      openai-processing-ms:
      - '359'
      openai-version:
      - '2020-10-01'
      strict-transport-security:
      - max-age=31536000; includeSubDomains; preload
      x-ratelimit-limit-requests:
      - '30000'
      x-ratelimit-limit-tokens:
      - '150000000'
      x-ratelimit-remaining-requests:
      - '29999'
      x-ratelimit-remaining-tokens:
      - '149999978'
      x-ratelimit-reset-requests:
      - 2ms
      x-ratelimit-reset-tokens:
      - 0s
      x-request-id:
      - req_41ea134c1fc450d4ca4cf8d0c6a7c53a
    status:
      code: 200
      message: OK
version: 1

VCR cassette for the synchronous chat completion metrics test (new file):

@@ -0,0 +1,135 @@
interactions:
- request:
    body: |-
      {
        "messages": [
          {
            "role": "user",
            "content": "Say this is a test"
          }
        ],
        "model": "gpt-4o-mini",
        "stream": false
      }
    headers:
      accept:
      - application/json
      accept-encoding:
      - gzip, deflate
      authorization:
      - Bearer test_openai_api_key
      connection:
      - keep-alive
      content-length:
      - '106'
      content-type:
      - application/json
      host:
      - api.openai.com
      user-agent:
      - OpenAI/Python 1.54.3
      x-stainless-arch:
      - arm64
      x-stainless-async:
      - 'false'
      x-stainless-lang:
      - python
      x-stainless-os:
      - MacOS
      x-stainless-package-version:
      - 1.54.3
      x-stainless-retry-count:
      - '0'
      x-stainless-runtime:
      - CPython
      x-stainless-runtime-version:
      - 3.12.6
    method: POST
    uri: https://api.openai.com/v1/chat/completions
  response:
    body:
      string: |-
        {
          "id": "chatcmpl-ASYMQRl3A3DXL9FWCK9tnGRcKIO7q",
          "object": "chat.completion",
          "created": 1731368630,
          "model": "gpt-4o-mini-2024-07-18",
          "choices": [
            {
              "index": 0,
              "message": {
                "role": "assistant",
                "content": "This is a test.",
                "refusal": null
              },
              "logprobs": null,
              "finish_reason": "stop"
            }
          ],
          "service_tier": "default",
          "usage": {
            "prompt_tokens": 12,
            "completion_tokens": 5,
            "total_tokens": 17,
            "prompt_tokens_details": {
              "cached_tokens": 0,
              "audio_tokens": 0
            },
            "completion_tokens_details": {
              "reasoning_tokens": 0,
              "audio_tokens": 0,
              "accepted_prediction_tokens": 0,
              "rejected_prediction_tokens": 0
            }
          },
          "system_fingerprint": "fp_0ba0d124f1"
        }
    headers:
      CF-Cache-Status:
      - DYNAMIC
      CF-RAY:
      - 8e122593ff368bc8-SIN
      Connection:
      - keep-alive
      Content-Type:
      - application/json
      Date:
      - Mon, 11 Nov 2024 23:43:50 GMT
      Server:
      - cloudflare
      Set-Cookie: test_set_cookie
      Transfer-Encoding:
      - chunked
      X-Content-Type-Options:
      - nosniff
      access-control-expose-headers:
      - X-Request-ID
      alt-svc:
      - h3=":443"; ma=86400
      content-length:
      - '765'
      openai-organization: test_openai_org_id
      openai-processing-ms:
      - '287'
      openai-version:
      - '2020-10-01'
      strict-transport-security:
      - max-age=31536000; includeSubDomains; preload
      x-ratelimit-limit-requests:
      - '10000'
      x-ratelimit-limit-tokens:
      - '200000'
      x-ratelimit-remaining-requests:
      - '9999'
      x-ratelimit-remaining-tokens:
      - '199977'
      x-ratelimit-reset-requests:
      - 8.64s
      x-ratelimit-reset-tokens:
      - 6ms
      x-request-id:
      - req_58cff97afd0e7c0bba910ccf0b044a6f
    status:
      code: 200
      message: OK
version: 1

tests/conftest.py:

@@ -17,6 +17,17 @@ from opentelemetry.sdk._logs.export import (
    InMemoryLogExporter,
    SimpleLogRecordProcessor,
)
from opentelemetry.sdk.metrics import (
    Histogram,
    MeterProvider,
)
from opentelemetry.sdk.metrics.export import (
    InMemoryMetricReader,
)
from opentelemetry.sdk.metrics.view import (
    ExplicitBucketHistogramAggregation,
    View,
)
from opentelemetry.sdk.trace import TracerProvider
from opentelemetry.sdk.trace.export import SimpleSpanProcessor
from opentelemetry.sdk.trace.export.in_memory_span_exporter import (

@@ -36,6 +47,12 @@ def fixture_log_exporter():
    yield exporter


@pytest.fixture(scope="function", name="metric_reader")
def fixture_metric_reader():
    exporter = InMemoryMetricReader()
    yield exporter


@pytest.fixture(scope="function", name="tracer_provider")
def fixture_tracer_provider(span_exporter):
    provider = TracerProvider()

@@ -52,6 +69,62 @@ def fixture_event_logger_provider(log_exporter):
    return event_logger_provider


@pytest.fixture(scope="function", name="meter_provider")
def fixture_meter_provider(metric_reader):
    token_usage_histogram_view = View(
        instrument_type=Histogram,
        instrument_name="gen_ai.client.token.usage",
        aggregation=ExplicitBucketHistogramAggregation(
            boundaries=[
                1,
                4,
                16,
                64,
                256,
                1024,
                4096,
                16384,
                65536,
                262144,
                1048576,
                4194304,
                16777216,
                67108864,
            ]
        ),
    )

    duration_histogram_view = View(
        instrument_type=Histogram,
        instrument_name="gen_ai.client.operation.duration",
        aggregation=ExplicitBucketHistogramAggregation(
            boundaries=[
                0.01,
                0.02,
                0.04,
                0.08,
                0.16,
                0.32,
                0.64,
                1.28,
                2.56,
                5.12,
                10.24,
                20.48,
                40.96,
                81.92,
            ]
        ),
    )

    meter_provider = MeterProvider(
        metric_readers=[metric_reader],
        views=[token_usage_histogram_view, duration_histogram_view],
    )

    return meter_provider


@pytest.fixture(autouse=True)
def environment():
    if not os.getenv("OPENAI_API_KEY"):

@@ -83,7 +156,9 @@


@pytest.fixture(scope="function")
-def instrument_no_content(tracer_provider, event_logger_provider):
+def instrument_no_content(
+    tracer_provider, event_logger_provider, meter_provider
+):
    os.environ.update(
        {OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT: "False"}
    )

@@ -92,6 +167,7 @@ def instrument_no_content(tracer_provider, event_logger_provider):
    instrumentor.instrument(
        tracer_provider=tracer_provider,
        event_logger_provider=event_logger_provider,
        meter_provider=meter_provider,
    )

    yield instrumentor

@@ -100,7 +176,9 @@


@pytest.fixture(scope="function")
-def instrument_with_content(tracer_provider, event_logger_provider):
+def instrument_with_content(
+    tracer_provider, event_logger_provider, meter_provider
+):
    os.environ.update(
        {OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT: "True"}
    )

@@ -108,6 +186,7 @@ def instrument_with_content(tracer_provider, event_logger_provider):
    instrumentor.instrument(
        tracer_provider=tracer_provider,
        event_logger_provider=event_logger_provider,
        meter_provider=meter_provider,
    )

    yield instrumentor

tests/test_chat_completions.py:

@@ -32,6 +32,7 @@ from opentelemetry.semconv._incubating.attributes import (
from opentelemetry.semconv._incubating.attributes import (
    server_attributes as ServerAttributes,
)
from opentelemetry.semconv._incubating.metrics import gen_ai_metrics


@pytest.mark.vcr()

@@ -94,7 +95,9 @@ def test_chat_completion_no_content(
    assert_message_in_logs(logs[1], "gen_ai.choice", choice_event, spans[0])


-def test_chat_completion_bad_endpoint(span_exporter, instrument_no_content):
+def test_chat_completion_bad_endpoint(
+    span_exporter, metric_reader, instrument_no_content
+):
    llm_model_value = "gpt-4o-mini"
    messages_value = [{"role": "user", "content": "Say this is a test"}]

@@ -116,10 +119,31 @@ def test_chat_completion_bad_endpoint(span_exporter, instrument_no_content):
    assert (
        "APIConnectionError" == spans[0].attributes[ErrorAttributes.ERROR_TYPE]
    )

    metrics = metric_reader.get_metrics_data().resource_metrics
    assert len(metrics) == 1

    metric_data = metrics[0].scope_metrics[0].metrics
    duration_metric = next(
        (
            m
            for m in metric_data
            if m.name == gen_ai_metrics.GEN_AI_CLIENT_OPERATION_DURATION
        ),
        None,
    )
    assert duration_metric is not None
    assert duration_metric.data.data_points[0].sum > 0
    assert (
        duration_metric.data.data_points[0].attributes[
            ErrorAttributes.ERROR_TYPE
        ]
        == "APIConnectionError"
    )


@pytest.mark.vcr()
def test_chat_completion_404(
-    span_exporter, openai_client, instrument_no_content
+    span_exporter, openai_client, metric_reader, instrument_no_content
):
    llm_model_value = "this-model-does-not-exist"
    messages_value = [{"role": "user", "content": "Say this is a test"}]

@@ -135,6 +159,27 @@
    assert_all_attributes(spans[0], llm_model_value)
    assert "NotFoundError" == spans[0].attributes[ErrorAttributes.ERROR_TYPE]

    metrics = metric_reader.get_metrics_data().resource_metrics
    assert len(metrics) == 1

    metric_data = metrics[0].scope_metrics[0].metrics
    duration_metric = next(
        (
            m
            for m in metric_data
            if m.name == gen_ai_metrics.GEN_AI_CLIENT_OPERATION_DURATION
        ),
        None,
    )
    assert duration_metric is not None
    assert duration_metric.data.data_points[0].sum > 0
    assert (
        duration_metric.data.data_points[0].attributes[
            ErrorAttributes.ERROR_TYPE
        ]
        == "NotFoundError"
    )


@pytest.mark.vcr()
def test_chat_completion_extra_params(

tests/test_chat_metrics.py (new file):

@@ -0,0 +1,190 @@
import pytest

from opentelemetry.semconv._incubating.attributes import (
    gen_ai_attributes as GenAIAttributes,
)
from opentelemetry.semconv._incubating.attributes import (
    server_attributes as ServerAttributes,
)
from opentelemetry.semconv._incubating.metrics import gen_ai_metrics


def assert_all_metric_attributes(data_point):
    assert GenAIAttributes.GEN_AI_OPERATION_NAME in data_point.attributes
    assert (
        data_point.attributes[GenAIAttributes.GEN_AI_OPERATION_NAME]
        == GenAIAttributes.GenAiOperationNameValues.CHAT.value
    )
    assert GenAIAttributes.GEN_AI_SYSTEM in data_point.attributes
    assert (
        data_point.attributes[GenAIAttributes.GEN_AI_SYSTEM]
        == GenAIAttributes.GenAiSystemValues.OPENAI.value
    )
    assert GenAIAttributes.GEN_AI_REQUEST_MODEL in data_point.attributes
    assert (
        data_point.attributes[GenAIAttributes.GEN_AI_REQUEST_MODEL]
        == "gpt-4o-mini"
    )
    assert GenAIAttributes.GEN_AI_RESPONSE_MODEL in data_point.attributes
    assert (
        data_point.attributes[GenAIAttributes.GEN_AI_RESPONSE_MODEL]
        == "gpt-4o-mini-2024-07-18"
    )
    assert "gen_ai.openai.response.system_fingerprint" in data_point.attributes
    assert (
        data_point.attributes["gen_ai.openai.response.system_fingerprint"]
        == "fp_0ba0d124f1"
    )
    assert (
        GenAIAttributes.GEN_AI_OPENAI_RESPONSE_SERVICE_TIER
        in data_point.attributes
    )
    assert (
        data_point.attributes[
            GenAIAttributes.GEN_AI_OPENAI_RESPONSE_SERVICE_TIER
        ]
        == "default"
    )
    assert (
        data_point.attributes[ServerAttributes.SERVER_ADDRESS]
        == "api.openai.com"
    )


@pytest.mark.vcr()
def test_chat_completion_metrics(
    metric_reader, openai_client, instrument_with_content
):
    llm_model_value = "gpt-4o-mini"
    messages_value = [{"role": "user", "content": "Say this is a test"}]

    openai_client.chat.completions.create(
        messages=messages_value, model=llm_model_value, stream=False
    )

    metrics = metric_reader.get_metrics_data().resource_metrics
    assert len(metrics) == 1

    metric_data = metrics[0].scope_metrics[0].metrics
    assert len(metric_data) == 2

    duration_metric = next(
        (
            m
            for m in metric_data
            if m.name == gen_ai_metrics.GEN_AI_CLIENT_OPERATION_DURATION
        ),
        None,
    )
    assert duration_metric is not None
    assert duration_metric.data.data_points[0].sum > 0
    assert_all_metric_attributes(duration_metric.data.data_points[0])

    token_usage_metric = next(
        (
            m
            for m in metric_data
            if m.name == gen_ai_metrics.GEN_AI_CLIENT_TOKEN_USAGE
        ),
        None,
    )
    assert token_usage_metric is not None

    input_token_usage = next(
        (
            d
            for d in token_usage_metric.data.data_points
            if d.attributes[GenAIAttributes.GEN_AI_TOKEN_TYPE]
            == GenAIAttributes.GenAiTokenTypeValues.INPUT.value
        ),
        None,
    )
    assert input_token_usage is not None
    assert input_token_usage.sum == 12
    # assert against buckets [1, 4, 16, 64, 256, 1024, 4096, 16384, 65536, 262144, 1048576, 4194304, 16777216, 67108864]
    assert input_token_usage.bucket_counts[2] == 1
    assert_all_metric_attributes(input_token_usage)

    output_token_usage = next(
        (
            d
            for d in token_usage_metric.data.data_points
            if d.attributes[GenAIAttributes.GEN_AI_TOKEN_TYPE]
            == GenAIAttributes.GenAiTokenTypeValues.COMPLETION.value
        ),
        None,
    )
    assert output_token_usage is not None
    assert output_token_usage.sum == 5
    # assert against buckets [1, 4, 16, 64, 256, 1024, 4096, 16384, 65536, 262144, 1048576, 4194304, 16777216, 67108864]
    assert output_token_usage.bucket_counts[2] == 1
    assert_all_metric_attributes(output_token_usage)


@pytest.mark.vcr()
@pytest.mark.asyncio()
async def test_async_chat_completion_metrics(
    metric_reader, async_openai_client, instrument_with_content
):
    llm_model_value = "gpt-4o-mini"
    messages_value = [{"role": "user", "content": "Say this is a test"}]

    await async_openai_client.chat.completions.create(
        messages=messages_value, model=llm_model_value, stream=False
    )

    metrics = metric_reader.get_metrics_data().resource_metrics
    assert len(metrics) == 1

    metric_data = metrics[0].scope_metrics[0].metrics
    assert len(metric_data) == 2

    duration_metric = next(
        (
            m
            for m in metric_data
            if m.name == gen_ai_metrics.GEN_AI_CLIENT_OPERATION_DURATION
        ),
        None,
    )
    assert duration_metric is not None
    assert duration_metric.data.data_points[0].sum > 0
    assert_all_metric_attributes(duration_metric.data.data_points[0])

    token_usage_metric = next(
        (
            m
            for m in metric_data
            if m.name == gen_ai_metrics.GEN_AI_CLIENT_TOKEN_USAGE
        ),
        None,
    )
    assert token_usage_metric is not None

    input_token_usage = next(
        (
            d
            for d in token_usage_metric.data.data_points
            if d.attributes[GenAIAttributes.GEN_AI_TOKEN_TYPE]
            == GenAIAttributes.GenAiTokenTypeValues.INPUT.value
        ),
        None,
    )

    assert input_token_usage is not None
    assert input_token_usage.sum == 12
    assert_all_metric_attributes(input_token_usage)

    output_token_usage = next(
        (
            d
            for d in token_usage_metric.data.data_points
            if d.attributes[GenAIAttributes.GEN_AI_TOKEN_TYPE]
            == GenAIAttributes.GenAiTokenTypeValues.COMPLETION.value
        ),
        None,
    )

    assert output_token_usage is not None
    assert output_token_usage.sum == 12
    assert_all_metric_attributes(output_token_usage)