From 7bd0895d01d14d6ee290449d4a2b385b2ae96f03 Mon Sep 17 00:00:00 2001
From: Srdjan Lulic <srdjan.lulic@elastic.co>
Date: Wed, 28 May 2025 13:17:33 +0100
Subject: [PATCH] botocore: add dict type check before parsing tool_use from
 Bedrock messages (#3548)

The Amazon Bedrock tool use extraction logic does not handle some conversation-construction use cases, such as a pre-created assistant message whose content is a plain string:

'messages': [{"role": "user", "content": "Placeholder text."}, {"role": "assistant", "content": "{"}],

This PR addresses this by adding a dict type check on each message content item before attempting to read `tool_use` from it.
---
 CHANGELOG.md                                  |  2 +
 .../botocore/extensions/bedrock_utils.py      |  4 +-
 ...h_content_assistant_content_as_string.yaml | 78 +++++++++++++++++++
 .../tests/test_botocore_bedrock.py            | 55 +++++++++++++
 4 files changed, 138 insertions(+), 1 deletion(-)
 create mode 100644 instrumentation/opentelemetry-instrumentation-botocore/tests/cassettes/test_invoke_model_with_content_assistant_content_as_string.yaml

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 2b8329a6e..f69127240 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -37,6 +37,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
   ([#3520](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/3520))
 - `opentelemetry-instrumentation-botocore` Ensure spans end on early stream closure for Bedrock Streaming APIs
   ([#3481](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/3481))
+- `opentelemetry-instrumentation-botocore` Add type check when extracting tool use from Bedrock request message content
+  ([#3548](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/3548))
 
 ### Breaking changes
 
diff --git a/instrumentation/opentelemetry-instrumentation-botocore/src/opentelemetry/instrumentation/botocore/extensions/bedrock_utils.py b/instrumentation/opentelemetry-instrumentation-botocore/src/opentelemetry/instrumentation/botocore/extensions/bedrock_utils.py
index 7fdde97e4..a690ba597 100644
--- a/instrumentation/opentelemetry-instrumentation-botocore/src/opentelemetry/instrumentation/botocore/extensions/bedrock_utils.py
+++ b/instrumentation/opentelemetry-instrumentation-botocore/src/opentelemetry/instrumentation/botocore/extensions/bedrock_utils.py
@@ -409,7 +409,9 @@ def extract_tool_calls(
     tool_uses = [item["toolUse"] for item in content if "toolUse" in item]
     if not tool_uses:
         tool_uses = [
-            item for item in content if item.get("type") == "tool_use"
+            item
+            for item in content
+            if isinstance(item, dict) and item.get("type") == "tool_use"
         ]
         tool_id_key = "id"
     else:
diff --git a/instrumentation/opentelemetry-instrumentation-botocore/tests/cassettes/test_invoke_model_with_content_assistant_content_as_string.yaml b/instrumentation/opentelemetry-instrumentation-botocore/tests/cassettes/test_invoke_model_with_content_assistant_content_as_string.yaml
new file mode 100644
index 000000000..40e806817
--- /dev/null
+++ b/instrumentation/opentelemetry-instrumentation-botocore/tests/cassettes/test_invoke_model_with_content_assistant_content_as_string.yaml
@@ -0,0 +1,78 @@
+interactions:
+- request:
+    body: |-
+      {
+        "messages": [
+          {
+            "role": "user",
+            "content": "say this is a test"
+          },
+          {
+            "role": "assistant",
+            "content": "{"
+          }
+        ],
+        "max_tokens": 10,
+        "anthropic_version": "bedrock-2023-05-31"
+      }
+    headers:
+      Content-Length:
+      - '165'
+      User-Agent:
+      - Boto3/1.28.80 md/Botocore#1.31.80 ua/2.0 os/macos#24.5.0 md/arch#arm64 lang/python#3.12.0
+        md/pyimpl#CPython cfg/retry-mode#legacy Botocore/1.31.80
+      X-Amz-Date:
+      - 20250528T092318Z
+      X-Amz-Security-Token:
+      - test_aws_security_token
+      X-Amzn-Trace-Id:
+      - Root=1-313b2456-f7fd3ee19dd37dd03a4be55a;Parent=1533ee3b8e477491;Sampled=1
+      amz-sdk-invocation-id:
+      - 0a756b13-c341-415c-b114-337ce59bdd11
+      amz-sdk-request:
+      - attempt=1
+      authorization:
+      - Bearer test_aws_authorization
+    method: POST
+    uri: https://bedrock-runtime.us-east-1.amazonaws.com/model/us.anthropic.claude-3-5-haiku-20241022-v1%3A0/invoke
+  response:
+    body:
+      string: |-
+        {
+          "id": "msg_bdrk_01Rxpterf4rGAoFZVDpEqFEF",
+          "type": "message",
+          "role": "assistant",
+          "model": "claude-3-5-haiku-20241022",
+          "content": [
+            {
+              "type": "text",
+              "text": "this is a test}"
+            }
+          ],
+          "stop_reason": "end_turn",
+          "stop_sequence": null,
+          "usage": {
+            "input_tokens": 13,
+            "output_tokens": 8
+          }
+        }
+    headers:
+      Connection:
+      - keep-alive
+      Content-Type:
+      - application/json
+      Date:
+      - Wed, 28 May 2025 09:23:18 GMT
+      Set-Cookie: test_set_cookie
+      X-Amzn-Bedrock-Input-Token-Count:
+      - '13'
+      X-Amzn-Bedrock-Invocation-Latency:
+      - '608'
+      X-Amzn-Bedrock-Output-Token-Count:
+      - '8'
+      x-amzn-RequestId:
+      - 4458f14e-5cde-44fb-a1ea-881ed935fb73
+    status:
+      code: 200
+      message: OK
+version: 1
diff --git a/instrumentation/opentelemetry-instrumentation-botocore/tests/test_botocore_bedrock.py b/instrumentation/opentelemetry-instrumentation-botocore/tests/test_botocore_bedrock.py
index ec7038cc8..2557fe135 100644
--- a/instrumentation/opentelemetry-instrumentation-botocore/tests/test_botocore_bedrock.py
+++ b/instrumentation/opentelemetry-instrumentation-botocore/tests/test_botocore_bedrock.py
@@ -1421,6 +1421,61 @@ def test_invoke_model_with_content_user_content_as_string(
     assert_message_in_logs(logs[1], "gen_ai.choice", choice_body, span)
 
 
+@pytest.mark.vcr()
+def test_invoke_model_with_content_assistant_content_as_string(
+    span_exporter,
+    log_exporter,
+    bedrock_runtime_client,
+    instrument_with_content,
+):
+    llm_model_value = "us.anthropic.claude-3-5-haiku-20241022-v1:0"
+    max_tokens = 10
+    body = json.dumps(
+        {
+            "messages": [
+                {"role": "user", "content": "say this is a test"},
+                {"role": "assistant", "content": "{"},
+            ],
+            "max_tokens": max_tokens,
+            "anthropic_version": "bedrock-2023-05-31",
+        }
+    )
+    response = bedrock_runtime_client.invoke_model(
+        body=body,
+        modelId=llm_model_value,
+    )
+
+    (span,) = span_exporter.get_finished_spans()
+    assert_completion_attributes_from_streaming_body(
+        span,
+        llm_model_value,
+        response,
+        "chat",
+        request_max_tokens=max_tokens,
+    )
+
+    logs = log_exporter.get_finished_logs()
+    assert len(logs) == 3
+    user_content = {"content": "say this is a test"}
+    assert_message_in_logs(logs[0], "gen_ai.user.message", user_content, span)
+
+    assistant_content = {"content": "{"}
+    assert_message_in_logs(
+        logs[1], "gen_ai.assistant.message", assistant_content, span
+    )
+
+    assistant_response_message = {
+        "role": "assistant",
+        "content": [{"type": "text", "text": "this is a test}"}],
+    }
+    choice_body = {
+        "index": 0,
+        "finish_reason": "end_turn",
+        "message": assistant_response_message,
+    }
+    assert_message_in_logs(logs[2], "gen_ai.choice", choice_body, span)
+
+
 @pytest.mark.parametrize(
     "model_family",
     ["amazon.nova", "anthropic.claude"],