gen ai uploader encode bytes as base64 (#3771)

This commit is contained in:
Aaron Abbott 2025-09-19 12:10:39 -04:00 committed by GitHub
parent 4fb00c9894
commit 9fab62bcea
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 50 additions and 19 deletions

View File

@ -19,6 +19,7 @@ import json
import logging import logging
import posixpath import posixpath
import threading import threading
from base64 import b64encode
from concurrent.futures import Future, ThreadPoolExecutor from concurrent.futures import Future, ThreadPoolExecutor
from dataclasses import asdict, dataclass from dataclasses import asdict, dataclass
from functools import partial from functools import partial
@ -151,7 +152,12 @@ class FsspecUploadHook(UploadHook):
path: str, json_encodeable: Callable[[], JsonEncodeable] path: str, json_encodeable: Callable[[], JsonEncodeable]
) -> None: ) -> None:
with fsspec_open(path, "w") as file: with fsspec_open(path, "w") as file:
json.dump(json_encodeable(), file, separators=(",", ":")) json.dump(
json_encodeable(),
file,
separators=(",", ":"),
cls=Base64JsonEncoder,
)
def upload( def upload(
self, self,
@ -206,3 +212,10 @@ class FsspecUploadHook(UploadHook):
def shutdown(self) -> None: def shutdown(self) -> None:
# TODO: support timeout # TODO: support timeout
self._executor.shutdown() self._executor.shutdown()
class Base64JsonEncoder(json.JSONEncoder):
    """JSON encoder that serializes binary payloads as base64 text.

    JSON has no binary type, so ``bytes``, ``bytearray`` and ``memoryview``
    values are written as standard (RFC 4648) base64 strings. Every other
    type is handled by ``json.JSONEncoder`` as usual.
    """

    def default(self, o: Any) -> Any:
        """Return a JSON-encodable stand-in for ``o``.

        Args:
            o: An object the stock encoder could not serialize.

        Returns:
            The base64 encoding of ``o`` as ``str`` when ``o`` is ``bytes``,
            ``bytearray`` or ``memoryview``.

        Raises:
            TypeError: Raised by ``json.JSONEncoder.default`` for any other
                unsupported type.
        """
        if isinstance(o, (bytes, bytearray, memoryview)):
            # bytes() gives b64encode a contiguous buffer even when ``o`` is
            # a sliced (non-contiguous) memoryview. base64 output is pure
            # ASCII, so the decode cannot fail.
            return b64encode(bytes(o)).decode("ascii")
        return super().default(o)

View File

@ -14,7 +14,6 @@
# pylint: disable=import-outside-toplevel,no-name-in-module # pylint: disable=import-outside-toplevel,no-name-in-module
import importlib import importlib
import logging import logging
import sys import sys
@ -200,26 +199,31 @@ class FsspecUploaderTest(TestCase):
class TestFsspecUploadHookIntegration(TestBase): class TestFsspecUploadHookIntegration(TestBase):
def setUp(self):
    """Run the base-class setup, then create the hook under test."""
    super().setUp()
    # Built after the base setup and stored on the instance so the test
    # methods and tearDown can reach it.
    upload_hook = FsspecUploadHook(base_path=BASE_PATH)
    self.hook = upload_hook
def tearDown(self):
    """Shut down the hook created in setUp, then run base-class teardown.

    Resources are released in the reverse order of setUp: the hook is shut
    down before the base class tears down its own state. Tests that already
    called ``self.hook.shutdown()`` simply shut it down a second time here.
    """
    try:
        self.hook.shutdown()
    finally:
        # Always run the base teardown, even if shutting the hook down fails.
        super().tearDown()
def assert_fsspec_equal(self, path: str, value: str) -> None: def assert_fsspec_equal(self, path: str, value: str) -> None:
with fsspec.open(path, "r") as file: with fsspec.open(path, "r") as file:
self.assertEqual(file.read(), value) self.assertEqual(file.read(), value)
def test_upload_completions(self): def test_upload_completions(self):
hook = FsspecUploadHook(
base_path=BASE_PATH,
)
tracer = self.tracer_provider.get_tracer(__name__) tracer = self.tracer_provider.get_tracer(__name__)
log_record = LogRecord() log_record = LogRecord()
with tracer.start_as_current_span("chat mymodel") as span: with tracer.start_as_current_span("chat mymodel") as span:
hook.upload( self.hook.upload(
inputs=FAKE_INPUTS, inputs=FAKE_INPUTS,
outputs=FAKE_OUTPUTS, outputs=FAKE_OUTPUTS,
system_instruction=FAKE_SYSTEM_INSTRUCTION, system_instruction=FAKE_SYSTEM_INSTRUCTION,
span=span, span=span,
log_record=log_record, log_record=log_record,
) )
hook.shutdown() self.hook.shutdown()
finished_spans = self.get_finished_spans() finished_spans = self.get_finished_spans()
self.assertEqual(len(finished_spans), 1) self.assertEqual(len(finished_spans), 1)
@ -250,25 +254,39 @@ class TestFsspecUploadHookIntegration(TestBase):
'[{"content":"You are a helpful assistant.","type":"text"}]', '[{"content":"You are a helpful assistant.","type":"text"}]',
) )
@staticmethod def test_stamps_empty_log(self):
def upload_with_log(log_record: LogRecord): log_record = LogRecord()
hook = FsspecUploadHook( self.hook.upload(
base_path=BASE_PATH,
)
hook.upload(
inputs=FAKE_INPUTS, inputs=FAKE_INPUTS,
outputs=FAKE_OUTPUTS, outputs=FAKE_OUTPUTS,
system_instruction=FAKE_SYSTEM_INSTRUCTION, system_instruction=FAKE_SYSTEM_INSTRUCTION,
log_record=log_record, log_record=log_record,
) )
hook.shutdown()
def test_stamps_empty_log(self):
log_record = LogRecord()
self.upload_with_log(log_record)
# stamp on both body and attributes # stamp on both body and attributes
self.assertIn("gen_ai.input.messages_ref", log_record.attributes) self.assertIn("gen_ai.input.messages_ref", log_record.attributes)
self.assertIn("gen_ai.output.messages_ref", log_record.attributes) self.assertIn("gen_ai.output.messages_ref", log_record.attributes)
self.assertIn("gen_ai.system_instructions_ref", log_record.attributes) self.assertIn("gen_ai.system_instructions_ref", log_record.attributes)
def test_upload_bytes(self) -> None:
    # A raw ``bytes`` value inside a message part must reach the uploaded
    # JSON as a base64 string ("aGVsbG8=" is base64 for b"hello").
    log_record = LogRecord()
    question_part = types.Text(content="What is the capital of France?")
    bytes_part = {"type": "generic_bytes", "bytes": b"hello"}
    user_message = types.InputMessage(
        role="user",
        parts=[question_part, bytes_part],
    )

    self.hook.upload(
        inputs=[user_message],
        outputs=FAKE_OUTPUTS,
        system_instruction=FAKE_SYSTEM_INSTRUCTION,
        log_record=log_record,
    )
    # Shut the hook down before reading the uploaded file back.
    self.hook.shutdown()

    inputs_ref = log_record.attributes["gen_ai.input.messages_ref"]
    expected_json = '[{"role":"user","parts":[{"content":"What is the capital of France?","type":"text"},{"type":"generic_bytes","bytes":"aGVsbG8="}]}]'
    self.assert_fsspec_equal(inputs_ref, expected_json)