Skip to content

Commit ba639de

Browse files
committed
fix(openai-agents): fix realtime session event handling for prompts, completions, and usage
Handle `history_updated` events to capture assistant transcript updates. Fix dict-based data access in the `response.done` handler, where `getattr` was used on dicts instead of `.get()` and silently returned `None`. Fix unwrapping of dict-based raw model events, where the `data` variable was not rebound to the nested payload. Remove the dead `response` event handler, which could never match. Closes #3685.
1 parent c6b94b5 commit ba639de

File tree

2 files changed

+156
-31
lines changed

2 files changed

+156
-31
lines changed

packages/opentelemetry-instrumentation-openai-agents/opentelemetry/instrumentation/openai_agents/_realtime_wrappers.py

Lines changed: 42 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -529,33 +529,34 @@ async def traced_put_event(self, event):
529529
if content and role == "assistant":
530530
state.record_completion(role, content)
531531

532-
elif event_type == "response":
533-
output = getattr(event, "output", None)
534-
if output and isinstance(output, list):
535-
for item in output:
532+
elif event_type == "history_updated":
533+
history = getattr(event, "history", None)
534+
if history and isinstance(history, list):
535+
for item in reversed(history):
536536
role = getattr(item, "role", None)
537537
if role == "assistant":
538538
item_content = getattr(item, "content", None)
539-
if item_content:
540-
if isinstance(item_content, list):
541-
for part in item_content:
542-
text = getattr(part, "text", None)
543-
if text:
544-
state.record_completion(role, text)
545-
break
546-
elif isinstance(item_content, str):
547-
state.record_completion(role, item_content)
539+
if item_content and isinstance(item_content, list):
540+
for part in item_content:
541+
text = getattr(part, "text", None) or getattr(
542+
part, "transcript", None
543+
)
544+
if text:
545+
state.record_completion(role, text)
546+
break
547+
break
548548

549549
elif event_type == "raw_model_event":
550550
data = getattr(event, "data", None)
551551
if data:
552552
if isinstance(data, dict):
553553
data_type = data.get("type")
554-
raw_data = data.get("data", data)
555-
if isinstance(raw_data, dict):
554+
raw_data = data.get("data")
555+
if raw_data and isinstance(raw_data, dict):
556556
nested_type = raw_data.get("type")
557557
if nested_type:
558558
data_type = nested_type
559+
data = raw_data
559560
else:
560561
data_type = getattr(data, "type", None)
561562
nested_data = getattr(data, "data", None)
@@ -586,28 +587,38 @@ async def traced_put_event(self, event):
586587
if usage:
587588
state.record_usage(usage)
588589

589-
output = getattr(response, "output", None)
590+
if isinstance(response, dict):
591+
output = response.get("output")
592+
else:
593+
output = getattr(response, "output", None)
590594
if output and isinstance(output, list):
591595
for item in output:
592-
item_type = getattr(item, "type", None)
593-
if item_type == "message":
596+
if isinstance(item, dict):
597+
item_type = item.get("type")
598+
role = item.get("role")
599+
item_content = item.get("content")
600+
else:
601+
item_type = getattr(item, "type", None)
594602
role = getattr(item, "role", None)
595-
if role == "assistant":
596-
item_content = getattr(
597-
item, "content", None
598-
)
599-
if item_content and isinstance(
600-
item_content, list
601-
):
602-
for part in item_content:
603+
item_content = getattr(
604+
item, "content", None
605+
)
606+
if item_type == "message" and role == "assistant":
607+
if item_content and isinstance(
608+
item_content, list
609+
):
610+
for part in item_content:
611+
if isinstance(part, dict):
612+
text = part.get("text")
613+
else:
603614
text = getattr(
604615
part, "text", None
605616
)
606-
if text:
607-
state.record_completion(
608-
role, text
609-
)
610-
break
617+
if text:
618+
state.record_completion(
619+
role, text
620+
)
621+
break
611622

612623
elif data_type == "item_updated":
613624
item = getattr(data, "item", None)

packages/opentelemetry-instrumentation-openai-agents/tests/test_realtime_session.py

Lines changed: 114 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -475,3 +475,117 @@ def test_audio_span_parented_under_current_agent(self, tracer, tracer_provider):
475475
audio_span = next(s for s in spans if s.name == "openai.realtime")
476476

477477
assert audio_span.parent.span_id == agent_span.context.span_id
478+
479+
480+
class TestTracedPutEventHandlers:
481+
"""Tests for event handling in traced_put_event via RealtimeTracingState."""
482+
483+
def test_history_updated_captures_assistant_completion(self, tracer, tracer_provider):
484+
"""Test that history_updated events capture assistant completions."""
485+
_, exporter = tracer_provider
486+
state = RealtimeTracingState(tracer)
487+
state.start_workflow_span("Test Agent")
488+
state.start_agent_span("Voice Assistant")
489+
490+
state.record_prompt("user", "Hello")
491+
492+
# Simulate what history_updated handler does: scan history for assistant content
493+
class MockContent:
494+
def __init__(self, text=None, transcript=None):
495+
self.text = text
496+
self.transcript = transcript
497+
498+
class MockItem:
499+
def __init__(self, role, content):
500+
self.role = role
501+
self.content = content
502+
503+
history = [
504+
MockItem("user", [MockContent(text="Hello")]),
505+
MockItem("assistant", [MockContent(transcript="Hi there!")]),
506+
]
507+
508+
# Replicate the history_updated handler logic
509+
for item in reversed(history):
510+
role = getattr(item, "role", None)
511+
if role == "assistant":
512+
item_content = getattr(item, "content", None)
513+
if item_content and isinstance(item_content, list):
514+
for part in item_content:
515+
text = getattr(part, "text", None) or getattr(
516+
part, "transcript", None
517+
)
518+
if text:
519+
state.record_completion(role, text)
520+
break
521+
break
522+
523+
state.cleanup()
524+
state.end_workflow_span()
525+
526+
spans = exporter.get_finished_spans()
527+
llm_spans = [s for s in spans if s.name == "openai.realtime"]
528+
assert len(llm_spans) == 1
529+
assert llm_spans[0].attributes.get("gen_ai.completion.0.content") == "Hi there!"
530+
531+
def test_response_done_dict_captures_usage_and_completion(self, tracer, tracer_provider):
532+
"""Test that response.done with dict data captures usage and completions."""
533+
_, exporter = tracer_provider
534+
state = RealtimeTracingState(tracer)
535+
state.start_workflow_span("Test Agent")
536+
state.start_agent_span("Voice Assistant")
537+
538+
state.record_prompt("user", "What is the weather?")
539+
540+
# Simulate dict-based response.done data (as sent by OpenAI raw API)
541+
response_done_data = {
542+
"type": "response.done",
543+
"response": {
544+
"usage": {
545+
"input_tokens": 42,
546+
"output_tokens": 18,
547+
"total_tokens": 60,
548+
},
549+
"output": [
550+
{
551+
"type": "message",
552+
"role": "assistant",
553+
"content": [
554+
{"type": "text", "text": "It is sunny today."}
555+
],
556+
}
557+
],
558+
},
559+
}
560+
561+
# Extract usage from dict
562+
response = response_done_data.get("response", {})
563+
usage = response.get("usage") if isinstance(response, dict) else None
564+
if usage:
565+
state.record_usage(usage)
566+
567+
# Extract completion from dict
568+
output = response.get("output") if isinstance(response, dict) else None
569+
if output and isinstance(output, list):
570+
for item in output:
571+
if isinstance(item, dict):
572+
if item.get("type") == "message" and item.get("role") == "assistant":
573+
item_content = item.get("content")
574+
if item_content and isinstance(item_content, list):
575+
for part in item_content:
576+
text = part.get("text") if isinstance(part, dict) else None
577+
if text:
578+
state.record_completion("assistant", text)
579+
break
580+
581+
state.cleanup()
582+
state.end_workflow_span()
583+
584+
spans = exporter.get_finished_spans()
585+
llm_spans = [s for s in spans if s.name == "openai.realtime"]
586+
assert len(llm_spans) == 1
587+
588+
llm_span = llm_spans[0]
589+
assert llm_span.attributes.get("gen_ai.usage.input_tokens") == 42
590+
assert llm_span.attributes.get("gen_ai.usage.output_tokens") == 18
591+
assert llm_span.attributes.get("gen_ai.completion.0.content") == "It is sunny today."

0 commit comments

Comments
 (0)