fix(openai-agents): fix realtime session event handling for prompts, completions, and usage

EliJaghab · EliJaghab · commit ba639deb6dee · 2026-02-15T17:40:42.000-05:00
Handle `history_updated` events so that assistant transcript updates are captured as completions. Fix the `response.done` handler, which used `getattr` on dict payloads instead of `.get()` and therefore silently received `None`. Fix the unwrapping of dict-based raw model events, where the `data` variable was not rebound to the nested payload. Remove the dead `response` event handler, which could never match. Closes #3685
diff --git a/packages/opentelemetry-instrumentation-openai-agents/opentelemetry/instrumentation/openai_agents/_realtime_wrappers.py b/packages/opentelemetry-instrumentation-openai-agents/opentelemetry/instrumentation/openai_agents/_realtime_wrappers.py
@@ -529,33 +529,34 @@ async def traced_put_event(self, event):
                         if content and role == "assistant":
                             state.record_completion(role, content)
 
-                elif event_type == "response":
-                    output = getattr(event, "output", None)
-                    if output and isinstance(output, list):
-                        for item in output:
+                elif event_type == "history_updated":
+                    history = getattr(event, "history", None)
+                    if history and isinstance(history, list):
+                        for item in reversed(history):
                             role = getattr(item, "role", None)
                             if role == "assistant":
                                 item_content = getattr(item, "content", None)
-                                if item_content:
-                                    if isinstance(item_content, list):
-                                        for part in item_content:
-                                            text = getattr(part, "text", None)
-                                            if text:
-                                                state.record_completion(role, text)
-                                                break
-                                    elif isinstance(item_content, str):
-                                        state.record_completion(role, item_content)
+                                if item_content and isinstance(item_content, list):
+                                    for part in item_content:
+                                        text = getattr(part, "text", None) or getattr(
+                                            part, "transcript", None
+                                        )
+                                        if text:
+                                            state.record_completion(role, text)
+                                            break
+                                break
 
                 elif event_type == "raw_model_event":
                     data = getattr(event, "data", None)
                     if data:
                         if isinstance(data, dict):
                             data_type = data.get("type")
-                            raw_data = data.get("data", data)
-                            if isinstance(raw_data, dict):
+                            raw_data = data.get("data")
+                            if raw_data and isinstance(raw_data, dict):
                                 nested_type = raw_data.get("type")
                                 if nested_type:
                                     data_type = nested_type
+                                    data = raw_data
                         else:
                             data_type = getattr(data, "type", None)
                             nested_data = getattr(data, "data", None)
@@ -586,28 +587,38 @@ async def traced_put_event(self, event):
                             if usage:
                                 state.record_usage(usage)
 
-                                output = getattr(response, "output", None)
+                                if isinstance(response, dict):
+                                    output = response.get("output")
+                                else:
+                                    output = getattr(response, "output", None)
                                 if output and isinstance(output, list):
                                     for item in output:
-                                        item_type = getattr(item, "type", None)
-                                        if item_type == "message":
+                                        if isinstance(item, dict):
+                                            item_type = item.get("type")
+                                            role = item.get("role")
+                                            item_content = item.get("content")
+                                        else:
+                                            item_type = getattr(item, "type", None)
                                             role = getattr(item, "role", None)
-                                            if role == "assistant":
-                                                item_content = getattr(
-                                                    item, "content", None
-                                                )
-                                                if item_content and isinstance(
-                                                    item_content, list
-                                                ):
-                                                    for part in item_content:
+                                            item_content = getattr(
+                                                item, "content", None
+                                            )
+                                        if item_type == "message" and role == "assistant":
+                                            if item_content and isinstance(
+                                                item_content, list
+                                            ):
+                                                for part in item_content:
+                                                    if isinstance(part, dict):
+                                                        text = part.get("text")
+                                                    else:
                                                         text = getattr(
                                                             part, "text", None
                                                         )
-                                                        if text:
-                                                            state.record_completion(
-                                                                role, text
-                                                            )
-                                                            break
+                                                    if text:
+                                                        state.record_completion(
+                                                            role, text
+                                                        )
+                                                        break
 
                         elif data_type == "item_updated":
                             item = getattr(data, "item", None)
diff --git a/packages/opentelemetry-instrumentation-openai-agents/tests/test_realtime_session.py b/packages/opentelemetry-instrumentation-openai-agents/tests/test_realtime_session.py
@@ -475,3 +475,117 @@ def test_audio_span_parented_under_current_agent(self, tracer, tracer_provider):
         audio_span = next(s for s in spans if s.name == "openai.realtime")
 
         assert audio_span.parent.span_id == agent_span.context.span_id
+
+
+class TestTracedPutEventHandlers:
+    """Tests for event handling in traced_put_event via RealtimeTracingState."""
+
+    def test_history_updated_captures_assistant_completion(self, tracer, tracer_provider):
+        """Test that history_updated events capture assistant completions."""
+        _, exporter = tracer_provider
+        state = RealtimeTracingState(tracer)
+        state.start_workflow_span("Test Agent")
+        state.start_agent_span("Voice Assistant")
+
+        state.record_prompt("user", "Hello")
+
+        # Simulate what history_updated handler does: scan history for assistant content
+        class MockContent:
+            def __init__(self, text=None, transcript=None):
+                self.text = text
+                self.transcript = transcript
+
+        class MockItem:
+            def __init__(self, role, content):
+                self.role = role
+                self.content = content
+
+        history = [
+            MockItem("user", [MockContent(text="Hello")]),
+            MockItem("assistant", [MockContent(transcript="Hi there!")]),
+        ]
+
+        # Replicate the history_updated handler logic
+        for item in reversed(history):
+            role = getattr(item, "role", None)
+            if role == "assistant":
+                item_content = getattr(item, "content", None)
+                if item_content and isinstance(item_content, list):
+                    for part in item_content:
+                        text = getattr(part, "text", None) or getattr(
+                            part, "transcript", None
+                        )
+                        if text:
+                            state.record_completion(role, text)
+                            break
+                break
+
+        state.cleanup()
+        state.end_workflow_span()
+
+        spans = exporter.get_finished_spans()
+        llm_spans = [s for s in spans if s.name == "openai.realtime"]
+        assert len(llm_spans) == 1
+        assert llm_spans[0].attributes.get("gen_ai.completion.0.content") == "Hi there!"
+
+    def test_response_done_dict_captures_usage_and_completion(self, tracer, tracer_provider):
+        """Test that response.done with dict data captures usage and completions."""
+        _, exporter = tracer_provider
+        state = RealtimeTracingState(tracer)
+        state.start_workflow_span("Test Agent")
+        state.start_agent_span("Voice Assistant")
+
+        state.record_prompt("user", "What is the weather?")
+
+        # Simulate dict-based response.done data (as sent by OpenAI raw API)
+        response_done_data = {
+            "type": "response.done",
+            "response": {
+                "usage": {
+                    "input_tokens": 42,
+                    "output_tokens": 18,
+                    "total_tokens": 60,
+                },
+                "output": [
+                    {
+                        "type": "message",
+                        "role": "assistant",
+                        "content": [
+                            {"type": "text", "text": "It is sunny today."}
+                        ],
+                    }
+                ],
+            },
+        }
+
+        # Extract usage from dict
+        response = response_done_data.get("response", {})
+        usage = response.get("usage") if isinstance(response, dict) else None
+        if usage:
+            state.record_usage(usage)
+
+        # Extract completion from dict
+        output = response.get("output") if isinstance(response, dict) else None
+        if output and isinstance(output, list):
+            for item in output:
+                if isinstance(item, dict):
+                    if item.get("type") == "message" and item.get("role") == "assistant":
+                        item_content = item.get("content")
+                        if item_content and isinstance(item_content, list):
+                            for part in item_content:
+                                text = part.get("text") if isinstance(part, dict) else None
+                                if text:
+                                    state.record_completion("assistant", text)
+                                    break
+
+        state.cleanup()
+        state.end_workflow_span()
+
+        spans = exporter.get_finished_spans()
+        llm_spans = [s for s in spans if s.name == "openai.realtime"]
+        assert len(llm_spans) == 1
+
+        llm_span = llm_spans[0]
+        assert llm_span.attributes.get("gen_ai.usage.input_tokens") == 42
+        assert llm_span.attributes.get("gen_ai.usage.output_tokens") == 18
+        assert llm_span.attributes.get("gen_ai.completion.0.content") == "It is sunny today."