Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 34 additions & 3 deletions python/packages/devui/agent_framework_devui/_executor.py
Original file line number Diff line number Diff line change
Expand Up @@ -631,7 +631,11 @@ def _convert_openai_input_to_chat_message(self, input_items: list[Any], Message:
if isinstance(item, dict):
item_dict = cast(dict[str, Any], item)
item_type = item_dict.get("type")
if item_type == "message":
if item_type == "message" or (item_type is None and "role" in item_dict):
message_role = item_dict.get("role")
if message_role is not None and message_role != "user":
logger.debug("Skipping non-user OpenAI message item with role %r", message_role)
continue
# Extract content from OpenAI message
message_content = item_dict.get("content", [])

Expand Down Expand Up @@ -777,6 +781,7 @@ def _convert_openai_input_to_chat_message(self, input_items: list[Any], Message:

# If no contents found, create a simple text message
if not contents:
logger.warning("All input items were non-user; no user content extracted")
contents.append(Content.from_text(text=""))

chat_message = Message(role="user", contents=contents)
Expand Down Expand Up @@ -828,8 +833,34 @@ def _is_openai_multimodal_format(self, input_data: Any) -> bool:
first_item = input_data_items[0]
if not isinstance(first_item, dict):
return False
first_type = cast(dict[str, Any], first_item).get("type")
return isinstance(first_type, str) and first_type == "message"
first_dict = cast(dict[str, Any], first_item)
first_type = first_dict.get("type")
is_chat_format = False
if isinstance(first_type, str) and first_type == "message":
is_chat_format = True
elif first_type is None:
# Also accept Chat Completions format: {"role": "...", "content": "..."}
# but require the minimum expected shape to avoid misclassifying
# unrelated or malformed list inputs as chat messages.
role = first_dict.get("role")
content = first_dict.get("content")
valid_roles = {"system", "user", "assistant", "tool", "developer"}
is_chat_format = bool(
isinstance(role, str)
and role in valid_roles
and "content" in first_dict
and isinstance(content, str | list)
)

if not is_chat_format:
return False

# Require at least one user-role item to avoid routing non-user-only
# arrays into the conversion path where all items would be skipped,
# silently producing an empty message.
return any(
isinstance(item, dict) and cast(dict[str, Any], item).get("role") == "user" for item in input_data_items
)

async def _parse_workflow_input(self, workflow: Any, raw_input: Any) -> Any:
"""Parse input based on workflow's expected input type.
Expand Down
192 changes: 192 additions & 0 deletions python/packages/devui/tests/devui/test_multimodal_workflow.py
Original file line number Diff line number Diff line change
Expand Up @@ -150,3 +150,195 @@ async def test_parse_workflow_input_still_handles_simple_dict(self):

# Result should be Message (from _parse_structured_workflow_input)
assert isinstance(result, Message), f"Expected Message, got {type(result)}"

def test_is_openai_multimodal_format_detects_chat_completions_format(self):
    """A bare role/content item (no ``type`` key) is detected as OpenAI format."""
    executor = AgentFrameworkExecutor(
        MagicMock(spec=EntityDiscovery),
        MagicMock(spec=MessageMapper),
    )

    # Chat Completions shape: the item carries role + content but no "type".
    payload = [{"role": "user", "content": "Describe this image"}]

    detected = executor._is_openai_multimodal_format(payload)
    assert detected is True

def test_convert_chat_completions_format_with_string_content(self):
    """A Chat Completions item with plain-string content converts to a single-part Message."""
    from agent_framework import Message

    executor = AgentFrameworkExecutor(
        MagicMock(spec=EntityDiscovery),
        MagicMock(spec=MessageMapper),
    )
    question = "Which Google phones are allowed?"

    # The item has no "type" key and its content is a plain string.
    result = executor._convert_input_to_chat_message([{"role": "user", "content": question}])

    assert isinstance(result, Message), f"Expected Message, got {type(result)}"
    assert len(result.contents) == 1
    assert result.contents[0].text == question

def test_convert_chat_completions_envelope_with_responses_api_content(self):
    """A typeless role/content envelope may carry Responses API content parts."""
    from agent_framework import Message

    executor = AgentFrameworkExecutor(
        MagicMock(spec=EntityDiscovery),
        MagicMock(spec=MessageMapper),
    )

    # The envelope has no "type" key; its content is a list of
    # input_text / input_image parts.
    text_part = {"type": "input_text", "text": "Describe this image"}
    image_part = {"type": "input_image", "image_url": TEST_IMAGE_DATA_URI}
    input_data = [{"role": "user", "content": [text_part, image_part]}]

    result = executor._convert_input_to_chat_message(input_data)

    assert isinstance(result, Message), f"Expected Message, got {type(result)}"
    assert len(result.contents) == 2
    assert result.contents[0].text == "Describe this image"
    assert result.contents[1].type == "data"

async def test_parse_workflow_input_chat_completions_json_string(self):
    """Regression: a JSON-encoded Chat Completions array fed to ``_parse_workflow_input`` yields a Message."""
    from agent_framework import Message

    executor = AgentFrameworkExecutor(
        MagicMock(spec=EntityDiscovery),
        MagicMock(spec=MessageMapper),
    )

    # Workflow stub whose start executor declares Message as its input type.
    start_executor = MagicMock()
    start_executor.input_types = [Message]
    workflow = MagicMock()
    workflow.get_start_executor.return_value = start_executor

    # The input arrives as a JSON string rather than an already-parsed list
    # (the path DevUI/frontend commonly uses).
    raw_input = json.dumps([{"role": "user", "content": "Which Google phones are allowed?"}])

    result = await executor._parse_workflow_input(workflow, raw_input)

    assert isinstance(result, Message), f"Expected Message, got {type(result)}"
    assert len(result.contents) == 1
    assert result.contents[0].text == "Which Google phones are allowed?"

def test_convert_skips_non_user_messages(self):
    """Only user-role content survives conversion when a system message precedes it."""
    from agent_framework import Message

    executor = AgentFrameworkExecutor(
        MagicMock(spec=EntityDiscovery),
        MagicMock(spec=MessageMapper),
    )

    # One system item followed by one user item; only the latter should be kept.
    system_item = {"role": "system", "content": "You are a helpful assistant."}
    user_item = {"role": "user", "content": "Hello!"}

    result = executor._convert_input_to_chat_message([system_item, user_item])

    assert isinstance(result, Message), f"Expected Message, got {type(result)}"
    assert len(result.contents) == 1
    assert result.contents[0].text == "Hello!"

def test_convert_skips_all_non_user_messages_chat_completions(self):
    """With no user-role items at all, conversion yields a Message holding one empty text part."""
    from agent_framework import Message

    executor = AgentFrameworkExecutor(
        MagicMock(spec=EntityDiscovery),
        MagicMock(spec=MessageMapper),
    )

    # Chat Completions items (no "type" key), none of them from the user.
    system_item = {"role": "system", "content": "You are a helpful assistant."}
    assistant_item = {"role": "assistant", "content": "How can I help?"}

    result = executor._convert_input_to_chat_message([system_item, assistant_item])

    assert isinstance(result, Message), f"Expected Message, got {type(result)}"
    assert len(result.contents) == 1
    assert result.contents[0].text == ""

def test_convert_skips_non_user_messages_responses_api_format(self):
    """Items tagged ``type: message`` with a non-user role are dropped as well."""
    from agent_framework import Message

    executor = AgentFrameworkExecutor(
        MagicMock(spec=EntityDiscovery),
        MagicMock(spec=MessageMapper),
    )

    # Responses API shape: every item carries an explicit "type": "message".
    system_item = {"type": "message", "role": "system", "content": "You are a helpful assistant."}
    user_item = {"type": "message", "role": "user", "content": "Hello!"}

    result = executor._convert_input_to_chat_message([system_item, user_item])

    assert isinstance(result, Message), f"Expected Message, got {type(result)}"
    assert len(result.contents) == 1
    assert result.contents[0].text == "Hello!"

def test_is_openai_multimodal_format_accepts_all_valid_roles(self):
    """Each recognised role leads a detected array as long as a user item is also present."""
    executor = AgentFrameworkExecutor(
        MagicMock(spec=EntityDiscovery),
        MagicMock(spec=MessageMapper),
    )
    detect = executor._is_openai_multimodal_format

    # A lone user message is accepted on its own.
    assert detect([{"role": "user", "content": "hi"}]) is True

    # A non-user first item is accepted when a user item follows it.
    user_item = {"role": "user", "content": "hello"}
    for role in ("system", "assistant", "tool", "developer"):
        leading_item = {"role": role, "content": "hi"}
        assert detect([leading_item, user_item]) is True, (
            f"Expected role {role!r} to be accepted alongside user"
        )

def test_is_openai_multimodal_format_rejects_non_user_only(self):
    """Arrays without any user-role item are not detected, so they never reach the empty-message fallback."""
    executor = AgentFrameworkExecutor(
        MagicMock(spec=EntityDiscovery),
        MagicMock(spec=MessageMapper),
    )
    detect = executor._is_openai_multimodal_format

    # Chat Completions format containing only non-user roles.
    system_only = [{"role": "system", "content": "hi"}]
    assert detect(system_only) is False

    system_and_assistant = [
        {"role": "system", "content": "hi"},
        {"role": "assistant", "content": "hello"},
    ]
    assert detect(system_and_assistant) is False

    # Responses API format containing only a non-user role.
    typed_system_only = [{"type": "message", "role": "system", "content": "hi"}]
    assert detect(typed_system_only) is False

def test_is_openai_multimodal_format_rejects_malformed_input(self):
    """Items lacking content, or with an invalid role or content type, are not detected."""
    executor = AgentFrameworkExecutor(
        MagicMock(spec=EntityDiscovery),
        MagicMock(spec=MessageMapper),
    )

    malformed_items = (
        {"role": "user"},  # missing content key
        {"role": "unknown", "content": "hi"},  # invalid role value
        {"role": 123, "content": "hi"},  # role is not a string
        {"role": "user", "content": 42},  # content is neither str nor list
    )
    for item in malformed_items:
        assert executor._is_openai_multimodal_format([item]) is False
Loading