Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 35 additions & 0 deletions verifiers/envs/experimental/cli_agent_env.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,7 @@ def __init__(
sandbox_creations_per_minute: float | None = 128,
timeouts: SandboxTimeouts = SandboxTimeouts(),
keep_sandbox_for_scoring: bool = False,
task_finished_signal: str | None = None,
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Missing documentation for new task_finished_signal parameter

Low Severity

The new `task_finished_signal` parameter and its corresponding `@vf.stop` hook `task_finished_signal_emitted` add user-facing functionality to `CliAgentEnv`, but `docs/environments.md` (line 888) — which explicitly enumerates `CliAgentEnv` constructor parameters — was not updated to mention the new parameter or describe the stop-signal behavior. This violates the documentation update rule for changes to core user-facing functionality described in `docs/`.

Fix in Cursor Fix in Web

Triggered by project rule: BugBot Instructions

Reviewed by Cursor Bugbot for commit ed421b3. Configure here.

**kwargs,
):
super().__init__(max_turns=max_turns, message_type="chat", **kwargs)
Expand All @@ -137,6 +138,7 @@ def __init__(
timeouts=timeouts,
)
self.keep_sandbox_for_scoring = keep_sandbox_for_scoring
self.task_finished_signal = task_finished_signal
self.run_command = run_command
self.poll_interval = poll_interval
self.timeout_seconds = timeout_seconds
Expand Down Expand Up @@ -677,6 +679,39 @@ async def timeout_reached(self, state: State) -> bool:
elapsed = time.time() - state["timing"]["start_time"]
return elapsed > self.timeout_seconds

@vf.stop
async def task_finished_signal_emitted(self, state: State) -> bool:
    """Report whether the configured task-finished marker has been seen.

    Only the prompt of the most recent trajectory step is inspected, and
    only its unbroken trailing run of ``tool``-role messages: the walk
    goes backwards from the end and gives up at the first message whose
    role is anything else. A message counts as a hit when its content is
    truthy and its string form contains the marker.

    Returns:
        True if a trailing tool message contains
        ``self.task_finished_signal``. False when no signal is configured,
        the trajectory is missing or empty, the last step has no prompt,
        or no trailing tool message contains the marker.
    """

    def read_field(msg, name):
        # Messages may be plain dicts or objects exposing attributes.
        if isinstance(msg, dict):
            return msg.get(name)
        return getattr(msg, name, None)

    marker = self.task_finished_signal
    if not marker:
        return False

    steps = state.get("trajectory", [])
    messages = (steps[-1].get("prompt") or []) if steps else []

    for msg in reversed(messages):
        if read_field(msg, "role") != "tool":
            # Left the trailing block of tool messages; nothing older counts.
            return False
        body = read_field(msg, "content")
        if body and marker in str(body):
            return True
    return False

async def post_rollout(self, state: State):
"""
Override for custom post-rollout logic. For example, if sandbox state is needed for reward functions,
Expand Down
Loading