PrimeIntellect-ai · rasdani · Apr 24, 2026 · cursor · Apr 24, 2026
diff --git a/verifiers/envs/experimental/cli_agent_env.py b/verifiers/envs/experimental/cli_agent_env.py
@@ -120,6 +120,7 @@ def __init__(
         sandbox_creations_per_minute: float | None = 128,
         timeouts: SandboxTimeouts = SandboxTimeouts(),
         keep_sandbox_for_scoring: bool = False,
+        task_finished_signal: str | None = None,
         **kwargs,
     ):
         super().__init__(max_turns=max_turns, message_type="chat", **kwargs)
@@ -137,6 +138,7 @@ def __init__(
             timeouts=timeouts,
         )
         self.keep_sandbox_for_scoring = keep_sandbox_for_scoring
+        self.task_finished_signal = task_finished_signal
         self.run_command = run_command
         self.poll_interval = poll_interval
         self.timeout_seconds = timeout_seconds
@@ -677,6 +679,39 @@ async def timeout_reached(self, state: State) -> bool:
         elapsed = time.time() - state["timing"]["start_time"]
         return elapsed > self.timeout_seconds
 
+    @vf.stop
+    async def task_finished_signal_emitted(self, state: State) -> bool:
+        """Stop once a trailing tool message carries the task-finished signal.
+
+        Scans only the run of tool-role messages that ends the latest
+        trajectory step's prompt (e.g. the echoed output of the agent running
+        `echo "TASK_FINISHED"`), which keeps the check cheap. List-valued
+        content is matched on each part's `text` as well as on `str(content)`,
+        since the list repr escapes quotes, newlines and backslashes.
+        """
+        signal = self.task_finished_signal
+        if not signal:
+            return False
+        trajectory = state.get("trajectory", [])
+        if not trajectory:
+            return False
+
+        def field(obj, key):
+            # Messages and content parts may be dicts or attribute objects.
+            return obj.get(key) if isinstance(obj, dict) else getattr(obj, key, None)
+
+        for message in reversed(trajectory[-1].get("prompt") or []):
+            if field(message, "role") != "tool":
+                break  # only the unbroken trailing run of tool messages counts
+            content = field(message, "content")
+            if content and signal in str(content):
+                return True
+            if isinstance(content, (list, tuple)) and any(
+                signal in str(field(part, "text") or "") for part in content
+            ):
+                return True
+        return False
+
     async def post_rollout(self, state: State):
         """
         Override for custom post-rollout logic. For example, if sandbox state is needed for reward functions,