Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
45 changes: 45 additions & 0 deletions packages/prime/src/prime_cli/commands/evals.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import argparse
import json
import os
import re
import time
from functools import wraps
Expand Down Expand Up @@ -33,6 +34,7 @@
)
from ..utils.display import get_eval_viewer_url
from ..utils.env_metadata import find_environment_metadata
from ..utils.env_vars import EnvParseError, collect_env_vars
from ..utils.eval_push import load_results_jsonl
from ..utils.hosted_eval import (
EvalStatus,
Expand Down Expand Up @@ -476,6 +478,31 @@ def _fetch_logs(client: APIClient, eval_id: str) -> str:
return response.get("logs") or ""


def _apply_eval_cli_env_overrides(
    env_var: Optional[list[str]],
    env_file: Optional[list[str]],
) -> None:
    """Load explicit eval CLI env inputs into the current process environment.

    Parses ``--env-var`` / ``--env-file`` inputs via ``collect_env_vars`` and
    writes the resulting key/value pairs into ``os.environ`` so later
    auth/model resolution picks them up. Parse failures abort the command
    with exit code 1; non-fatal issues are surfaced as console warnings.
    """
    # Nothing to do when neither flag was supplied.
    if not (env_var or env_file):
        return

    try:
        overrides = collect_env_vars(
            env_args=env_var,
            env_files=env_file,
            on_warning=lambda message: console.print(
                f"[yellow]Warning:[/yellow] {message}"
            ),
        )
    except EnvParseError as exc:
        console.print(f"[red]Error:[/red] {exc}")
        raise typer.Exit(1) from exc

    # Apply every override to the live process environment.
    os.environ.update(overrides)


def _build_hosted_evaluation_payload(config: HostedEvalConfig) -> dict[str, Any]:
eval_config: dict[str, Any] = {
"num_examples": config.num_examples,
Expand Down Expand Up @@ -1312,6 +1339,23 @@ def run_eval_cmd(
"(used to locate .prime/.env-metadata.json for upstream resolution)"
),
),
env_var: Optional[list[str]] = typer.Option(
None,
"--env-var",
help=(
"Environment variable to load before Prime auth/model resolution. "
"Accepts: KEY=VALUE (direct value), KEY (reads from $KEY), "
"or path/to/file.env (loads env file)."
),
),
env_file: Optional[list[str]] = typer.Option(
None,
"--env-file",
help=(
"Path to .env file to load before Prime auth/model resolution. "
"Supports ${VAR} expansion from local env."
),
),
hosted: bool = typer.Option(
False,
"--hosted",
Expand Down Expand Up @@ -1383,6 +1427,7 @@ def run_eval_cmd(
console.print(f"[dim]Example: {EVAL_RUN_EXAMPLE_COMMAND}[/dim]")
raise typer.Exit(2)

_apply_eval_cli_env_overrides(env_var=env_var, env_file=env_file)
env_dir_path: Optional[str] = None
poll_interval_was_provided = (
ctx.get_parameter_source("poll_interval") == ParameterSource.COMMANDLINE
Expand Down
2 changes: 2 additions & 0 deletions packages/prime/src/prime_cli/verifiers_bridge.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,8 @@ def _write_help(text: str) -> None:

def _append_eval_options(help_text: str) -> str:
extra_lines = [
" --env-var TEXT Load env vars before Prime auth/model resolution.",
" --env-file PATH Load a .env file before Prime auth/model resolution.",
" --skip-upload Skip uploading evaluation results to the platform.",
" --env-path PATH Explicit path for upstream environment metadata.",
" --hosted Run the evaluation on the platform instead of locally.",
Expand Down
7 changes: 7 additions & 0 deletions packages/prime/tests/test_eval_help.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,13 @@ def test_sanitize_help_removes_vf_eval_aliases():
assert "env_id_or_config" not in help_text


def test_append_eval_options_includes_env_loading_flags() -> None:
    """The appended help text must advertise both env-loading flags."""
    help_text = _append_eval_options("Usage: prime eval run\n")

    for flag in ("--env-file PATH", "--env-var TEXT"):
        assert flag in help_text


def test_append_eval_options_mentions_tunnel_access():
help_text = _append_eval_options("Usage: prime eval run [-h] environment\n")

Expand Down
113 changes: 113 additions & 0 deletions packages/prime/tests/test_hosted_eval.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import os
from pathlib import Path

import pytest
Expand Down Expand Up @@ -1329,6 +1330,118 @@ def fake_run_eval_passthrough(environment, passthrough_args, skip_upload, env_pa
}


def patch_local_eval_bridge(monkeypatch, captured):
    """Stub the verifiers bridge so a local eval run records state in *captured*.

    Replaces the plugin loader, config, validation/billing preflight,
    environment resolution, and command runner inside
    ``prime_cli.verifiers_bridge``. The fake runner captures the final
    command and its environment, then exits with status 0.
    """

    class _FakePlugin:
        # Module path the bridge resolves for local eval execution.
        eval_module = "verifiers.cli.commands.eval"

        def build_module_command(self, module, args):
            return [module, *args]

    class _FakeConfig:
        def __init__(self):
            # Snapshot the process env at Config construction time so tests
            # can assert which values were visible during resolution.
            captured["config_api_key"] = os.environ.get("PRIME_API_KEY")
            captured["config_inference_url"] = os.environ.get("PRIME_INFERENCE_URL")
            self.api_key = captured["config_api_key"] or ""
            self.inference_url = (
                captured["config_inference_url"] or "https://configured.example/v1"
            )
            self.team_id = None

    def _fake_prepare(_plugin, env_reference, _env_dir_path):
        return ResolvedEnvironment(
            original=env_reference,
            env_name=env_reference,
            install_mode="local",
        )

    def _fake_run(command, env=None):
        captured["command"] = command
        captured["run_env"] = env or {}
        # Short-circuit the run with a success exit instead of executing.
        raise typer.Exit(0)

    replacements = {
        "prime_cli.verifiers_bridge.load_verifiers_prime_plugin": (
            lambda console: _FakePlugin()
        ),
        "prime_cli.verifiers_bridge.Config": lambda: _FakeConfig(),
        "prime_cli.verifiers_bridge._validate_model": lambda *_args: None,
        "prime_cli.verifiers_bridge._preflight_inference_billing": (
            lambda *_args: None
        ),
        "prime_cli.verifiers_bridge._prepare_single_environment": _fake_prepare,
        "prime_cli.verifiers_bridge._run_command": _fake_run,
    }
    for target, replacement in replacements.items():
        monkeypatch.setattr(target, replacement)


def test_eval_run_env_file_overrides_process_env(monkeypatch, tmp_path):
    """Values loaded via --env-file must win over pre-existing process env vars."""
    captured = {}
    patch_local_eval_bridge(monkeypatch, captured)

    env_file = tmp_path / ".env"
    env_file.write_text(
        "PRIME_API_KEY=from-env-file\n"
        "PRIME_INFERENCE_URL=https://from-env-file.example/v1\n"
    )

    process_env = {
        "PRIME_DISABLE_VERSION_CHECK": "1",
        "PRIME_API_KEY": "from-process-env",
        "PRIME_INFERENCE_URL": "https://from-process.example/v1",
    }
    result = runner.invoke(
        app,
        ["eval", "run", "gsm8k", "--env-file", str(env_file)],
        env=process_env,
    )

    assert result.exit_code == 0, result.output
    # The env-file values must be what Config saw and what the child run got.
    expected = {
        "PRIME_API_KEY": "from-env-file",
        "PRIME_INFERENCE_URL": "https://from-env-file.example/v1",
    }
    assert captured["config_api_key"] == expected["PRIME_API_KEY"]
    assert captured["config_inference_url"] == expected["PRIME_INFERENCE_URL"]
    for key, value in expected.items():
        assert captured["run_env"][key] == value
    # The flag itself is consumed by the CLI, not forwarded downstream.
    assert "--env-file" not in captured["command"]


def test_eval_run_env_var_overrides_env_file(monkeypatch, tmp_path):
    """--env-var entries must take precedence over both --env-file and process env."""
    captured = {}
    patch_local_eval_bridge(monkeypatch, captured)

    env_file = tmp_path / ".env"
    env_file.write_text(
        "PRIME_API_KEY=from-env-file\n"
        "PRIME_INFERENCE_URL=https://from-env-file.example/v1\n"
    )

    cli_args = ["eval", "run", "gsm8k", "--env-file", str(env_file)]
    cli_args += ["--env-var", "PRIME_API_KEY=from-env-var"]
    cli_args += ["--env-var", "PRIME_INFERENCE_URL=https://from-env-var.example/v1"]

    result = runner.invoke(
        app,
        cli_args,
        env={
            "PRIME_DISABLE_VERSION_CHECK": "1",
            "PRIME_API_KEY": "from-process-env",
            "PRIME_INFERENCE_URL": "https://from-process.example/v1",
        },
    )

    assert result.exit_code == 0, result.output
    # --env-var values must be what Config saw and what the child run got.
    expected = {
        "PRIME_API_KEY": "from-env-var",
        "PRIME_INFERENCE_URL": "https://from-env-var.example/v1",
    }
    assert captured["config_api_key"] == expected["PRIME_API_KEY"]
    assert captured["config_inference_url"] == expected["PRIME_INFERENCE_URL"]
    for key, value in expected.items():
        assert captured["run_env"][key] == value
    # The flag itself is consumed by the CLI, not forwarded downstream.
    assert "--env-var" not in captured["command"]


@pytest.mark.parametrize(
("extra_args", "expected_flag"),
[
Expand Down