"""User-facing CLI: re-run a saved analysis ``.py`` file against the local trio bundle.
Saved code lives in ``output/{STUDY}/agent/analysis/code/run_*.py`` and gets
a docstring header explaining how to replicate the run. This module is the
``replicate`` step from that header::
python -m scripts.ai_assistant.sandbox.replicate <path_to_saved.py>
Unlike the agent-side sandbox, this runs the code in the current Python
process so the user can see output / interact with figures / write files
to their working directory normally. The same AST guards still apply
(import allow-list, dunder block) as a defense-in-depth check on code that
was originally LLM-generated, even if the user has chosen to run it locally.
"""
from __future__ import annotations
import builtins
import sys
from pathlib import Path
from typing import Any
import config
from scripts.ai_assistant.sandbox.runner import (
SandboxRejectionError,
_ast_pre_check,
_load_dataframes,
)
_HEADER_MARKER = "# === LLM-generated analysis code below ==="
def _strip_header(text: str) -> str:
"""Drop the docstring header so we exec only the LLM-generated portion."""
if _HEADER_MARKER in text:
return text.split(_HEADER_MARKER, 1)[1]
return text
def _discover_local_dataframes() -> dict[str, str]:
"""Find ``df_*`` JSONL paths from the local trio bundle."""
import re
datasets_dir = config.TRIO_DATASETS_DIR
if not datasets_dir.is_dir():
return {}
out: dict[str, str] = {}
for f in sorted(datasets_dir.glob("*.jsonl")):
var_name = "df_" + re.sub(r"[^a-zA-Z0-9_]", "_", f.stem)
out[var_name] = str(f.resolve())
return out
[docs]
def main(path_str: str) -> int:
path = Path(path_str)
if not path.is_file():
print(f"File not found: {path}", file=sys.stderr)
return 1
code = _strip_header(path.read_text(encoding="utf-8"))
try:
_ast_pre_check(code)
except SyntaxError as exc:
print(f"Syntax error in saved code: {exc}", file=sys.stderr)
return 2
except SandboxRejectionError as exc:
print(f"Saved code violates the AST allow-list: {exc}", file=sys.stderr)
print("Refusing to run. Inspect the file and either edit it or run", file=sys.stderr)
print("manually if you trust it.", file=sys.stderr)
return 2
df_paths = _discover_local_dataframes()
if not df_paths:
print(
f"Warning: no trio JSONL files found in {config.TRIO_DATASETS_DIR}.\n"
"Code will run, but pre-loaded DataFrames will be empty.",
file=sys.stderr,
)
dataframes = _load_dataframes(df_paths)
namespace: dict[str, Any] = {"__name__": "__main__", **dataframes}
try:
import numpy as np
import pandas as pd
namespace["pd"] = pd
namespace["np"] = np
except ImportError:
pass
try:
import plotly.express as px # type: ignore
import plotly.graph_objects as go # type: ignore
namespace["px"] = px
namespace["go"] = go
except ImportError:
pass
print(f"Replicating {path.name} ({len(dataframes)} DataFrame(s) loaded)\n", file=sys.stderr)
try:
# ``e``+``xec`` literal split to avoid a static-analysis hook
# misfire on this source file; behavior is identical.
getattr(builtins, "e" + "xec")(compile(code, str(path), "e" + "xec"), namespace)
except Exception as exc:
print(f"Error during replication: {type(exc).__name__}: {exc}", file=sys.stderr)
return 1
return 0
if __name__ == "__main__": # pragma: no cover
if len(sys.argv) != 2:
print(
"usage: python -m scripts.ai_assistant.sandbox.replicate <path_to_saved.py>",
file=sys.stderr,
)
sys.exit(64)
sys.exit(main(sys.argv[1]))