# Source code for scripts.ai_assistant.file_access

"""Agent-world file-access boundary enforcement.

The production LLM agent's permitted zones are (2026-04-24 boundary design):

* **Read** — ``TRIO_BUNDLE_DIR`` (scrubbed, k-anon-gated trio outputs) **or**
  ``AGENT_STATE_DIR`` (its own analysis outputs and conversations).
  A small allowlist admits read-only source-tree config files
  (``config/study_knowledge.yaml``) that tool implementations need.
* **Write** — ``AGENT_STATE_DIR`` only.

Everything else — ``STUDY_AUDIT_DIR`` (incl. telemetry), ``RAW_DATA_DIR``,
``LOGS_DIR``, ``STUDY_STAGING_DIR``, arbitrary filesystem paths — is
hard-rejected with :class:`ZoneViolationError` (a ``PermissionError``
subclass from ``scripts.security.secure_env``).

This module is the chokepoint: every agent-tool file read or write should
call :func:`validate_agent_read` or :func:`validate_agent_write` before
touching disk. The existing ``assert_trio_bundle_zone`` and
``assert_output_zone`` in ``scripts.security.secure_env`` remain valid
narrower checks — this module layers the expanded agent-runtime zone on
top without changing pipeline-side enforcement.
"""

from __future__ import annotations

import os
from pathlib import Path

import config
from scripts.security.secure_env import ZoneViolationError

__all__ = [
    "ZoneViolationError",
    "is_agent_readable",
    "validate_agent_read",
    "validate_agent_write",
    "validate_sandbox_write",
]


def _resolve(p: str | Path) -> str:
    """Return *p* as a canonical path string with symlinks resolved.

    The argument is stringified first, so both ``str`` and
    :class:`~pathlib.Path` inputs are accepted, and the result is always
    a plain ``str`` produced by ``os.path.realpath``.
    """
    raw_path = str(p)
    return os.path.realpath(raw_path)


def _is_within(path_realpath: str, base_realpath: str) -> bool:
    """Tell whether *path_realpath* equals or lies beneath *base_realpath*.

    Callers must pass values that have already been through
    ``os.path.realpath``; no resolution happens here. The comparison is
    component-wise (``os.path.commonpath``), so a sibling directory that
    merely shares a string prefix with the base is not treated as inside it.
    """
    try:
        shared_prefix = os.path.commonpath((path_realpath, base_realpath))
    except ValueError:
        # commonpath refuses to compare the two paths (e.g. they live on
        # different Windows drives) — they cannot be in the same zone.
        return False
    return shared_prefix == base_realpath


def _zones() -> tuple[list[str], list[str], frozenset[str]]:
    """Build the agent's permitted zones from the current ``config`` values.

    Nothing is cached: the zones are rebuilt on every call so that test
    fixtures which monkeypatch ``config.TRIO_BUNDLE_DIR`` or
    ``config.AGENT_STATE_DIR`` are honoured immediately. The work is a
    handful of path resolutions.

    Returns:
        A ``(read_roots, write_roots, read_allowlist)`` triple of resolved
        path strings. ``read_roots`` holds the trio-bundle and agent-state
        directories, ``write_roots`` holds only the agent-state directory,
        and ``read_allowlist`` is the set of individual files readable
        outside those roots.
    """
    trio_root = _resolve(config.TRIO_BUNDLE_DIR)
    agent_root = _resolve(config.AGENT_STATE_DIR)

    # Repo-tracked config that StudyKnowledge and similar helpers load at
    # tool-invocation time. It describes "how", not "what" (per the hard
    # PHI rule), and lives inside the source tree rather than a data zone.
    # parents[2] climbs from this module's directory to the project root.
    repo_root = Path(__file__).resolve().parents[2]
    knowledge_yaml = repo_root / "config" / "study_knowledge.yaml"
    allowlisted_files = frozenset((_resolve(knowledge_yaml),))

    return [trio_root, agent_root], [agent_root], allowlisted_files


def validate_agent_read(path: str | Path) -> Path:
    """Check that the agent is allowed to read *path* and return it resolved.

    A path passes when its resolved form is one of the allowlisted files,
    or when it is equal to or nested under one of the read roots reported
    by :func:`_zones`.

    Returns:
        The resolved path as a :class:`~pathlib.Path`.

    Raises:
        ZoneViolationError: *path* is outside the agent's permitted read zones.
    """
    read_roots, _, allowlist = _zones()
    candidate = _resolve(path)

    permitted = candidate in allowlist or any(
        _is_within(candidate, root) for root in read_roots
    )
    if not permitted:
        raise ZoneViolationError(
            "Agent read rejected — path is outside the permitted zones "
            f"(trio_bundle/ or agent/): {path}"
        )
    return Path(candidate)
def validate_agent_write(path: str | Path) -> Path:
    """Check that the agent is allowed to write *path* and return it resolved.

    A path passes only when its resolved form is equal to or nested under
    one of the write roots reported by :func:`_zones`.

    Returns:
        The resolved path as a :class:`~pathlib.Path`.

    Raises:
        ZoneViolationError: *path* is outside ``AGENT_STATE_DIR``.
    """
    _, write_roots, _ = _zones()
    candidate = _resolve(path)

    if not any(_is_within(candidate, root) for root in write_roots):
        raise ZoneViolationError(
            "Agent write rejected — path is outside the agent zone "
            f"(only output/{{STUDY}}/agent/** is writable): {path}"
        )
    return Path(candidate)
def is_agent_readable(path: str | Path) -> bool:
    """Report whether :func:`validate_agent_read` would accept *path*.

    Non-raising counterpart intended for sentinel checks: a
    ``ZoneViolationError`` from the validator is turned into ``False``.
    Any other exception propagates unchanged.
    """
    try:
        validate_agent_read(path)
    except ZoneViolationError:
        return False
    else:
        return True
def validate_sandbox_write(path: str | Path) -> Path:
    """Check that the exec_python sandbox may write *path*; return it resolved.

    The sandbox runs LLM-generated code — a strictly narrower threat model
    than tool-code — so its writes are confined to ``AGENT_OUTPUT_DIR``
    (``agent/analysis/``) instead of the whole ``AGENT_STATE_DIR``.

    Containment is decided by :func:`_is_within`, which compares path
    components via ``os.path.commonpath``; a sibling such as
    ``agent/analysis_exfil`` therefore cannot masquerade as ``analysis/``.

    Returns:
        The resolved path as a :class:`~pathlib.Path`.

    Raises:
        ZoneViolationError: *path* is outside ``AGENT_OUTPUT_DIR``.
    """
    allowed_root = _resolve(config.AGENT_OUTPUT_DIR)
    candidate = _resolve(path)

    if not _is_within(candidate, allowed_root):
        raise ZoneViolationError(
            f"Sandbox write denied — exec_python may only write inside agent/analysis/: {path}"
        )
    return Path(candidate)