# Source code for scripts.utils.snapshots

"""Human-reviewed snapshot baseline helpers.

The snapshot baseline is a full copy of ``output/{STUDY}/trio_bundle/``
saved under ``data/snapshots/{STUDY}/`` after human review. It is the
operator-approved fallback for broken or incomplete live bundles.

The active operations are:

* save the current live trio bundle as the reviewed snapshot baseline;
* restore the reviewed snapshot baseline over the live trio bundle;
* check whether a reviewed snapshot baseline exists.
"""

from __future__ import annotations

import argparse
import contextlib
import os
import shutil
import sys
from pathlib import Path

import config

# Names exported by ``from scripts.utils.snapshots import *``; every entry
# is defined in this module.
__all__ = [
    "SnapshotError",
    "create_snapshot",
    "latest_snapshot_name",
    "list_snapshots",
    "main",
    "resolve_snapshot_name",
    "restore_snapshot",
    "snapshot_exists",
]


class SnapshotError(RuntimeError):
    """Signal that a snapshot save or restore could not be carried out."""
def _safe_rmtree(path: Path, *, ignore_errors: bool = False) -> None:
    """Delete *path*, refusing to follow symlinks at the root.

    When *path* itself is a symlink only the link is removed and its target
    is left untouched; anything else is handed to :func:`shutil.rmtree`.

    Args:
        path: Directory (or symlink) to remove.
        ignore_errors: Forwarded to :func:`shutil.rmtree`.
    """
    if path.is_symlink():
        path.unlink()
        return
    shutil.rmtree(path, ignore_errors=ignore_errors)


def _snapshot_root() -> Path:
    """Return ``config.STUDY_SNAPSHOTS_DIR`` as a :class:`~pathlib.Path`."""
    return Path(config.STUDY_SNAPSHOTS_DIR)


def _trio_root() -> Path:
    """Return ``config.TRIO_BUNDLE_DIR`` as a :class:`~pathlib.Path`."""
    return Path(config.TRIO_BUNDLE_DIR)


def _harden_tree_modes(root: Path) -> None:
    """Set every dir under *root* to mode 0o700 and every file to 0o600.

    The operation is best-effort: an ``OSError`` from any single ``chmod`` is
    suppressed so one stubborn entry does not abort the rest of the walk.

    Symlinked entries are skipped. ``chmod`` follows symlinks, so touching a
    link would change the mode of its target, which may live outside *root*.
    Targets that do live inside *root* are still reached through their real
    path during the walk.

    Args:
        root: Top of the tree to harden; *root* itself is set to 0o700.
    """
    with contextlib.suppress(OSError):
        root.chmod(0o700)
    for current_root, dirs, files in os.walk(root):
        base = Path(current_root)
        # Directories first, then files, within each directory visited.
        for names, mode in ((dirs, 0o700), (files, 0o600)):
            for entry in names:
                candidate = base / entry
                if candidate.is_symlink():
                    continue
                with contextlib.suppress(OSError):
                    candidate.chmod(mode)
def snapshot_exists() -> bool:
    """Report whether the reviewed snapshot baseline holds usable content.

    The baseline counts as usable when its root is a directory and at least
    one of the following is present: a ``variables.json`` file,
    ``datasets/*.jsonl``, ``dictionary/*.json`` or ``pdfs/*_variables.json``.
    """
    baseline = _snapshot_root()
    if not baseline.is_dir():
        return False
    if (baseline / "variables.json").is_file():
        return True

    # (sub-directory, glob pattern) pairs, probed lazily in this order.
    probes = (
        ("datasets", "*.jsonl"),
        ("dictionary", "*.json"),
        ("pdfs", "*_variables.json"),
    )
    return any(
        any((baseline / subdir).glob(pattern)) for subdir, pattern in probes
    )
def resolve_snapshot_name(name: str | None) -> str:
    """Return the study name, the only snapshot name in active use.

    Compatibility shim: *name* is accepted but never consulted.
    """
    del name  # kept in the signature for older callers only
    study_name = config.STUDY_NAME
    return str(study_name)
def create_snapshot(name: str | None = None, *, overwrite: bool = False) -> Path:
    """Copy the live trio bundle into ``data/snapshots/{STUDY}/``.

    The bundle is copied into a hidden staging sibling first and only swapped
    into place once the copy has succeeded. A copy that fails part-way
    therefore leaves an existing reviewed baseline untouched instead of
    deleting it up front.

    Args:
        name: Ignored compatibility parameter; there is a single baseline
            per study.
        overwrite: Replace an existing baseline instead of raising.

    Returns:
        The snapshot directory that now holds the copy.

    Raises:
        SnapshotError: If the live trio bundle is not a directory, or a
            baseline already exists and *overwrite* is false.
        OSError: Propagated unchanged when copying or renaming fails.
    """
    del name  # kept in the signature for older callers only

    trio = _trio_root()
    if not trio.is_dir():
        raise SnapshotError(f"Trio bundle missing at {trio}; cannot save snapshot.")

    target = _snapshot_root()
    # lexists() also reports a dangling symlink, which exists() would miss
    # and which would otherwise make the copy fail with FileExistsError.
    target_present = os.path.lexists(target)
    if target_present and not overwrite:
        raise SnapshotError(f"Snapshot already exists: {target}")

    target.parent.mkdir(parents=True, exist_ok=True)
    # Dot-prefixed work names keep the temporaries from looking like another
    # study's snapshot directory.
    staging = target.with_name(f".{target.name}.saving")
    backup = target.with_name(f".{target.name}.previous")
    for leftover in (staging, backup):
        if os.path.lexists(leftover):
            _safe_rmtree(leftover)

    try:
        shutil.copytree(trio, staging, symlinks=False)
        _harden_tree_modes(staging)
        if target_present:
            target.rename(backup)
        staging.rename(target)
    except Exception:
        # Roll back: put the previous baseline back if it was moved aside,
        # then discard the partial copy before re-raising.
        if os.path.lexists(backup) and not os.path.lexists(target):
            backup.rename(target)
        if os.path.lexists(staging):
            with contextlib.suppress(Exception):
                _safe_rmtree(staging)
        raise

    if os.path.lexists(backup):
        # Best-effort cleanup; the new baseline is already in place.
        with contextlib.suppress(Exception):
            _safe_rmtree(backup)
    return target
def restore_snapshot(name: str | None = None) -> Path:
    """Overwrite the live trio bundle with the reviewed snapshot baseline.

    The baseline is copied into a ``.replacing`` sibling of the live bundle,
    the current bundle is moved to a ``.previous`` sibling, and the staged
    copy is renamed into place. If any step fails, the previous bundle is
    moved back (when it had been moved aside) and the staged copy is removed.

    Existence checks use :func:`os.path.lexists`, so a live bundle or a
    leftover work directory that is a dangling symlink is still cleared
    instead of making the copy or rename fail.

    Args:
        name: Ignored compatibility parameter; there is a single baseline
            per study.

    Returns:
        The live trio bundle directory that now holds the restored content.

    Raises:
        SnapshotError: If no usable reviewed snapshot exists.
        OSError: Propagated unchanged when copying or renaming fails.
    """
    del name  # kept in the signature for older callers only

    source = _snapshot_root()
    if not snapshot_exists():
        raise SnapshotError(f"Reviewed snapshot not found or empty: {source}")

    trio = _trio_root()
    trio.parent.mkdir(parents=True, exist_ok=True)
    staging = trio.with_name(trio.name + ".replacing")
    backup = trio.with_name(trio.name + ".previous")
    # Clear leftovers from an earlier interrupted run.
    for leftover in (staging, backup):
        if os.path.lexists(leftover):
            _safe_rmtree(leftover)

    try:
        shutil.copytree(source, staging, symlinks=False)
        _harden_tree_modes(staging)
        if os.path.lexists(trio):
            trio.rename(backup)
        staging.rename(trio)
    except Exception:
        # Roll back: restore the previous bundle if it was moved aside, then
        # discard the staged copy before re-raising.
        if os.path.lexists(backup) and not os.path.lexists(trio):
            backup.rename(trio)
        if os.path.lexists(staging):
            with contextlib.suppress(Exception):
                _safe_rmtree(staging)
        raise

    if os.path.lexists(backup):
        # Best-effort cleanup; the restored bundle is already in place.
        with contextlib.suppress(Exception):
            _safe_rmtree(backup)
    return trio
def list_snapshots() -> list[str]:
    """List the reviewed snapshot name, or nothing when no baseline exists.

    Returns:
        A one-element list holding the study name if the baseline has usable
        content, otherwise an empty list.
    """
    if not snapshot_exists():
        return []
    return [str(config.STUDY_NAME)]
def latest_snapshot_name() -> str | None:
    """Give the study snapshot name, or ``None`` when no baseline exists."""
    if not snapshot_exists():
        return None
    return str(config.STUDY_NAME)
def _cmd_create(args: argparse.Namespace) -> int:
    """Handle ``create``: save the live bundle as the reviewed baseline.

    Returns 0 on success, or 1 after printing the ``SnapshotError`` message
    to stderr.
    """
    try:
        saved_to = create_snapshot(overwrite=args.force)
    except SnapshotError as exc:
        print(f"✗ {exc}", file=sys.stderr)
        return 1
    else:
        print(f"✓ Reviewed snapshot saved to {saved_to}")
        return 0


def _cmd_list(_args: argparse.Namespace) -> int:
    """Handle ``list``: report whether a reviewed baseline exists.

    Always returns 0; a missing baseline is reported, not treated as an error.
    """
    if snapshot_exists():
        message = f"Reviewed snapshot baseline: {_snapshot_root()}"
    else:
        message = "No reviewed snapshot baseline available."
    print(message)
    return 0


def _cmd_restore(_args: argparse.Namespace) -> int:
    """Handle ``restore``: copy the reviewed baseline over the live bundle.

    Returns 0 on success, or 1 after printing the ``SnapshotError`` message
    to stderr.
    """
    try:
        restored_to = restore_snapshot()
    except SnapshotError as exc:
        print(f"✗ {exc}", file=sys.stderr)
        return 1
    else:
        print(f"✓ Restored reviewed snapshot into {restored_to}")
        return 0
def _build_parser() -> argparse.ArgumentParser:
    """Assemble the ``create`` / ``list`` / ``restore`` command-line parser."""
    ignored_name_help = (
        "Ignored compatibility option; snapshots are single-baseline per study."
    )

    parser = argparse.ArgumentParser(
        prog="python -m scripts.utils.snapshots",
        description="Save or restore the reviewed data/snapshots/{STUDY} baseline.",
    )
    commands = parser.add_subparsers(dest="command", required=True)

    create_cmd = commands.add_parser(
        "create",
        help="Copy output/{STUDY}/trio_bundle/ → data/snapshots/{STUDY}/",
    )
    create_cmd.add_argument(
        "--force",
        action="store_true",
        help="Overwrite the existing reviewed snapshot baseline.",
    )
    create_cmd.add_argument("--name", default=None, help=ignored_name_help)
    create_cmd.set_defaults(func=_cmd_create)

    list_cmd = commands.add_parser(
        "list", help="Show whether the reviewed snapshot exists"
    )
    list_cmd.set_defaults(func=_cmd_list)

    restore_cmd = commands.add_parser(
        "restore",
        help="Overwrite the live trio bundle with data/snapshots/{STUDY}/",
    )
    restore_cmd.add_argument(
        "name", nargs="?", default=None, help=ignored_name_help
    )
    restore_cmd.set_defaults(func=_cmd_restore)

    return parser


def main(argv: list[str] | None = None) -> int:
    """Run the snapshot command-line interface.

    Args:
        argv: Argument list to parse; ``None`` lets argparse read
            ``sys.argv``.

    Returns:
        The integer exit status produced by the selected sub-command handler.
    """
    namespace = _build_parser().parse_args(argv)
    handler = namespace.func
    return int(handler(namespace))
if __name__ == "__main__":
    # Script entry point: exit with the status code returned by main().
    raise SystemExit(main())