Source code for scripts.ai_assistant.ui.model_policy

"""Version-aware model allowlist for high-risk actions.

Loading or reloading a study mutates ``output/{STUDY}/`` in place. That
pipeline is irreversible without a snapshot restore, so we gate it behind a
model quality bar:

- Anthropic Claude Opus     ≥ 4.6
- Google Gemini Pro         ≥ 3.1
- OpenAI GPT                ≥ 5.3

Any model explicitly in the Ollama provider category passes automatically —
local models are the user's own hardware and are assumed operator-approved.

The allowlist uses *version comparison*, not exact string matching, because
model names change. New minor versions are admitted automatically once they
meet the floor.

Public API
----------
- :func:`is_model_allowed_for_study_load` — single boolean check.
- :func:`describe_allowlist` — human-readable requirements string for the UI.
"""

from __future__ import annotations

import re
from dataclasses import dataclass

__all__ = [
    "ModelGateResult",
    "describe_allowlist",
    "is_model_allowed_for_study_load",
]


@dataclass(frozen=True)
class ModelGateResult:
    """Outcome of evaluating a model against the study-load allowlist."""

    # True when the provider/model pair may trigger a study load/reload.
    allowed: bool
    # Human-readable explanation, suitable for surfacing in the UI.
    reason: str
# Family rules: each tuple is (required_substrings, forbidden_substrings, floor). # A model matches when it contains every required substring and none of the # forbidden ones. The first matching rule wins. _FAMILIES: tuple[tuple[tuple[str, ...], tuple[str, ...], tuple[int, int]], ...] = ( # Claude Opus ≥ 4.6 (("opus",), (), (4, 6)), # Gemini Pro ≥ 3.1 (explicitly reject "-flash" / non-pro variants) (("gemini", "pro"), ("flash", "nano"), (3, 1)), # OpenAI GPT ≥ 5.3 (reject legacy "gpt-4" / "gpt-3" even though they'd parse) (("gpt",), (), (5, 3)), ) _VERSION_RE = re.compile(r"(?<!\d)(\d+)(?:[.\-_](\d+))?") def _extract_version(model: str) -> tuple[int, int] | None: """Pull the first ``major(.minor)?`` pair from a model string.""" match = _VERSION_RE.search(model) if not match: return None major = int(match.group(1)) minor = int(match.group(2)) if match.group(2) else 0 return major, minor def _normalise(name: str) -> str: return name.strip().lower()
def is_model_allowed_for_study_load(*, provider: str, model: str) -> ModelGateResult:
    """Return whether ``provider``/``model`` may trigger a study load/reload.

    Rules:

    - Ollama (local) is always allowed — the user controls the runtime.
    - Otherwise, the model must match one of the known family rules and
      meet the minimum version (floor comparison is tuple-wise).
    - Unknown models are rejected (fail-closed).

    Args:
        provider: Provider identifier (e.g. ``"ollama"``); compared
            case-insensitively.
        model: Model name as reported by the provider; compared
            case-insensitively.

    Returns:
        ModelGateResult: ``allowed`` plus a human-readable ``reason``.
    """
    p = _normalise(provider)
    m = _normalise(model)

    if not m:
        return ModelGateResult(allowed=False, reason="No model selected.")

    # Local models run on the operator's own hardware — trusted by policy.
    if p == "ollama":
        return ModelGateResult(
            allowed=True,
            reason="Local Ollama models are trusted by operator.",
        )

    version = _extract_version(m)

    for required, forbidden, floor in _FAMILIES:
        if not all(token in m for token in required):
            continue
        if any(token in m for token in forbidden):
            continue
        family_label = "/".join(required)
        if version is None:
            # Fail closed: a family match without a parsable version is
            # indistinguishable from a below-floor model.
            # BUGFIX: the message previously rendered with no separator
            # between family and floor, e.g. "need opus4.6.".
            return ModelGateResult(
                allowed=False,
                reason=(
                    f"Could not parse version from {model!r}; "
                    f"need {family_label} ≥ {floor[0]}.{floor[1]}."
                ),
            )
        if version >= floor:
            return ModelGateResult(
                allowed=True,
                reason=(
                    f"{model} is at or above the {family_label} "
                    f"{floor[0]}.{floor[1]} floor."
                ),
            )
        # First matching family decides: a below-floor match returns here
        # rather than falling through to later (laxer) families.
        return ModelGateResult(
            allowed=False,
            reason=(
                f"{model} is below the {family_label} "
                f"{floor[0]}.{floor[1]} floor."
            ),
        )

    return ModelGateResult(
        allowed=False,
        reason=(
            "Model is not on the study-load allowlist. "
            "Use Claude Opus ≥ 4.6, Gemini Pro ≥ 3.1, GPT ≥ 5.3, "
            "or a local Ollama model."
        ),
    )
def describe_allowlist() -> str:
    """Human-readable summary for UI captions."""
    fragments = (
        "Loading or reloading study data requires a high-capability model: ",
        "Claude **Opus ≥ 4.6**, Gemini **Pro ≥ 3.1**, GPT **≥ 5.3**, ",
        "or any local **Ollama** model. ",
        '"Use Existing Study" is always available regardless of model.',
    )
    return "".join(fragments)