# document_analyzer/tests/acceptance/conftest.py

"""Pytest configuration and shared fixtures for QE acceptance tests.

Usage::

    pytest tests/acceptance/ -v --run-acceptance [--acceptance-runs=3]

LLM configuration is read from secrets.yaml (searched in order):
    1. QE_SECRETS_PATH env var
    2. ~/.openclaw/config/secrets.yaml
    3. ~/.openclaw/workspace-document-analyzer/config/secrets.yaml

    deepseek.apiKey / deepseek.baseUrl  → text model (deepseek-v4-flash)

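Environment variables (the --ir-path / --parsed-path options take precedence):
    TEST_IR_PATH       — path to IR JSON (default: output/final/ir_final.json)
    TEST_PARSED_PATH   — path to _parsed.json or _updated.json
                         (default: a fixed *_updated.json under output/; see the
                         parsed_path fixture)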
"""

from __future__ import annotations

import json
import os
import sys
import tempfile
from pathlib import Path
from typing import Any

import pytest
import yaml

# ── Path setup ──────────────────────────────────────────────────────────────

# Three directory levels above this file. It is put first on sys.path,
# presumably so project modules can be imported from the tests.
_PROJECT_ROOT = Path(__file__).resolve().parent.parent.parent
sys.path.insert(0, str(_PROJECT_ROOT))

# Known secrets.yaml locations, tried in this order by _load_secrets()
# (no single hardcoded path).
_SECRETS_CANDIDATES = [
    Path.home() / ".openclaw" / "config" / "secrets.yaml",
    Path.home() / ".openclaw" / "workspace-document-analyzer" / "config" / "secrets.yaml",
]

# Optional override via the QE_SECRETS_PATH environment variable, read once at
# import time. When the variable is unset this is Path(""), whose .parts is
# empty; _load_secrets() relies on that to detect "no override".
_SECRETS_PATH = Path(os.environ.get("QE_SECRETS_PATH", ""))


def _skill_path(skill_name: str) -> str:
    """Return ``<project root>/skills/<skill_name>/scripts`` as a string path."""
    scripts_dir = _PROJECT_ROOT.joinpath("skills", skill_name, "scripts")
    return str(scripts_dir)


def _load_secrets() -> dict:
    """Load LLM configuration from the first secrets.yaml that can be found.

    Search order: the ``QE_SECRETS_PATH`` override (when set), then every entry
    of ``_SECRETS_CANDIDATES``. Only the first regular file found is read;
    later candidates are not merged in.

    Returns:
        The parsed top-level mapping. ``{}`` is returned when no candidate is
        a regular file, or when the file's top-level YAML value is not a
        mapping (callers use ``.get`` on the result).
    """
    # Path("") has no parts; that is how an unset QE_SECRETS_PATH shows up.
    candidates = list(_SECRETS_CANDIDATES)
    if _SECRETS_PATH.parts:
        candidates.insert(0, _SECRETS_PATH)

    for candidate in candidates:
        # Allow "~" in an override path; the built-in candidates are unaffected.
        path = candidate.expanduser()
        # is_file() rather than exists(): a directory at this location must be
        # skipped instead of failing inside open().
        if not path.is_file():
            continue
        with open(path, "r", encoding="utf-8") as f:
            loaded = yaml.safe_load(f)
        return loaded if isinstance(loaded, dict) else {}
    return {}


# ── pytest configuration ────────────────────────────────────────────────────


def pytest_addoption(parser):
    """Register the command-line options used by the acceptance suite.

    Options:
        --run-acceptance   opt-in flag; without it acceptance tests are skipped.
        --acceptance-runs  number of IR generation runs (int, default 1).
        --ir-path          IR JSON file to validate (default None).
        --parsed-path      parsed/updated JSON for coverage analysis (default None).
    """
    parser.addoption(
        "--run-acceptance",
        action="store_true",
        default=False,
        # The key this suite needs is the DeepSeek one; no code in this file
        # reads DASHSCOPE_API_KEY.
        help="Run QE acceptance tests (requires a DeepSeek API key)",
    )
    parser.addoption(
        "--acceptance-runs",
        type=int,
        default=1,
        help="Number of IR generation runs for Layer B stability testing (default: 1 = skip)",
    )
    parser.addoption(
        "--ir-path",
        type=str,
        default=None,
        help="Path to IR JSON file to validate",
    )
    parser.addoption(
        "--parsed-path",
        type=str,
        default=None,
        help="Path to _parsed.json or _updated.json for coverage analysis",
    )


def pytest_configure(config):
    """Register the ``acceptance`` marker so pytest does not flag it as unknown."""
    config.addinivalue_line(
        "markers",
        # The suite is gated on a DeepSeek key, not on DASHSCOPE_API_KEY.
        "acceptance: QE acceptance test (requires --run-acceptance flag and a DeepSeek API key)",
    )


def pytest_collection_modifyitems(config, items):
    """Skip acceptance tests unless they are enabled and an API key is available.

    Items located under ``<project root>/tests/acceptance/`` are marked as
    skipped when either:

    * ``--run-acceptance`` was not passed, or
    * no DeepSeek API key can be found, neither as ``deepseek.apiKey`` in
      secrets.yaml nor in the ``DEEPSEEK_API_KEY`` environment variable
      (the same two sources ``_AcceptanceLLM`` accepts).

    Items outside that directory are never touched.
    """
    # Trailing separator so that a sibling directory such as
    # tests/acceptance_extra does not match the prefix test.
    acceptance_prefix = str(_PROJECT_ROOT / "tests" / "acceptance") + os.sep
    acceptance_items = [
        item for item in items if str(item.fspath).startswith(acceptance_prefix)
    ]

    if not config.getoption("--run-acceptance"):
        reason = "Need --run-acceptance flag to run"
    else:
        secrets = _load_secrets()
        deepseek = secrets.get("deepseek") if isinstance(secrets, dict) else None
        if not isinstance(deepseek, dict):
            # Missing section, or "deepseek:" left empty (parsed as None).
            deepseek = {}
        if deepseek.get("apiKey") or os.environ.get("DEEPSEEK_API_KEY"):
            return
        reason = "No deepseek.apiKey in secrets.yaml and DEEPSEEK_API_KEY is not set"

    skip_marker = pytest.mark.skip(reason=reason)
    for item in acceptance_items:
        item.add_marker(skip_marker)


# ── Shared fixtures ─────────────────────────────────────────────────────────


@pytest.fixture(scope="session")
def project_root() -> Path:
    """Return the project root directory computed at import time (``_PROJECT_ROOT``)."""
    return _PROJECT_ROOT


@pytest.fixture(scope="session")
def ir_path(request) -> str:
    """Resolve the IR JSON file under test, skipping when it does not exist.

    Precedence: the ``--ir-path`` option, then the ``TEST_IR_PATH`` environment
    variable, then ``output/final/ir_final.json`` under the project root.
    """
    candidate = request.config.getoption("--ir-path")
    if not candidate:
        candidate = os.environ.get("TEST_IR_PATH")
    if not candidate:
        candidate = str(_PROJECT_ROOT / "output" / "final" / "ir_final.json")

    if os.path.exists(candidate):
        return candidate
    pytest.skip(f"IR file not found: {candidate}")
    return candidate


@pytest.fixture(scope="session")
def ir_data(ir_path: str) -> dict:
    """Load the IR JSON and pass every rule through ``_normalize_rule``.

    Steps:
        1. Read ``ir_path`` as UTF-8 JSON.
        2. Import ``_normalize_rule`` from ``step3_merge_and_audit`` (the
           ``skills/ir_generation_skill`` directory is pushed onto ``sys.path``
           for that import).
        3. For each dict entry in ``data["rules"]``: collapse any list-valued
           ``section`` in its sources to the first element (or ``""`` when the
           list is empty), then normalize the rule. Non-dict entries are
           dropped. If normalization raises, the raw rule is kept instead.

    Returns:
        The loaded document with ``data["rules"]`` replaced by the processed
        list. When ``rules`` is missing or empty the document is returned as
        loaded.
    """
    with open(ir_path, "r", encoding="utf-8") as f:
        data = json.load(f)

    # Apply normalize to every rule so old IR files benefit from latest fixes
    # (per the original note: invalid source types, missing section fields,
    # trigger nulls, etc.).
    sys.path.insert(0, str(_PROJECT_ROOT / "skills" / "ir_generation_skill"))
    from step3_merge_and_audit import _normalize_rule

    rules = data.get("rules", [])
    if rules:
        normalized = []
        for rule in rules:
            if not isinstance(rule, dict):
                continue  # Skip non-dict entries defensively
            # Flatten list-type section fields (LLM produces these sometimes).
            # "sources" may be null or malformed in hand-edited/old IR files;
            # those shapes are skipped here rather than crashing the fixture.
            sources = rule.get("sources")
            for src in sources if isinstance(sources, list) else []:
                if not isinstance(src, dict):
                    continue
                sec = src.get("section")
                if isinstance(sec, list):
                    src["section"] = sec[0] if sec else ""
            try:
                normalized.append(_normalize_rule(rule))
            except Exception:
                # Deliberate best effort: keep the raw rule if normalize crashes
                # so the tests can still inspect it.
                normalized.append(rule)
        data["rules"] = normalized

    return data


@pytest.fixture(scope="session")
def parsed_path(request) -> str | None:
    """Resolve the parsed/updated JSON that accompanies the IR, if present.

    Precedence: the ``--parsed-path`` option, then ``TEST_PARSED_PATH``, then a
    fixed ``*_updated.json`` file under ``output/``. Returns ``None`` when the
    chosen path does not exist.
    """
    default_file = _PROJECT_ROOT / "output" / "车机娱乐系统禁止功能文档_精简_updated.json"
    chosen = (
        request.config.getoption("--parsed-path")
        or os.environ.get("TEST_PARSED_PATH")
        or str(default_file)
    )
    return chosen if os.path.exists(chosen) else None


@pytest.fixture(scope="session")
def parsed_data(parsed_path: str | None) -> dict | None:
    """Return the parsed document JSON, or ``None`` when no path was resolved."""
    if parsed_path is not None:
        with open(parsed_path, "r", encoding="utf-8") as handle:
            return json.load(handle)
    return None


# ── LLM client for acceptance tests ──────────────────────────────────────────


class _AcceptanceLLM:
    """Thin LLM wrapper for acceptance tests.

    Uses deepseek-v4-flash for text (Layer C QE audit) via OpenAI-compatible API,
    configured from ~/.openclaw/config/secrets.yaml.
    """

    TEXT_MODEL = "deepseek-v4-flash"
    IMAGE_MODEL = "qwen3-vl-plus"
    TIMEOUT = 180
    MAX_RETRIES = 3

    def __init__(self):
        import time as _time
        import openai

        secrets = _load_secrets()
        ds = secrets.get("deepseek", {})
        ds_key = ds.get("apiKey", "") or os.environ.get("DEEPSEEK_API_KEY", "")
        ds_base = ds.get("baseUrl", "https://api.deepseek.com/v1")

        if not ds_key:
            tried = [str(p) for p in ([_SECRETS_PATH] + _SECRETS_CANDIDATES if _SECRETS_PATH.parts else _SECRETS_CANDIDATES)]
            raise RuntimeError(
                "No DeepSeek API key found. Tried:\n  "
                + "\n  ".join(tried)
                + "\nSet deepseek.apiKey in secrets.yaml or DEEPSEEK_API_KEY env var."
            )

        self._api_key = ds_key
        self._client = openai.OpenAI(
            api_key=ds_key, base_url=ds_base, timeout=self.TIMEOUT, max_retries=self.MAX_RETRIES
        )
        self._prompt_tokens = 0
        self._completion_tokens = 0
        self._time = _time

    def chat(self, model: str | None = None, messages: list[dict] | None = None,
             response_format: dict | None = None) -> str:
        """Send a chat completion request and return the text response."""
        model = model or self.TEXT_MODEL
        messages = messages or []

        for attempt in range(self.MAX_RETRIES):
            try:
                kwargs = {"model": model, "messages": messages}
                if response_format:
                    kwargs["response_format"] = response_format
                resp = self._client.chat.completions.create(**kwargs)
                choice = resp.choices[0]
                if choice.finish_reason == "length":
                    raise RuntimeError(f"Response truncated (finish_reason=length)")
                usage = resp.usage
                if usage:
                    self._prompt_tokens += usage.prompt_tokens or 0
                    self._completion_tokens += usage.completion_tokens or 0
                return choice.message.content or ""
            except Exception as e:
                if attempt < self.MAX_RETRIES - 1:
                    delay = 2 ** attempt
                    self._time.sleep(delay)
                    continue
                raise RuntimeError(f"LLM chat failed after {self.MAX_RETRIES} retries: {e}") from e
        return ""

    @property
    def usage(self) -> dict:
        return {
            "prompt_tokens": self._prompt_tokens,
            "completion_tokens": self._completion_tokens,
            "total_tokens": self._prompt_tokens + self._completion_tokens,
        }

    @staticmethod
    def estimate_tokens(text: str) -> int:
        return max(1, len(text) // 3)


@pytest.fixture(scope="session")
def llm_client():
    """Create the session-wide LLM client for acceptance tests.

    Instantiates ``_AcceptanceLLM``, whose constructor reads the ``deepseek``
    section via ``_load_secrets()`` (falling back to ``DEEPSEEK_API_KEY`` for
    the key) and raises ``RuntimeError`` when no API key is found.
    """
    return _AcceptanceLLM()


@pytest.fixture(scope="session")
def acceptance_runs(request) -> int:
    """Return the value of the ``--acceptance-runs`` option (1 when not given)."""
    return request.config.getoption("--acceptance-runs", default=1)


# ── Pipeline runner ─────────────────────────────────────────────────────────


@pytest.fixture(scope="session")
def run_ir_pipeline():
    """Provide a callable that runs the IR generation pipeline on a parsed JSON.

    The fixture value is ``None`` when ``ir_generator.py`` is not present under
    ``skills/ir_generation_skill/scripts``. This is common when the acceptance
    tests run on pre-generated IR output.

    The callable has the signature ``(parsed_path, output_dir=None)`` and
    returns ``(ir, path)`` taken from the ``"ir"`` and ``"path"`` keys of
    ``generate_ir``'s result (``[]`` / ``""`` when a key is absent). When
    ``output_dir`` is omitted a temporary directory is created for the run;
    those temporary directories are removed when the test session ends.
    Pass ``output_dir`` explicitly to keep the generated files.

    Usage (request the fixture; do not call ``run_ir_pipeline()`` yourself)::

        def test_something(run_ir_pipeline, tmp_path):
            if run_ir_pipeline is None:
                pytest.skip("IR pipeline not available")
            ir, ir_file = run_ir_pipeline(parsed_json_path, str(tmp_path))
    """
    import shutil

    ir_gen_path = (
        _PROJECT_ROOT / "skills" / "ir_generation_skill" / "scripts" / "ir_generator.py"
    )
    if not ir_gen_path.exists():
        yield None
        return

    sys.path.insert(0, str(ir_gen_path.parent))
    from ir_generator import generate_ir

    # Temp dirs created on behalf of callers; cleaned up at session teardown.
    scratch_dirs: list[str] = []

    def _run(parsed_path: str, output_dir: str | None = None) -> tuple[list, str]:
        if output_dir:
            out = output_dir
        else:
            out = tempfile.mkdtemp(prefix="qe_acceptance_")
            scratch_dirs.append(out)
        result = generate_ir(parsed_path, out, dry_run=False)
        return result.get("ir", []), result.get("path", "")

    try:
        yield _run
    finally:
        for scratch in scratch_dirs:
            shutil.rmtree(scratch, ignore_errors=True)