"""Pytest configuration and shared fixtures for QE acceptance tests. Usage:: pytest tests/acceptance/ -v --run-acceptance [--acceptance-runs=3] LLM configuration is read from secrets.yaml (searched in order): 1. QE_SECRETS_PATH env var 2. ~/.openclaw/config/secrets.yaml 3. ~/.openclaw/workspace-document-analyzer/config/secrets.yaml deepseek.apiKey / deepseek.baseUrl → text model (deepseek-v4-flash) Environment variables: TEST_IR_PATH — path to IR JSON (default: output/final/ir_final.json) TEST_PARSED_PATH — path to _parsed.json or _updated.json (default: output/) """ from __future__ import annotations import json import os import sys import tempfile from pathlib import Path from typing import Any import pytest import yaml # ── Path setup ────────────────────────────────────────────────────────────── _PROJECT_ROOT = Path(__file__).resolve().parent.parent.parent sys.path.insert(0, str(_PROJECT_ROOT)) # Try multiple known secrets locations (no single hardcoded path) _SECRETS_CANDIDATES = [ Path.home() / ".openclaw" / "config" / "secrets.yaml", Path.home() / ".openclaw" / "workspace-document-analyzer" / "config" / "secrets.yaml", ] # Allow override via environment variable _SECRETS_PATH = Path(os.environ.get("QE_SECRETS_PATH", "")) def _skill_path(skill_name: str) -> str: return str(_PROJECT_ROOT / "skills" / skill_name / "scripts") def _load_secrets() -> dict: """Load LLM configuration from secrets.yaml. Tries paths in order: QE_SECRETS_PATH env var → ~/.openclaw/config/ → ~/.openclaw/workspace-document-analyzer/config/. """ paths = [_SECRETS_PATH] + _SECRETS_CANDIDATES if _SECRETS_PATH.parts else _SECRETS_CANDIDATES for p in paths: if p.exists(): with open(p, "r", encoding="utf-8") as f: return yaml.safe_load(f) or {} return {} # ── pytest configuration ──────────────────────────────────────────────────── def pytest_addoption(parser): parser.addoption( "--run-acceptance", action="store_true", default=False, help="Run QE acceptance tests (requires DASHSCOPE_API_KEY)", ) parser.addoption( "--acceptance-runs", type=int, default=1, help="Number of IR generation runs for Layer B stability testing (default: 1 = skip)", ) parser.addoption( "--ir-path", type=str, default=None, help="Path to IR JSON file to validate", ) parser.addoption( "--parsed-path", type=str, default=None, help="Path to _parsed.json or _updated.json for coverage analysis", ) def pytest_configure(config): config.addinivalue_line( "markers", "acceptance: QE acceptance test (requires --run-acceptance flag and DASHSCOPE_API_KEY)", ) def pytest_collection_modifyitems(config, items): acceptance_dir = str(_PROJECT_ROOT / "tests" / "acceptance") acceptance_items = [i for i in items if str(i.fspath).startswith(acceptance_dir)] non_acceptance_items = [i for i in items if not str(i.fspath).startswith(acceptance_dir)] if not config.getoption("--run-acceptance"): skip_msg = pytest.mark.skip(reason="Need --run-acceptance flag to run") for item in acceptance_items: item.add_marker(skip_msg) return secrets = _load_secrets() has_api = bool(secrets.get("deepseek", {}).get("apiKey")) if not has_api: skip_msg = pytest.mark.skip(reason="No deepseek.apiKey in secrets.yaml") for item in acceptance_items: item.add_marker(skip_msg) # ── Shared fixtures ───────────────────────────────────────────────────────── @pytest.fixture(scope="session") def project_root() -> Path: return _PROJECT_ROOT @pytest.fixture(scope="session") def ir_path(request) -> str: """Path to the IR JSON file under test.""" path = ( request.config.getoption("--ir-path") or os.environ.get("TEST_IR_PATH") or str(_PROJECT_ROOT / "output" / "final" / "ir_final.json") ) if not os.path.exists(path): pytest.skip(f"IR file not found: {path}") return path @pytest.fixture(scope="session") def ir_data(ir_path: str) -> dict: """Load the IR JSON data.""" with open(ir_path, "r", encoding="utf-8") as f: return json.load(f) @pytest.fixture(scope="session") def parsed_path(request) -> str | None: """Path to the corresponding _parsed.json or _updated.json.""" path = ( request.config.getoption("--parsed-path") or os.environ.get("TEST_PARSED_PATH") or str( _PROJECT_ROOT / "output" / "车机娱乐系统禁止功能文档_精简_updated.json" ) ) if os.path.exists(path): return path return None @pytest.fixture(scope="session") def parsed_data(parsed_path: str | None) -> dict | None: """Load the parsed document JSON for coverage analysis.""" if parsed_path is None: return None with open(parsed_path, "r", encoding="utf-8") as f: return json.load(f) # ── LLM client for acceptance tests ────────────────────────────────────────── class _AcceptanceLLM: """Thin LLM wrapper for acceptance tests. Uses deepseek-v4-flash for text (Layer C QE audit) via OpenAI-compatible API, configured from ~/.openclaw/config/secrets.yaml. """ TEXT_MODEL = "deepseek-v4-flash" IMAGE_MODEL = "qwen3-vl-plus" TIMEOUT = 180 MAX_RETRIES = 3 def __init__(self): import time as _time import openai secrets = _load_secrets() ds = secrets.get("deepseek", {}) ds_key = ds.get("apiKey", "") or os.environ.get("DEEPSEEK_API_KEY", "") ds_base = ds.get("baseUrl", "https://api.deepseek.com/v1") if not ds_key: tried = [str(p) for p in ([_SECRETS_PATH] + _SECRETS_CANDIDATES if _SECRETS_PATH.parts else _SECRETS_CANDIDATES)] raise RuntimeError( "No DeepSeek API key found. Tried:\n " + "\n ".join(tried) + "\nSet deepseek.apiKey in secrets.yaml or DEEPSEEK_API_KEY env var." ) self._api_key = ds_key self._client = openai.OpenAI( api_key=ds_key, base_url=ds_base, timeout=self.TIMEOUT, max_retries=self.MAX_RETRIES ) self._prompt_tokens = 0 self._completion_tokens = 0 self._time = _time def chat(self, model: str | None = None, messages: list[dict] | None = None, response_format: dict | None = None) -> str: """Send a chat completion request and return the text response.""" model = model or self.TEXT_MODEL messages = messages or [] for attempt in range(self.MAX_RETRIES): try: kwargs = {"model": model, "messages": messages} if response_format: kwargs["response_format"] = response_format resp = self._client.chat.completions.create(**kwargs) choice = resp.choices[0] if choice.finish_reason == "length": raise RuntimeError(f"Response truncated (finish_reason=length)") usage = resp.usage if usage: self._prompt_tokens += usage.prompt_tokens or 0 self._completion_tokens += usage.completion_tokens or 0 return choice.message.content or "" except Exception as e: if attempt < self.MAX_RETRIES - 1: delay = 2 ** attempt self._time.sleep(delay) continue raise RuntimeError(f"LLM chat failed after {self.MAX_RETRIES} retries: {e}") from e return "" @property def usage(self) -> dict: return { "prompt_tokens": self._prompt_tokens, "completion_tokens": self._completion_tokens, "total_tokens": self._prompt_tokens + self._completion_tokens, } @staticmethod def estimate_tokens(text: str) -> int: return max(1, len(text) // 3) @pytest.fixture(scope="session") def llm_client(): """Create an LLM client for acceptance tests. Uses deepseek-v4-flash for text (Layer C QE audit), configured from ~/.openclaw/config/secrets.yaml deepseek section. """ return _AcceptanceLLM() @pytest.fixture(scope="session") def acceptance_runs(request) -> int: return request.config.getoption("--acceptance-runs", default=1) # ── Pipeline runner ───────────────────────────────────────────────────────── @pytest.fixture(scope="session") def run_ir_pipeline(): """Return a callable that runs the IR generation pipeline on a parsed JSON. Returns None if the pipeline script is not available in the current environment. This is common when the acceptance tests run on pre-generated IR output. Usage:: runner = run_ir_pipeline() if runner: ir_data, ir_path = runner(parsed_json_path, output_dir) """ ir_gen_path = ( _PROJECT_ROOT / "skills" / "ir_generation_skill" / "scripts" / "ir_generator.py" ) if not ir_gen_path.exists(): return None sys.path.insert(0, str(ir_gen_path.parent)) from ir_generator import generate_ir def _run(parsed_path: str, output_dir: str | None = None) -> tuple[list, str]: out = output_dir or tempfile.mkdtemp(prefix="qe_acceptance_") result = generate_ir(parsed_path, out, dry_run=False) return result.get("ir", []), result.get("path", "") return _run