cbafd30ec7
CI / test (pull_request) Successful in 8s
ir_data fixture 在加载 ir_final.json 后对每条 rule 调用 _normalize_rule, 确保旧 pipeline 输出也能受益于最新的防御性修复(非法 source type、 缺失 section 字段等)。 Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
309 lines
10 KiB
Python
309 lines
10 KiB
Python
"""Pytest configuration and shared fixtures for QE acceptance tests.

Usage::

    pytest tests/acceptance/ -v --run-acceptance [--acceptance-runs=3]

LLM configuration is read from secrets.yaml (searched in order):
  1. QE_SECRETS_PATH env var
  2. ~/.openclaw/config/secrets.yaml
  3. ~/.openclaw/workspace-document-analyzer/config/secrets.yaml

  deepseek.apiKey / deepseek.baseUrl → text model (deepseek-v4-flash)

Environment variables:
  TEST_IR_PATH     — path to IR JSON (default: output/final/ir_final.json)
  TEST_PARSED_PATH — path to _parsed.json or _updated.json
                     (default: a specific *_updated.json file under output/)
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import json
|
|
import os
|
|
import sys
|
|
import tempfile
|
|
from pathlib import Path
|
|
from typing import Any
|
|
|
|
import pytest
|
|
import yaml
|
|
|
|
# ── Path setup ──────────────────────────────────────────────────────────────
|
|
|
|
# Project root is three directory levels above this file; it is put at the
# front of sys.path so project modules can be imported by the tests.
_PROJECT_ROOT = Path(__file__).resolve().parent.parent.parent
sys.path.insert(0, str(_PROJECT_ROOT))

# Known secrets locations, probed in this order (no single hardcoded path).
_SECRETS_CANDIDATES = [
    Path.home().joinpath(".openclaw", "config", "secrets.yaml"),
    Path.home().joinpath(
        ".openclaw", "workspace-document-analyzer", "config", "secrets.yaml"
    ),
]

# Optional override via QE_SECRETS_PATH. When the variable is unset or empty
# the resulting Path has no parts, which consumers use as the "not set" signal.
_SECRETS_PATH = Path(os.environ.get("QE_SECRETS_PATH", ""))
|
|
|
|
|
|
def _skill_path(skill_name: str) -> str:
    """Return ``<project root>/skills/<skill_name>/scripts`` as a string."""
    scripts_dir = _PROJECT_ROOT.joinpath("skills", skill_name, "scripts")
    return str(scripts_dir)
|
|
|
|
|
|
def _load_secrets() -> dict:
    """Load LLM configuration from the first secrets.yaml that can be found.

    Paths are tried in order: the QE_SECRETS_PATH override (only when it is
    set to a non-empty value), then every entry of ``_SECRETS_CANDIDATES``.
    The first path that is a regular file wins; later candidates are not
    consulted even if that file turns out to be empty.

    Returns:
        The parsed top-level YAML mapping, or an empty dict when no file is
        found, the file is empty, or its top-level value is not a mapping.
    """
    paths = [_SECRETS_PATH] + _SECRETS_CANDIDATES if _SECRETS_PATH.parts else _SECRETS_CANDIDATES
    for p in paths:
        # is_file() rather than exists(): a directory at this location would
        # make open() raise instead of falling through to the next candidate.
        if not p.is_file():
            continue
        with open(p, "r", encoding="utf-8") as f:
            loaded = yaml.safe_load(f)
        # Callers use dict methods on the result, so anything that is not a
        # mapping (empty file -> None, a bare list or scalar) becomes {}.
        return loaded if isinstance(loaded, dict) else {}
    return {}
|
|
|
|
|
|
# ── pytest configuration ────────────────────────────────────────────────────
|
|
|
|
|
|
def pytest_addoption(parser):
    """Register the command-line options used by the QE acceptance tests.

    Options added:
        --run-acceptance   flag (default False) that opts in to acceptance tests.
        --acceptance-runs  int (default 1), number of IR generation runs.
        --ir-path          str (default None), IR JSON file to validate.
        --parsed-path      str (default None), parsed document JSON.
    """
    parser.addoption(
        "--run-acceptance",
        action="store_true",
        default=False,
        # The key this file actually checks for is deepseek.apiKey from
        # secrets.yaml, so the help text names that requirement.
        help="Run QE acceptance tests (requires deepseek.apiKey in secrets.yaml)",
    )
    parser.addoption(
        "--acceptance-runs",
        type=int,
        default=1,
        help="Number of IR generation runs for Layer B stability testing (default: 1 = skip)",
    )
    parser.addoption(
        "--ir-path",
        type=str,
        default=None,
        help="Path to IR JSON file to validate",
    )
    parser.addoption(
        "--parsed-path",
        type=str,
        default=None,
        help="Path to _parsed.json or _updated.json for coverage analysis",
    )
|
|
|
|
|
|
def pytest_configure(config):
    """Register the ``acceptance`` marker description with pytest."""
    config.addinivalue_line(
        "markers",
        # Names the key this file actually checks (deepseek.apiKey in
        # secrets.yaml) instead of the stale DASHSCOPE_API_KEY.
        "acceptance: QE acceptance test "
        "(requires --run-acceptance flag and deepseek.apiKey in secrets.yaml)",
    )
|
|
|
|
|
|
def pytest_collection_modifyitems(config, items):
    """Skip acceptance tests unless they were requested and an API key exists.

    Only items collected from ``<project root>/tests/acceptance/`` are
    affected. They are marked as skipped when ``--run-acceptance`` was not
    given, or when neither ``deepseek.apiKey`` in secrets.yaml nor the
    DEEPSEEK_API_KEY environment variable provides a key.
    """
    # The trailing separator keeps sibling directories whose names merely
    # start with "acceptance" from matching.
    acceptance_prefix = str(_PROJECT_ROOT / "tests" / "acceptance") + os.sep
    acceptance_items = [i for i in items if str(i.fspath).startswith(acceptance_prefix)]
    if not acceptance_items:
        # Nothing to gate, so there is no need to read secrets.
        return

    if not config.getoption("--run-acceptance"):
        reason = "Need --run-acceptance flag to run"
    else:
        # "or {}" covers a secrets file with "deepseek:" present but empty.
        deepseek = _load_secrets().get("deepseek") or {}
        # Same two key sources that _AcceptanceLLM.__init__ accepts.
        if deepseek.get("apiKey") or os.environ.get("DEEPSEEK_API_KEY"):
            return
        reason = "No deepseek.apiKey in secrets.yaml and DEEPSEEK_API_KEY is not set"

    skip_marker = pytest.mark.skip(reason=reason)
    for item in acceptance_items:
        item.add_marker(skip_marker)
|
|
|
|
|
|
# ── Shared fixtures ─────────────────────────────────────────────────────────
|
|
|
|
|
|
@pytest.fixture(scope="session")
def project_root() -> Path:
    """Expose the module-level project root path to tests."""
    root = _PROJECT_ROOT
    return root
|
|
|
|
|
|
@pytest.fixture(scope="session")
def ir_path(request) -> str:
    """Resolve the IR JSON file under test.

    Precedence: ``--ir-path`` option, then the TEST_IR_PATH environment
    variable, then ``<project root>/output/final/ir_final.json``. Dependent
    tests are skipped when the resolved path does not exist.
    """
    candidate = request.config.getoption("--ir-path")
    if not candidate:
        candidate = os.environ.get("TEST_IR_PATH")
    if not candidate:
        candidate = str(_PROJECT_ROOT / "output" / "final" / "ir_final.json")

    if os.path.exists(candidate):
        return candidate
    # pytest.skip raises, so the return below is never reached in that case.
    pytest.skip(f"IR file not found: {candidate}")
    return candidate
|
|
|
|
|
|
@pytest.fixture(scope="session")
def ir_data(ir_path: str) -> dict:
    """Load the IR JSON and run every rule through ``_normalize_rule``.

    Normalizing at load time lets IR files written by older pipeline runs
    pick up the latest defensive fixes (invalid source types, missing section
    fields, trigger nulls, etc.).
    """
    with open(ir_path, "r", encoding="utf-8") as fh:
        payload = json.load(fh)

    # step3_merge_and_audit is imported from the ir_generation_skill
    # directory, which therefore has to be importable first.
    sys.path.insert(0, str(_PROJECT_ROOT / "skills" / "ir_generation_skill"))
    from step3_merge_and_audit import _normalize_rule

    raw_rules = payload.get("rules", [])
    if not raw_rules:
        # Missing or empty "rules": hand the document back untouched.
        return payload

    payload["rules"] = list(map(_normalize_rule, raw_rules))
    return payload
|
|
|
|
|
|
@pytest.fixture(scope="session")
def parsed_path(request) -> str | None:
    """Resolve the parsed-document JSON (_parsed.json or _updated.json).

    Precedence: ``--parsed-path`` option, then the TEST_PARSED_PATH
    environment variable, then a fixed default file under
    ``<project root>/output``. Returns None when the resolved path does not
    exist.
    """
    explicit = request.config.getoption("--parsed-path") or os.environ.get("TEST_PARSED_PATH")
    fallback = str(_PROJECT_ROOT / "output" / "车机娱乐系统禁止功能文档_精简_updated.json")
    chosen = explicit or fallback
    return chosen if os.path.exists(chosen) else None
|
|
|
|
|
|
@pytest.fixture(scope="session")
def parsed_data(parsed_path: str | None) -> dict | None:
    """Return the parsed document JSON, or None when no parsed file is available."""
    if parsed_path is not None:
        with open(parsed_path, "r", encoding="utf-8") as fh:
            document = json.load(fh)
        return document
    return None
|
|
|
|
|
|
# ── LLM client for acceptance tests ──────────────────────────────────────────
|
|
|
|
|
|
class _AcceptanceLLM:
|
|
"""Thin LLM wrapper for acceptance tests.
|
|
|
|
Uses deepseek-v4-flash for text (Layer C QE audit) via OpenAI-compatible API,
|
|
configured from ~/.openclaw/config/secrets.yaml.
|
|
"""
|
|
|
|
TEXT_MODEL = "deepseek-v4-flash"
|
|
IMAGE_MODEL = "qwen3-vl-plus"
|
|
TIMEOUT = 180
|
|
MAX_RETRIES = 3
|
|
|
|
def __init__(self):
|
|
import time as _time
|
|
import openai
|
|
|
|
secrets = _load_secrets()
|
|
ds = secrets.get("deepseek", {})
|
|
ds_key = ds.get("apiKey", "") or os.environ.get("DEEPSEEK_API_KEY", "")
|
|
ds_base = ds.get("baseUrl", "https://api.deepseek.com/v1")
|
|
|
|
if not ds_key:
|
|
tried = [str(p) for p in ([_SECRETS_PATH] + _SECRETS_CANDIDATES if _SECRETS_PATH.parts else _SECRETS_CANDIDATES)]
|
|
raise RuntimeError(
|
|
"No DeepSeek API key found. Tried:\n "
|
|
+ "\n ".join(tried)
|
|
+ "\nSet deepseek.apiKey in secrets.yaml or DEEPSEEK_API_KEY env var."
|
|
)
|
|
|
|
self._api_key = ds_key
|
|
self._client = openai.OpenAI(
|
|
api_key=ds_key, base_url=ds_base, timeout=self.TIMEOUT, max_retries=self.MAX_RETRIES
|
|
)
|
|
self._prompt_tokens = 0
|
|
self._completion_tokens = 0
|
|
self._time = _time
|
|
|
|
def chat(self, model: str | None = None, messages: list[dict] | None = None,
|
|
response_format: dict | None = None) -> str:
|
|
"""Send a chat completion request and return the text response."""
|
|
model = model or self.TEXT_MODEL
|
|
messages = messages or []
|
|
|
|
for attempt in range(self.MAX_RETRIES):
|
|
try:
|
|
kwargs = {"model": model, "messages": messages}
|
|
if response_format:
|
|
kwargs["response_format"] = response_format
|
|
resp = self._client.chat.completions.create(**kwargs)
|
|
choice = resp.choices[0]
|
|
if choice.finish_reason == "length":
|
|
raise RuntimeError(f"Response truncated (finish_reason=length)")
|
|
usage = resp.usage
|
|
if usage:
|
|
self._prompt_tokens += usage.prompt_tokens or 0
|
|
self._completion_tokens += usage.completion_tokens or 0
|
|
return choice.message.content or ""
|
|
except Exception as e:
|
|
if attempt < self.MAX_RETRIES - 1:
|
|
delay = 2 ** attempt
|
|
self._time.sleep(delay)
|
|
continue
|
|
raise RuntimeError(f"LLM chat failed after {self.MAX_RETRIES} retries: {e}") from e
|
|
return ""
|
|
|
|
@property
|
|
def usage(self) -> dict:
|
|
return {
|
|
"prompt_tokens": self._prompt_tokens,
|
|
"completion_tokens": self._completion_tokens,
|
|
"total_tokens": self._prompt_tokens + self._completion_tokens,
|
|
}
|
|
|
|
@staticmethod
|
|
def estimate_tokens(text: str) -> int:
|
|
return max(1, len(text) // 3)
|
|
|
|
|
|
@pytest.fixture(scope="session")
def llm_client():
    """Provide one ``_AcceptanceLLM`` instance shared by the whole session.

    The client is built by ``_AcceptanceLLM()`` with no arguments; all of its
    configuration is resolved inside that constructor.
    """
    client = _AcceptanceLLM()
    return client
|
|
|
|
|
|
@pytest.fixture(scope="session")
def acceptance_runs(request) -> int:
    """Value of the ``--acceptance-runs`` option (falls back to 1)."""
    runs = request.config.getoption("--acceptance-runs", default=1)
    return runs
|
|
|
|
|
|
# ── Pipeline runner ─────────────────────────────────────────────────────────
|
|
|
|
|
|
@pytest.fixture(scope="session")
def run_ir_pipeline():
    """Provide a callable that runs the IR generation pipeline on a parsed JSON.

    The fixture value is None when
    ``skills/ir_generation_skill/scripts/ir_generator.py`` does not exist
    under the project root, which is common when the acceptance tests run on
    pre-generated IR output.

    Usage (the fixture is injected by pytest, not called directly)::

        def test_something(run_ir_pipeline):
            if run_ir_pipeline:
                ir_data, ir_path = run_ir_pipeline(parsed_json_path, output_dir)
    """
    scripts_dir = _PROJECT_ROOT / "skills" / "ir_generation_skill" / "scripts"
    if not (scripts_dir / "ir_generator.py").exists():
        return None

    # Make the scripts directory importable before pulling in the generator.
    sys.path.insert(0, str(scripts_dir))
    from ir_generator import generate_ir

    def _run(parsed_path: str, output_dir: str | None = None) -> tuple[list, str]:
        # Without an explicit output directory a fresh temporary one is made.
        target_dir = output_dir if output_dir else tempfile.mkdtemp(prefix="qe_acceptance_")
        outcome = generate_ir(parsed_path, target_dir, dry_run=False)
        generated_ir = outcome.get("ir", [])
        generated_path = outcome.get("path", "")
        return generated_ir, generated_path

    return _run
|