fix: Dev-Agent handles all non-test issues, broaden issue scope beyond qe-feedback label
CI / test (push) Successful in 7s
CI / test (push) Successful in 7s
This commit is contained in:
+102
-10
@@ -4,8 +4,11 @@ Usage::
|
||||
|
||||
pytest tests/acceptance/ -v --run-acceptance [--acceptance-runs=3]
|
||||
|
||||
LLM configuration is read from ``~/.openclaw/config/secrets.yaml``:
|
||||
deepseek.apiKey / deepseek.baseUrl → text model (deepseek-v4-flash)
|
||||
dashscope.apiKey / dashscope.baseUrl → vision model (qwen3-vl-plus)
|
||||
|
||||
Environment variables:
|
||||
DASHSCOPE_API_KEY — LLM API key (required for Layers B/C)
|
||||
TEST_IR_PATH — path to IR JSON to validate (default: ir_final.json sample)
|
||||
TEST_PARSED_PATH — path to _parsed.json or _updated.json for coverage analysis
|
||||
"""
|
||||
@@ -20,17 +23,28 @@ from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
import pytest
|
||||
import yaml
|
||||
|
||||
# ── Path setup ──────────────────────────────────────────────────────────────

# Third ancestor directory of this file; put first on sys.path so imports
# resolve against it before anything else on the path.
_PROJECT_ROOT = Path(__file__).resolve().parent.parent.parent
sys.path.insert(0, str(_PROJECT_ROOT))

# Per-user YAML file that holds the LLM credentials and endpoints.
_SECRETS_PATH = Path.home() / ".openclaw" / "config" / "secrets.yaml"
|
||||
|
||||
|
||||
def _skill_path(skill_name: str) -> str:
    """Return the ``scripts`` directory of a skill as a string path.

    Args:
        skill_name: Directory name of the skill under ``<project root>/skills``.

    Returns:
        ``<project root>/skills/<skill_name>/scripts`` as a ``str``. The path
        is only composed here; it is not checked for existence.
    """
    return str(_PROJECT_ROOT / "skills" / skill_name / "scripts")
|
||||
|
||||
|
||||
def _load_secrets() -> dict:
    """Load LLM configuration from secrets.yaml.

    Returns:
        The top-level mapping parsed from ``_SECRETS_PATH``. An empty dict is
        returned when the file does not exist, is empty, or its top level is
        not a mapping.
    """
    if not _SECRETS_PATH.exists():
        return {}

    with open(_SECRETS_PATH, "r", encoding="utf-8") as f:
        loaded = yaml.safe_load(f)

    # safe_load can yield None (empty file), a list, or a scalar. Callers
    # chain ``.get(...)`` on the result, so anything that is not a mapping is
    # normalised to an empty dict instead of surfacing as an AttributeError.
    return loaded if isinstance(loaded, dict) else {}
|
||||
|
||||
|
||||
# ── pytest configuration ────────────────────────────────────────────────────
|
||||
|
||||
|
||||
@@ -77,11 +91,12 @@ def pytest_collection_modifyitems(config, items):
|
||||
skip_msg = pytest.mark.skip(reason="Need --run-acceptance flag to run")
|
||||
for item in acceptance_items:
|
||||
item.add_marker(skip_msg)
|
||||
# Don't skip non-acceptance tests
|
||||
return
|
||||
|
||||
if not os.environ.get("DASHSCOPE_API_KEY"):
|
||||
skip_msg = pytest.mark.skip(reason="DASHSCOPE_API_KEY not set")
|
||||
secrets = _load_secrets()
|
||||
has_api = bool(secrets.get("deepseek", {}).get("apiKey"))
|
||||
if not has_api:
|
||||
skip_msg = pytest.mark.skip(reason="No deepseek.apiKey in secrets.yaml")
|
||||
for item in acceptance_items:
|
||||
item.add_marker(skip_msg)
|
||||
|
||||
@@ -142,16 +157,93 @@ def parsed_data(parsed_path: str | None) -> dict | None:
|
||||
return json.load(f)
|
||||
|
||||
|
||||
# ── LLM client for acceptance tests ──────────────────────────────────────────
|
||||
|
||||
|
||||
class _AcceptanceLLM:
|
||||
"""Thin LLM wrapper for acceptance tests.
|
||||
|
||||
Uses deepseek-v4-flash for text (Layer C QE audit) via OpenAI-compatible API,
|
||||
configured from ~/.openclaw/config/secrets.yaml.
|
||||
"""
|
||||
|
||||
TEXT_MODEL = "deepseek-v4-flash"
|
||||
IMAGE_MODEL = "qwen3-vl-plus"
|
||||
TIMEOUT = 180
|
||||
MAX_RETRIES = 3
|
||||
|
||||
def __init__(self):
|
||||
import time as _time
|
||||
import openai
|
||||
|
||||
secrets = _load_secrets()
|
||||
ds = secrets.get("deepseek", {})
|
||||
ds_key = ds.get("apiKey", "") or os.environ.get("DEEPSEEK_API_KEY", "")
|
||||
ds_base = ds.get("baseUrl", "https://api.deepseek.com/v1")
|
||||
|
||||
if not ds_key:
|
||||
raise RuntimeError(
|
||||
"No DeepSeek API key found. Set deepseek.apiKey in "
|
||||
f"{_SECRETS_PATH} or DEEPSEEK_API_KEY env var."
|
||||
)
|
||||
|
||||
self._api_key = ds_key
|
||||
self._client = openai.OpenAI(
|
||||
api_key=ds_key, base_url=ds_base, timeout=self.TIMEOUT, max_retries=self.MAX_RETRIES
|
||||
)
|
||||
self._prompt_tokens = 0
|
||||
self._completion_tokens = 0
|
||||
self._time = _time
|
||||
|
||||
def chat(self, model: str | None = None, messages: list[dict] | None = None,
|
||||
response_format: dict | None = None) -> str:
|
||||
"""Send a chat completion request and return the text response."""
|
||||
model = model or self.TEXT_MODEL
|
||||
messages = messages or []
|
||||
|
||||
for attempt in range(self.MAX_RETRIES):
|
||||
try:
|
||||
kwargs = {"model": model, "messages": messages}
|
||||
if response_format:
|
||||
kwargs["response_format"] = response_format
|
||||
resp = self._client.chat.completions.create(**kwargs)
|
||||
choice = resp.choices[0]
|
||||
if choice.finish_reason == "length":
|
||||
raise RuntimeError(f"Response truncated (finish_reason=length)")
|
||||
usage = resp.usage
|
||||
if usage:
|
||||
self._prompt_tokens += usage.prompt_tokens or 0
|
||||
self._completion_tokens += usage.completion_tokens or 0
|
||||
return choice.message.content or ""
|
||||
except Exception as e:
|
||||
if attempt < self.MAX_RETRIES - 1:
|
||||
delay = 2 ** attempt
|
||||
self._time.sleep(delay)
|
||||
continue
|
||||
raise RuntimeError(f"LLM chat failed after {self.MAX_RETRIES} retries: {e}") from e
|
||||
return ""
|
||||
|
||||
@property
|
||||
def usage(self) -> dict:
|
||||
return {
|
||||
"prompt_tokens": self._prompt_tokens,
|
||||
"completion_tokens": self._completion_tokens,
|
||||
"total_tokens": self._prompt_tokens + self._completion_tokens,
|
||||
}
|
||||
|
||||
@staticmethod
|
||||
def estimate_tokens(text: str) -> int:
|
||||
return max(1, len(text) // 3)
|
||||
|
||||
|
||||
@pytest.fixture(scope="session")
def llm_client():
    """Create an LLM client for acceptance tests.

    Uses deepseek-v4-flash for text (Layer C QE audit), configured from
    ~/.openclaw/config/secrets.yaml deepseek section.

    Session-scoped, so a single client instance is shared by all tests in
    the run.
    """
    return _AcceptanceLLM()
|
||||
|
||||
|
||||
@pytest.fixture(scope="session")
|
||||
|
||||
Reference in New Issue
Block a user