From 62493d3513af4f1cf3c7797f6128798c5e20e536 Mon Sep 17 00:00:00 2001 From: Peter Zhang <18501667167@qq.com> Date: Sat, 30 May 2026 23:19:56 +0800 Subject: [PATCH] fix: Dev-Agent handles all non-test issues, broaden issue scope beyond qe-feedback label --- agents/DEV_AGENT.md | 18 +++-- scripts/agent_poller.py | 5 +- scripts/create_failure_issue.py | 4 +- scripts/start_dev_agent.bat | 4 +- tests/acceptance/conftest.py | 112 +++++++++++++++++++++++++++++--- 5 files changed, 122 insertions(+), 21 deletions(-) diff --git a/agents/DEV_AGENT.md b/agents/DEV_AGENT.md index 8cf5d40..f08768a 100644 --- a/agents/DEV_AGENT.md +++ b/agents/DEV_AGENT.md @@ -52,10 +52,19 @@ description: AI 开发专家,负责 document_analyzer 项目的功能开发、 ### 1. 轮询 Issue -使用 `python scripts/agent_poller.py --action list` 列出当前开启的、带有以下标签的 Issue: +使用 `python scripts/agent_poller.py --action list` 列出所有当前开启的 Issue。 -- `qe-feedback` — QE-Agent 提交的功能/质量问题 -- `ci-failure` — CI 自动创建的测试失败 Issue +**处理范围**:Dev-Agent 负责处理**所有非纯测试开发**相关的 Issue。具体来说: + +| 处理 | 跳过 | +|------|------| +| `ci-failure` — CI 测试失败 | 标注为 QE-Agent 负责或纯测试实现的 Issue | +| `bug` — 功能缺陷 | | +| `qe-feedback` — QE 反馈的功能/质量问题 | | +| `feature` / `enhancement` — 新功能或改进需求 | | +| 无标签或自定义标签的 Issue | | + +**判断原则**:如果 Issue 涉及功能代码、算法逻辑、IR 生成质量、一致性、覆盖率改进 — 你负责。如果 Issue 纯粹是关于测试框架搭建、测试用例编写 — 那是 QE-Agent 的领域。 ### 2. 分析 Issue @@ -65,7 +74,8 @@ python scripts/agent_poller.py --action get --issue N 根据 Issue 来源决定处理优先级: - **ci-failure**:最高优先级,代码已 break,需要立即修复 -- **qe-feedback**:分析 QE-Agent 的反馈,判断是功能缺失、一致性问题还是覆盖率问题,制定改进方案 +- **bug / qe-feedback**:分析反馈,定位根因,制定修复方案 +- **feature / enhancement**:评估可行性和影响范围,设计方案后实施 ### 3. 开发 / 修复 diff --git a/scripts/agent_poller.py b/scripts/agent_poller.py index 563d10c..35ff7e4 100644 --- a/scripts/agent_poller.py +++ b/scripts/agent_poller.py @@ -19,7 +19,7 @@ GITEA_REPO = os.environ.get("GITEA_REPO", "pzhang_zywl/document_analyzer") GITEA_TOKEN = os.environ.get("GITEA_API_TOKEN", "") BASE = f"{GITEA_URL}/api/v1/repos/{GITEA_REPO}" -TARGET_LABELS = {"qe-feedback", "ci-failure"} +TARGET_LABELS = set() # List all issues, Dev-Agent handles all non-test issues def _req(method, path, data=None): @@ -44,8 +44,7 @@ def list_issues(): return [] for i in issues: labels = [l["name"] for l in i.get("labels", [])] - if TARGET_LABELS & set(labels): - print(f"#{i['number']} [{', '.join(labels)}] {i['title']}") + print(f"#{i['number']} [{', '.join(labels) if labels else 'no label'}] {i['title']}") return issues diff --git a/scripts/create_failure_issue.py b/scripts/create_failure_issue.py index 72be1fe..20691e3 100644 --- a/scripts/create_failure_issue.py +++ b/scripts/create_failure_issue.py @@ -18,8 +18,8 @@ def main(): parser.add_argument("--message", required=True) parser.add_argument("--api-token", default=os.environ.get("GITEA_API_TOKEN", "")) parser.add_argument("--workflow", default="CI", help="Workflow name that triggered this (default: CI)") - parser.add_argument("--labels", default="ci-failure,agent-task", - help="Comma-separated labels for the issue (default: ci-failure,agent-task)") + parser.add_argument("--labels", default="ci-failure", + help="Comma-separated labels for the issue (default: ci-failure)") args = parser.parse_args() sha_short = args.sha[:7] diff --git a/scripts/start_dev_agent.bat b/scripts/start_dev_agent.bat index 6efc894..2a0a7c1 100644 --- a/scripts/start_dev_agent.bat +++ b/scripts/start_dev_agent.bat @@ -23,7 +23,7 @@ set /p MODE="请输入 (1/2/3): " if "%MODE%"=="1" ( echo. echo 正在执行单次检查... - claude -p --agent agents/DEV_AGENT.md "你是 Dev-Agent,检查 Gitea 有没有新的 qe-feedback 或 ci-failure 标签的 Issue,有就领取分析并修复代码,记得同步更新测试。" + claude -p --agent agents/DEV_AGENT.md "你是 Dev-Agent,检查 Gitea 所有打开的 Issue,跳过纯测试相关的,其他全部领取分析并修复,记得同步更新测试。" pause exit ) @@ -32,7 +32,7 @@ if "%MODE%"=="2" ( echo. echo 启动持续轮询模式 (每 10 分钟)... echo 按 Ctrl+C 停止 - claude -p --agent agents/DEV_AGENT.md "你是 Dev-Agent,用 loop 模式每 10 分钟检查一次 Gitea Issues,发现 qe-feedback 或 ci-failure 标签就处理。处理完后在对应 Issue 下评论进度,push 代码触发 CI。" + claude -p --agent agents/DEV_AGENT.md "你是 Dev-Agent,用 loop 模式每 10 分钟检查一次 Gitea 所有打开的 Issue,跳过纯测试相关的,其他全部领取处理。完成后评论进度,push 触发 CI。" pause exit ) diff --git a/tests/acceptance/conftest.py b/tests/acceptance/conftest.py index ce30708..6fdffb2 100644 --- a/tests/acceptance/conftest.py +++ b/tests/acceptance/conftest.py @@ -4,8 +4,11 @@ Usage:: pytest tests/acceptance/ -v --run-acceptance [--acceptance-runs=3] +LLM configuration is read from ``~/.openclaw/config/secrets.yaml``: + deepseek.apiKey / deepseek.baseUrl → text model (deepseek-v4-flash) + dashscope.apiKey / dashscope.baseUrl → vision model (qwen3-vl-plus) + Environment variables: - DASHSCOPE_API_KEY — LLM API key (required for Layers B/C) TEST_IR_PATH — path to IR JSON to validate (default: ir_final.json sample) TEST_PARSED_PATH — path to _parsed.json or _updated.json for coverage analysis """ @@ -20,17 +23,28 @@ from pathlib import Path from typing import Any import pytest +import yaml # ── Path setup ────────────────────────────────────────────────────────────── _PROJECT_ROOT = Path(__file__).resolve().parent.parent.parent sys.path.insert(0, str(_PROJECT_ROOT)) +_SECRETS_PATH = Path.home() / ".openclaw" / "config" / "secrets.yaml" + def _skill_path(skill_name: str) -> str: return str(_PROJECT_ROOT / "skills" / skill_name / "scripts") +def _load_secrets() -> dict: + """Load LLM configuration from secrets.yaml.""" + if _SECRETS_PATH.exists(): + with open(_SECRETS_PATH, "r", encoding="utf-8") as f: + return yaml.safe_load(f) or {} + return {} + + # ── pytest configuration ──────────────────────────────────────────────────── @@ -77,11 +91,12 @@ def pytest_collection_modifyitems(config, items): skip_msg = pytest.mark.skip(reason="Need --run-acceptance flag to run") for item in acceptance_items: item.add_marker(skip_msg) - # Don't skip non-acceptance tests return - if not os.environ.get("DASHSCOPE_API_KEY"): - skip_msg = pytest.mark.skip(reason="DASHSCOPE_API_KEY not set") + secrets = _load_secrets() + has_api = bool(secrets.get("deepseek", {}).get("apiKey")) + if not has_api: + skip_msg = pytest.mark.skip(reason="No deepseek.apiKey in secrets.yaml") for item in acceptance_items: item.add_marker(skip_msg) @@ -142,16 +157,93 @@ def parsed_data(parsed_path: str | None) -> dict | None: return json.load(f) +# ── LLM client for acceptance tests ────────────────────────────────────────── + + +class _AcceptanceLLM: + """Thin LLM wrapper for acceptance tests. + + Uses deepseek-v4-flash for text (Layer C QE audit) via OpenAI-compatible API, + configured from ~/.openclaw/config/secrets.yaml. + """ + + TEXT_MODEL = "deepseek-v4-flash" + IMAGE_MODEL = "qwen3-vl-plus" + TIMEOUT = 180 + MAX_RETRIES = 3 + + def __init__(self): + import time as _time + import openai + + secrets = _load_secrets() + ds = secrets.get("deepseek", {}) + ds_key = ds.get("apiKey", "") or os.environ.get("DEEPSEEK_API_KEY", "") + ds_base = ds.get("baseUrl", "https://api.deepseek.com/v1") + + if not ds_key: + raise RuntimeError( + "No DeepSeek API key found. Set deepseek.apiKey in " + f"{_SECRETS_PATH} or DEEPSEEK_API_KEY env var." + ) + + self._api_key = ds_key + self._client = openai.OpenAI( + api_key=ds_key, base_url=ds_base, timeout=self.TIMEOUT, max_retries=self.MAX_RETRIES + ) + self._prompt_tokens = 0 + self._completion_tokens = 0 + self._time = _time + + def chat(self, model: str | None = None, messages: list[dict] | None = None, + response_format: dict | None = None) -> str: + """Send a chat completion request and return the text response.""" + model = model or self.TEXT_MODEL + messages = messages or [] + + for attempt in range(self.MAX_RETRIES): + try: + kwargs = {"model": model, "messages": messages} + if response_format: + kwargs["response_format"] = response_format + resp = self._client.chat.completions.create(**kwargs) + choice = resp.choices[0] + if choice.finish_reason == "length": + raise RuntimeError(f"Response truncated (finish_reason=length)") + usage = resp.usage + if usage: + self._prompt_tokens += usage.prompt_tokens or 0 + self._completion_tokens += usage.completion_tokens or 0 + return choice.message.content or "" + except Exception as e: + if attempt < self.MAX_RETRIES - 1: + delay = 2 ** attempt + self._time.sleep(delay) + continue + raise RuntimeError(f"LLM chat failed after {self.MAX_RETRIES} retries: {e}") from e + return "" + + @property + def usage(self) -> dict: + return { + "prompt_tokens": self._prompt_tokens, + "completion_tokens": self._completion_tokens, + "total_tokens": self._prompt_tokens + self._completion_tokens, + } + + @staticmethod + def estimate_tokens(text: str) -> int: + return max(1, len(text) // 3) + + @pytest.fixture(scope="session") def llm_client(): - """Create an LLMClient instance for acceptance tests. + """Create an LLM client for acceptance tests. - Uses the DashScope-compatible LLMClient from the project. + Uses deepseek-v4-flash for text (Layer C QE audit), configured from + ~/.openclaw/config/secrets.yaml deepseek section. """ - sys.path.insert(0, _skill_path("doc_parser_skill")) - from LLM import LLMClient - - return LLMClient() + return _AcceptanceLLM() @pytest.fixture(scope="session")