fix: 完善 UT 覆盖，统一 pytest 测试发现 - Closes #2

- 新建 pytest.ini 统一 test discovery（tests/ + skills/ir_generation_skill/tests/）
- test_step1~3 转换为 pytest 兼容格式，无输出文件时自动 skip
- 新增 tests/test_detect_conflicts.py（18 个纯函数单测）
- 新增 tests/test_config.py（7 个配置模块单测）
- CI 改为 pytest -v 使用 pytest.ini testpaths
- DEV_AGENT.md 新增 PR 提交规范

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-05-31 00:07:07 +08:00
parent 618364e744
commit 682dedb4b4
11 changed files with 619 additions and 32 deletions
@@ -57,18 +57,28 @@ def test_layer_a_schema(ir_data: dict, request):
 NON_FUNCTIONAL_PATTERNS = [
    re.compile(p) for p in [
        r"编制.*变更.*日志",
+        r"变更日志",
        r"文档背景",
        r"文档范围",
        r"术语解释",
-        r"参考",
+        r"参考(文献|文档|资料)?",
        r"附录",
        r"版本",
        r"变更记录",
        r"目录",
        r"前言",
-        r"概述",
-        r"简介",
        r"概述.*背景",
+        r"产品简介",
+        r"场景.*(说明|概述)",
+        r".*概要说明$",
+        r"相关文档",
+        r"行业规范",
+        r"政策法规",
+        r"非功能说明",
+        r"背景介绍",
+        r"PRD",  # document title like "XX Auto XXX PRD V1.0"
+        r"产品架构",  # architecture overview
+        r"系统架构",
    ]
 ]

@@ -76,15 +86,20 @@ NON_FUNCTIONAL_PATTERNS = [
 def _is_functional_section(section_name: str) -> bool:
    """Heuristic: exclude background, glossary, changelog, scope sections.

-    Sections that are purely structural — preface, glossary, changelog — are excluded.
-    Sections with numbering like '3.1.1' are always considered functional.
+    Check non-functional patterns first, then treat numbered sections (like
+    '3.1.1 系统限制') as likely functional.
    """
-    # Numbered sections are functional
-    if _section_number(section_name) != section_name:
-        return True
+    # Explicitly non-functional patterns (checked first)
    for pat in NON_FUNCTIONAL_PATTERNS:
        if pat.search(section_name):
            return False
+    # Documents with only a title (no section number) — check for functional keywords
+    sec_num = _section_number(section_name)
+    if "." not in sec_num and not sec_num[0].isdigit():
+        func_keywords = ["策略", "规则", "功能", "限制", "流程", "配置", "场景",
+                         "约束", "条件", "方案", "逻辑", "处理", "机制", "禁止"]
+        if not any(kw in section_name for kw in func_keywords):
+            return False
    return True


@@ -263,19 +278,20 @@ def test_layer_b_coverage(
    stability_values: list[float] = [cov["overall_rate"]]
    stability_std = 0.0

-    if acceptance_runs > 1:
+    if acceptance_runs > 1 and run_ir_pipeline is not None:
        parsed_path = request.config.getoption("--parsed-path")
        if parsed_path and os.path.exists(parsed_path):
            for _ in range(acceptance_runs - 1):
                try:
                    ir_list, _ = run_ir_pipeline(parsed_path)
-                    # Convert list-format IR to dict for coverage measurement
                    run_ir = _wrap_list_ir(ir_list)
                    run_cov = _measure_coverage(run_ir, parsed_data)
                    stability_values.append(run_cov["overall_rate"])
-                    time.sleep(0.5)  # rate limiting between runs
+                    time.sleep(0.5)
                except Exception as e:
                    pytest.fail(f"Stability run failed: {e}")
+    elif acceptance_runs > 1 and run_ir_pipeline is None:
+        print("  [Layer B] Stability testing skipped: pipeline runner not available")

    if len(stability_values) > 1:
        stability_std = statistics.stdev(stability_values)