sync: update all skills from latest workspace code

doc_parser_skill: - New: verify_flowchart.py (flowchart validation) - Updated: LLM.py (multi-provider: DeepSeek + DashScope) - Updated: image_parser.py (logic tree support, external prompts) - Updated: SKILL.md, prompts/image_prompt.md conflict_detection_skill: - Updated: LLM.py (multi-provider sync) - Updated: detect_conflicts.py (logic tree text conversion) ir_generation_skill: - Replaced old scripts/LLM.py + ir_generator.py with standalone project - New: main.py, config.py, step1-3_*.py, ensemble_merge.py - New: prompts/, tests/ subdirectories tests: - New: acceptance/ test suite with schema validation - Fixed: conftest no longer globally skips non-acceptance tests - Updated: test_sample.py for new ir_generation structure Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-05-30 22:45:08 +08:00
parent db64df2da1
commit fec4c09ee0
35 changed files with 8021 additions and 530 deletions
@@ -0,0 +1,232 @@
+"""
+Tests for Stage 3 (Merge & Audit).
+
+Validates:
+- ir_final.json exists and is well-formed
+- No duplicate rule_ids
+- All rule_ids follow new hierarchical naming convention
+- All rules have path arrays
+- ir_audit_report.md exists and contains all required sections
+"""
+
+import re
+import sys
+from pathlib import Path
+from collections import Counter
+
+sys.path.insert(0, str(Path(__file__).parent.parent))
+import config
+
+
+# Status tags prefixed to the console lines printed by this script.
+PASS = "[PASS]"
+FAIL = "[FAIL]"
+WARN = "[WARN]"  # NOTE(review): not referenced in the code visible here
+
+
+def load_ir_final():
+    """Return the parsed contents of ir_final.json.
+
+    The file at ``config.IR_FINAL_JSON`` is read through
+    ``config.load_json``. If that raises ``FileNotFoundError``, a failure
+    notice is printed and the process exits with status 1.
+    """
+    try:
+        ir_final = config.load_json(config.IR_FINAL_JSON)
+    except FileNotFoundError:
+        print(f"{FAIL} ir_final.json 未找到: {config.IR_FINAL_JSON}")
+        print("  请先运行 step3_merge_and_audit.py")
+        sys.exit(1)
+    return ir_final
+
+
+def load_audit_report():
+    """Return the text of ir_audit_report.md, decoded as UTF-8.
+
+    The file location comes from ``config.IR_AUDIT_REPORT_MD``. If the file
+    is missing, a failure notice is printed and the process exits with
+    status 1.
+    """
+    try:
+        return Path(config.IR_AUDIT_REPORT_MD).read_text(encoding="utf-8")
+    except FileNotFoundError:
+        print(f"{FAIL} ir_audit_report.md 未找到: {config.IR_AUDIT_REPORT_MD}")
+        print("  请先运行 step3_merge_and_audit.py")
+        sys.exit(1)
+
+
+def check_rule_ids(ir: dict) -> list[str]:
+    """Check rule_id presence, uniqueness and hierarchical naming.
+
+    Expected format example: DRL-001-DOMESTIC-SYS-FG-INTERRUPT-01
+
+    Args:
+        ir: Parsed ir_final document; rules are read from ``ir["rules"]``.
+            A missing or non-list ``rules`` value is treated as no rules.
+
+    Returns:
+        A list of error messages; empty when every rule_id is a unique,
+        non-empty string that matches the naming pattern.
+    """
+    errors = []
+    rules = ir.get("rules", [])
+    if not isinstance(rules, list):
+        # A malformed container is a top-level structure problem; do not
+        # crash while iterating it here.
+        rules = []
+
+    # Collect usable ids first, so that a missing, empty or non-string
+    # rule_id is reported instead of silently skipping the naming check or
+    # raising inside Counter / the regex.
+    rule_ids = []
+    for i, rule in enumerate(rules):
+        rid = rule.get("rule_id") if isinstance(rule, dict) else None
+        if not isinstance(rid, str) or not rid:
+            errors.append(f"rule[{i}]: rule_id 缺失或不是非空字符串")
+            continue
+        rule_ids.append(rid)
+
+    # No duplicates
+    duplicates = [rid for rid, count in Counter(rule_ids).items() if count > 1]
+    if duplicates:
+        errors.append(f"重复 rule_id: {duplicates}")
+
+    # New hierarchical naming convention
+    pattern = re.compile(
+        r"^[A-Z]+-\d{3}-(DOMESTIC|OVERSEAS)-"
+        r"(SYS|SDK|OTHER)-"
+        r"(FG-INTERRUPT|BG-BLOCK|BG-PAUSE|NO-RESTRICT|SWITCH-OFF)-\d{2}$"
+    )
+    for rid in rule_ids:
+        # fullmatch: with match(), "$" would also accept a trailing newline.
+        if not pattern.fullmatch(rid):
+            errors.append(
+                f"rule_id 命名不规范: '{rid}' "
+                f"(期望: FEATURE-SCOPE-METHOD-BEHAVIOR-NN)"
+            )
+
+    return errors
+
+
+def check_top_level_structure(ir: dict) -> list[str]:
+    """Validate the top-level shape of the ir_final document.
+
+    Reports each of ``feature``, ``feature_id`` and ``rules`` that is
+    absent, then reports ``rules`` when it is not a list or is an empty
+    list.
+
+    Returns:
+        A list of error messages; empty when the structure is valid.
+    """
+    problems = [
+        f"ir_final 缺少顶层字段: {name}"
+        for name in ("feature", "feature_id", "rules")
+        if name not in ir
+    ]
+
+    rule_list = ir.get("rules")
+    if not isinstance(rule_list, list):
+        problems.append("ir_final.rules 必须是数组")
+    elif not rule_list:
+        problems.append("ir_final.rules 为空")
+
+    return problems
+
+
+def check_rule_paths(rules: list[dict]) -> list[str]:
+    """Report every rule whose ``path`` value is missing or falsy.
+
+    Each message is prefixed with the rule's ``rule_id``, or ``?`` when the
+    rule has none.
+
+    Returns:
+        One error message per offending rule, in input order.
+    """
+    return [
+        f"{entry.get('rule_id', '?')}: path 字段为空或缺失"
+        for entry in rules
+        if not entry.get("path", [])
+    ]
+
+
+def check_rule_completeness(rules: list[dict]) -> list[str]:
+    """Check that each rule carries all required fields.
+
+    For every rule this verifies that the required keys are present, that
+    ``sources`` and ``actions`` are non-empty, and that ``precondition``
+    has a truthy ``geographic_scope`` and a ``screen_type`` key.
+
+    Args:
+        rules: Rule dictionaries from ir_final.
+
+    Returns:
+        A list of error messages, each prefixed with the rule's ``rule_id``
+        (or ``rule[i]`` when the rule has none); empty when all rules are
+        complete.
+    """
+    errors = []
+    required_fields = [
+        "rule_id", "description", "priority", "sources",
+        "precondition", "trigger", "actions"
+    ]
+    for i, rule in enumerate(rules):
+        rid = rule.get("rule_id", f"rule[{i}]")
+        for field in required_fields:
+            if field not in rule:
+                errors.append(f"{rid}: 缺少字段 '{field}'")
+        if not rule.get("sources"):
+            errors.append(f"{rid}: sources 为空")
+        if not rule.get("actions"):
+            errors.append(f"{rid}: actions 为空")
+        # Check precondition fields
+        precond = rule.get("precondition", {})
+        if not isinstance(precond, dict):
+            # A present but non-object precondition (e.g. null or a string)
+            # is reported as a validation error rather than raising on
+            # .get() / "in" below.
+            errors.append(f"{rid}: precondition 必须是对象")
+            continue
+        if not precond.get("geographic_scope"):
+            errors.append(f"{rid}: precondition.geographic_scope 缺失")
+        if "screen_type" not in precond:
+            errors.append(f"{rid}: precondition.screen_type 缺失")
+    return errors
+
+
+def check_audit_report(report: str) -> list[str]:
+    """Verify that the audit report text mentions every required section.
+
+    A section counts as present when its title occurs anywhere in
+    ``report``. The report must also contain the human-review notice.
+
+    Returns:
+        A list of error messages; empty when nothing is missing.
+    """
+    section_titles = (
+        "逻辑树路径覆盖率",
+        "表格枚举覆盖",
+        "开关状态",
+        "一致性扫描报告",
+        "自动补全摘要",
+        "规则清单",
+    )
+    problems = [
+        f"审计报告缺少章节: {title}"
+        for title in section_titles
+        if title not in report
+    ]
+
+    # The human review notice is checked after the section titles.
+    if "人工审查" not in report:
+        problems.append("审计报告缺少人工审查提示")
+
+    return problems
+
+
+def _report_check(title, errors, pass_message, limit=None):
+    """Print the PASS/FAIL outcome of one check, listing at most ``limit`` errors."""
+    if not errors:
+        print(f"\n{PASS} {title}: {pass_message}")
+        return
+    print(f"\n{FAIL} {title}: {len(errors)} 个错误")
+    shown = errors if limit is None else errors[:limit]
+    for e in shown:
+        print(f"  - {e}")
+    if len(errors) > len(shown):
+        # Always say how many errors were cut off by the limit.
+        print(f"  ... 还有 {len(errors) - len(shown)} 个")
+
+
+def run_all_tests():
+    """Run every Step 3 self-check and print a report.
+
+    Loads ir_final.json and the audit report (either loader exits the
+    process if its file is missing), runs the five checks in order and
+    prints a PASS/FAIL line for each, followed by a summary.
+
+    Returns:
+        True when no check reported an error, False otherwise.
+    """
+    print("=" * 60)
+    print("Step 3 自检测试")
+    print("=" * 60)
+
+    ir = load_ir_final()
+    report = load_audit_report()
+    all_errors = []
+
+    # Test 1: Top-level structure
+    if isinstance(ir, dict):
+        errors = check_top_level_structure(ir)
+    else:
+        # A non-object document cannot be inspected further; report it and
+        # continue with an empty document so the remaining checks still run.
+        errors = ["ir_final.json 顶层必须是 JSON 对象"]
+        ir = {}
+
+    # Keep only well-formed rule entries for the per-rule checks, so a
+    # malformed "rules" value is reported here instead of crashing them.
+    raw_rules = ir.get("rules")
+    rules = []
+    if isinstance(raw_rules, list):
+        for i, entry in enumerate(raw_rules):
+            if isinstance(entry, dict):
+                rules.append(entry)
+            else:
+                errors.append(f"ir_final.rules[{i}] 不是对象")
+
+    _report_check(
+        "顶层结构检查",
+        errors,
+        f"通过 (feature={ir.get('feature')}, feature_id={ir.get('feature_id')})",
+    )
+    all_errors.extend(errors)
+
+    # Test 2: rule_id uniqueness and naming
+    errors = check_rule_ids({**ir, "rules": rules})
+    _report_check(
+        "rule_id 检查",
+        errors,
+        f"全部通过 ({len(rules)} 个唯一 ID, 层次化格式)",
+    )
+    all_errors.extend(errors)
+
+    # Test 3: Rule path fields
+    errors = check_rule_paths(rules)
+    _report_check("规则 path 字段", errors, "全部通过", limit=10)
+    all_errors.extend(errors)
+
+    # Test 4: Rule field completeness
+    errors = check_rule_completeness(rules)
+    _report_check("规则字段完整性", errors, "全部通过", limit=10)
+    all_errors.extend(errors)
+
+    # Test 5: Audit report content
+    errors = check_audit_report(report)
+    _report_check("审计报告检查", errors, "全部通过 (6 个章节)")
+    all_errors.extend(errors)
+
+    # Summary
+    print(f"\n{'='*60}")
+    total_failures = len(all_errors)
+
+    if total_failures == 0:
+        print(f"{PASS} 所有测试通过!")
+        print(f"\n最终交付物:")
+        print(f"  - {config.IR_FINAL_JSON} ({len(rules)} 条规则)")
+        print(f"  - {config.IR_AUDIT_REPORT_MD}")
+    else:
+        print(f"{FAIL} 测试失败: {total_failures} 个错误")
+        print("\n建议: 检查 ir_fragments.json 和合并逻辑，修复问题后重新运行 step3_merge_and_audit.py")
+
+    return total_failures == 0
+
+
+if __name__ == "__main__":
+    # Exit status 0 when every check passed, 1 otherwise.
+    sys.exit(0 if run_all_tests() else 1)