sync: update all skills from latest workspace code

doc_parser_skill: - New: verify_flowchart.py (flowchart validation) - Updated: LLM.py (multi-provider: DeepSeek + DashScope) - Updated: image_parser.py (logic tree support, external prompts) - Updated: SKILL.md, prompts/image_prompt.md conflict_detection_skill: - Updated: LLM.py (multi-provider sync) - Updated: detect_conflicts.py (logic tree text conversion) ir_generation_skill: - Replaced old scripts/LLM.py + ir_generator.py with standalone project - New: main.py, config.py, step1-3_*.py, ensemble_merge.py - New: prompts/, tests/ subdirectories tests: - New: acceptance/ test suite with schema validation - Fixed: conftest no longer globally skips non-acceptance tests - Updated: test_sample.py for new ir_generation structure Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-05-30 22:45:08 +08:00
parent db64df2da1
commit fec4c09ee0
35 changed files with 8021 additions and 530 deletions
@@ -0,0 +1,322 @@
+"""
+Tests for Stage 2 (IR Extraction).
+
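+Validates that ir_fragments.json meets quality and structural requirements:
+- All fragments have non-empty rules
+- All rules have non-empty path arrays
+- All rules have precondition.geographic_scope and precondition.screen_type
+- All trigger conditions have signal/operator/value
+  (rules whose trigger carries an event are not checked)
+- user_interaction content is non-empty and not a placeholder
+- All rules cite a logic-tree source (with node_ids) or a table/para source
+- No duplicate rule_ids (across all fragments)
+- All actions have a valid type (system or user_interaction)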
+"""
+
+import json
+import sys
+from pathlib import Path
+from collections import Counter
+
+sys.path.insert(0, str(Path(__file__).parent.parent))
+import config
+
+
+# Status tags printed in front of each check verdict in the console report.
+PASS = "[PASS]"
+FAIL = "[FAIL]"
+# NOTE(review): WARN is not referenced anywhere in the visible part of this file.
+WARN = "[WARN]"
+
+# Forbidden placeholder phrases in user_interaction content.
+# They are matched as case-sensitive substrings, which is why both "todo" and
+# "TODO" are listed.
+FORBIDDEN_PLACEHOLDERS = [
+    "文案由业务定义", "待定", "自定义", "TBD", "todo", "TODO"
+]
+
+
+def load_fragments():
+    """Read the Stage 2 output through ``config.load_json``.
+
+    Returns:
+        Whatever ``config.load_json`` yields for ``config.IR_FRAGMENTS_JSON``.
+
+    If the loader raises ``FileNotFoundError`` a hint is printed and the
+    process exits with status 1.
+    """
+    try:
+        fragments = config.load_json(config.IR_FRAGMENTS_JSON)
+    except FileNotFoundError:
+        missing_path = config.IR_FRAGMENTS_JSON
+        print(f"{FAIL} ir_fragments.json 未找到: {missing_path}")
+        print("  请先运行 step2_ir_extraction.py")
+        sys.exit(1)
+    return fragments
+
+
+def check_non_empty_rules(fragments: list[dict]) -> list[str]:
+    """Report each fragment whose ``rules`` entry is missing or empty.
+
+    Args:
+        fragments: Fragment dicts; ``unit_id``, ``rules`` and ``error`` are read.
+
+    Returns:
+        One message per offending fragment. When the fragment carries a truthy
+        ``error`` value the message quotes it, otherwise it only states that
+        the rule list is empty.
+    """
+    problems = []
+    for fragment in fragments:
+        if fragment.get("rules", []):
+            continue
+        unit = fragment.get("unit_id", "?")
+        failure = fragment.get("error")
+        reason = f"提取失败 — {failure}" if failure else "rules 为空"
+        problems.append(f"{unit}: {reason}")
+    return problems
+
+
+def check_rule_paths(fragments: list[dict]) -> list[str]:
+    """Report rules whose ``path`` is absent, empty, or not a list.
+
+    Args:
+        fragments: Fragment dicts, each optionally holding a ``rules`` list.
+
+    Returns:
+        One message per offending rule, labelled with its ``rule_id`` (or
+        ``rule[<index>]`` when the rule has none).
+    """
+    problems = []
+    for fragment in fragments:
+        for index, rule in enumerate(fragment.get("rules", [])):
+            label = rule.get("rule_id", f"rule[{index}]")
+            path = rule.get("path", [])
+            # Falsy values of any type are reported as "empty or missing";
+            # only truthy non-list values reach the type complaint.
+            if not path:
+                problems.append(f"{label}: path 字段为空或缺失")
+                continue
+            if not isinstance(path, list):
+                problems.append(f"{label}: path 必须是数组")
+    return problems
+
+
+def check_precondition_fields(fragments: list[dict]) -> list[str]:
+    """Report rules lacking a precondition or its two required fields.
+
+    ``geographic_scope`` must be present and truthy; ``screen_type`` only has
+    to be present as a key (its value is not inspected).
+
+    Args:
+        fragments: Fragment dicts, each optionally holding a ``rules`` list.
+
+    Returns:
+        One message per missing item, in fragment/rule order.
+    """
+    problems = []
+    for fragment in fragments:
+        for index, rule in enumerate(fragment.get("rules", [])):
+            label = rule.get("rule_id", f"rule[{index}]")
+            precondition = rule.get("precondition", {})
+            if precondition:
+                if not precondition.get("geographic_scope"):
+                    problems.append(f"{label}: precondition.geographic_scope 缺失")
+                if "screen_type" not in precondition:
+                    problems.append(f"{label}: precondition.screen_type 缺失")
+            else:
+                # Nothing to look inside: report the precondition itself.
+                problems.append(f"{label}: precondition 缺失")
+    return problems
+
+
+def check_user_interaction_content(fragments: list[dict]) -> list[str]:
+    """Report ``user_interaction`` actions with empty or placeholder content.
+
+    Actions of any other type are ignored. Content counts as a placeholder
+    when it contains any entry of ``FORBIDDEN_PLACEHOLDERS`` as a substring.
+
+    Args:
+        fragments: Fragment dicts, each optionally holding a ``rules`` list.
+
+    Returns:
+        At most one message per action, labelled
+        ``<rule label>.actions[<index>]``.
+    """
+    problems = []
+    for fragment in fragments:
+        for index, rule in enumerate(fragment.get("rules", [])):
+            label = rule.get("rule_id", f"rule[{index}]")
+            for position, action in enumerate(rule.get("actions", [])):
+                if action.get("type") == "user_interaction":
+                    text = action.get("content", "")
+                    where = f"{label}.actions[{position}]"
+                    if not text:
+                        problems.append(f"{where}: user_interaction 的 content 为空")
+                        continue
+                    for placeholder in FORBIDDEN_PLACEHOLDERS:
+                        if placeholder in text:
+                            problems.append(f"{where}: content 包含占位符: '{text}'")
+                            # One report per action, however many phrases match.
+                            break
+    return problems
+
+
+def check_sources_have_logic_tree_nodes(fragments: list[dict]) -> list[str]:
+    """Report rules whose ``sources`` cite neither a logic tree nor text.
+
+    A rule passes when at least one source is of type ``logic_tree`` with a
+    truthy ``node_ids``, or, failing that, when at least one source is of type
+    ``table`` or ``para``.
+
+    Args:
+        fragments: Fragment dicts, each optionally holding a ``rules`` list.
+
+    Returns:
+        One message per rule that has no acceptable source.
+    """
+    problems = []
+    for fragment in fragments:
+        for index, rule in enumerate(fragment.get("rules", [])):
+            label = rule.get("rule_id", f"rule[{index}]")
+            sources = rule.get("sources", [])
+            if any(
+                source.get("type") == "logic_tree" and source.get("node_ids")
+                for source in sources
+            ):
+                continue
+            # No usable logic-tree reference: a text source is accepted instead.
+            if any(source.get("type") in ("table", "para") for source in sources):
+                continue
+            problems.append(f"{label}: sources 中既无逻辑树引用也无文字引用")
+    return problems
+
+
+def check_trigger_conditions(fragments: list[dict]) -> list[str]:
+    """Report trigger conditions missing ``signal``, ``operator`` or ``value``.
+
+    Rules whose trigger has a non-``None`` ``event`` are skipped entirely.
+    ``signal`` and ``operator`` must be truthy; ``value`` only has to exist as
+    a key.
+
+    Args:
+        fragments: Fragment dicts, each optionally holding a ``rules`` list.
+
+    Returns:
+        One message per missing field, labelled
+        ``<rule label>.condition[<index>]``.
+    """
+    problems = []
+    for fragment in fragments:
+        for index, rule in enumerate(fragment.get("rules", [])):
+            label = rule.get("rule_id", f"rule[{index}]")
+            trigger = rule.get("trigger", {})
+            conditions = trigger.get("conditions", [])
+            if trigger.get("event") is None:
+                for position, condition in enumerate(conditions):
+                    where = f"{label}.condition[{position}]"
+                    for field in ("signal", "operator"):
+                        if not condition.get(field, ""):
+                            problems.append(f"{where}: 缺少 {field}")
+                    if "value" not in condition:
+                        problems.append(f"{where}: 缺少 value")
+
+    return problems
+
+
+def check_duplicate_rule_ids(fragments: list[dict]) -> list[str]:
+    """Report ``rule_id`` values used by more than one rule.
+
+    Ids are pooled across every fragment; rules with a missing or falsy
+    ``rule_id`` are left out of the count.
+
+    Args:
+        fragments: Fragment dicts, each optionally holding a ``rules`` list.
+
+    Returns:
+        An empty list when all ids are unique, otherwise a single message
+        listing the repeated ids in order of first appearance.
+    """
+    candidate_ids = (
+        rule.get("rule_id", "")
+        for fragment in fragments
+        for rule in fragment.get("rules", [])
+    )
+    tally = Counter(rule_id for rule_id in candidate_ids if rule_id)
+    repeated = [rule_id for rule_id, seen in tally.items() if seen > 1]
+    return [f"重复 rule_id: {repeated}"] if repeated else []
+
+
+def check_action_types(fragments: list[dict]) -> list[str]:
+    """Verify that every action declares a recognised ``type``.
+
+    Two problems are reported per action: a ``type`` other than ``system`` or
+    ``user_interaction`` (a missing ``type`` is treated as ``''``), and a
+    ``user_interaction`` action that has no ``content`` key at all.
+
+    Args:
+        fragments: Fragment dicts, each optionally holding a ``rules`` list.
+
+    Returns:
+        One message per problem, in fragment/rule/action order. The text is
+        identical from run to run.
+    """
+    # A tuple rather than a set: membership then also works for unhashable
+    # values (e.g. a JSON array that ended up in ``type``), and the rendering
+    # below no longer depends on per-process string hash ordering.
+    valid_types = ("system", "user_interaction")
+    expected = "{" + ", ".join(repr(name) for name in valid_types) + "}"
+    errors = []
+    for f in fragments:
+        for j, rule in enumerate(f.get("rules", [])):
+            rid = rule.get("rule_id", f"rule[{j}]")
+            for k, action in enumerate(rule.get("actions", [])):
+                atype = action.get("type", "")
+                if atype not in valid_types:
+                    errors.append(
+                        f"{rid}.action[{k}]: type='{atype}' 无效, "
+                        f"应为 {expected}"
+                    )
+                if atype == "user_interaction" and "content" not in action:
+                    errors.append(
+                        f"{rid}.action[{k}]: user_interaction 类型缺少 content 字段"
+                    )
+    return errors
+
+
+def _print_check_outcome(title, errors, fail_summary, pass_summary="全部通过", limit=None):
+    """Print the verdict of one check, listing at most ``limit`` of its errors.
+
+    Args:
+        title: Check name shown after the status tag.
+        errors: Messages produced by the check; empty means the check passed.
+        fail_summary: Text shown on failure; ``{count}`` is replaced by the
+            number of errors.
+        pass_summary: Text shown on success.
+        limit: Maximum number of messages to list, or ``None`` for all of them.
+    """
+    if not errors:
+        print(f"\n{PASS} {title}: {pass_summary}")
+        return
+    print(f"\n{FAIL} {title}: {fail_summary.format(count=len(errors))}")
+    visible = errors if limit is None else errors[:limit]
+    for message in visible:
+        print(f"  - {message}")
+    hidden = len(errors) - len(visible)
+    if hidden > 0:
+        # Always say how many messages were cut so none disappear silently.
+        print(f"  ... 还有 {hidden} 个")
+
+
+def run_all_tests():
+    """Run every Stage 2 check on the loaded fragments and print a report.
+
+    The fragments come from ``load_fragments()``. Each check is run in turn,
+    its verdict is printed, and all error messages are pooled for the summary.
+
+    Returns:
+        ``True`` when no check produced an error, ``False`` otherwise.
+    """
+    print("=" * 60)
+    print("Step 2 自检测试")
+    print("=" * 60)
+
+    fragments = load_fragments()
+    total_units = len(fragments)
+    total_rules = sum(len(f.get("rules", [])) for f in fragments)
+
+    # (title, check, failure summary, success summary, max messages listed)
+    checks = [
+        ("非空规则检查", check_non_empty_rules,
+         "{count} 个错误", f"全部通过 ({total_units} 个片段)", None),
+        ("规则 path 字段", check_rule_paths,
+         "{count} 个错误", "全部通过", 10),
+        ("precondition 字段", check_precondition_fields,
+         "{count} 个错误", "全部通过", 10),
+        ("user_interaction content", check_user_interaction_content,
+         "{count} 个错误", "全部通过", 10),
+        ("来源节点引用", check_sources_have_logic_tree_nodes,
+         "{count} 个规则缺少来源引用", "全部通过", 10),
+        ("触发条件完整性", check_trigger_conditions,
+         "{count} 个条件不完整", "全部通过", 10),
+        ("rule_id 唯一性", check_duplicate_rule_ids,
+         "发现重复", "全部通过", None),
+        ("动作类型检查", check_action_types,
+         "{count} 个问题", "全部通过", 10),
+    ]
+
+    all_errors = []
+    for title, check, fail_summary, pass_summary, limit in checks:
+        errors = check(fragments)
+        _print_check_outcome(title, errors, fail_summary, pass_summary, limit)
+        all_errors.extend(errors)
+
+    # Summary
+    print(f"\n{'='*60}")
+    total_failures = len(all_errors)
+
+    if total_failures == 0:
+        print(f"{PASS} 所有测试通过!")
+    else:
+        print(f"{FAIL} 测试失败: {total_failures} 个错误")
+        print("\n建议:")
+        print("  1. 检查 ir_fragments.json 中出错的规则")
+        print("  2. 如果某些功能单元的规则为空，检查上下文包是否丢失了关键信息")
+        print("  3. 调整 Prompt (prompts/step2_ir_extraction.txt) 后重新运行")
+
+    print("\n统计:")
+    print(f"  功能单元数: {total_units}")
+    print(f"  规则总数: {total_rules}")
+    error_units = sum(1 for f in fragments if f.get("error"))
+    if error_units:
+        print(f"  提取失败的单元: {error_units}")
+
+    return total_failures == 0
+
+
+if __name__ == "__main__":
+    # Exit status 0 when every check passed, 1 otherwise.
+    sys.exit(0 if run_all_tests() else 1)