Files
document_analyzer/skills/ir_generation_skill/tests/test_step2.py
T
pzhang_zywl fec4c09ee0
CI / test (push) Successful in 8s
sync: update all skills from latest workspace code
doc_parser_skill:
- New: verify_flowchart.py (flowchart validation)
- Updated: LLM.py (multi-provider: DeepSeek + DashScope)
- Updated: image_parser.py (logic tree support, external prompts)
- Updated: SKILL.md, prompts/image_prompt.md

conflict_detection_skill:
- Updated: LLM.py (multi-provider sync)
- Updated: detect_conflicts.py (logic tree text conversion)

ir_generation_skill:
- Replaced old scripts/LLM.py + ir_generator.py with standalone project
- New: main.py, config.py, step1-3_*.py, ensemble_merge.py
- New: prompts/, tests/ subdirectories

tests:
- New: acceptance/ test suite with schema validation
- Fixed: conftest no longer globally skips non-acceptance tests
- Updated: test_sample.py for new ir_generation structure

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-05-30 22:45:08 +08:00

323 lines
11 KiB
Python

"""
Tests for Stage 2 (IR Extraction).
Validates that ir_fragments.json meets quality and structural requirements:
- All fragments have non-empty rules
- All rules have non-empty path arrays
- All rules have precondition.geographic_scope and precondition.screen_type
- All trigger conditions have signal/operator/value (rules whose trigger has an event are exempt)
- user_interaction content is non-empty and not a placeholder
- All rules cite at least one source (a logic tree node, or a table/para text reference)
- All actions have a valid type (system or user_interaction)
- No duplicate rule_ids (across all fragments)
"""
import json
import sys
from pathlib import Path
from collections import Counter
sys.path.insert(0, str(Path(__file__).parent.parent))
import config
# Status tags used as prefixes in the printed report.
# (WARN is not referenced anywhere in the visible code.)
PASS = "[PASS]"
FAIL = "[FAIL]"
WARN = "[WARN]"

# Substrings that mark a user_interaction `content` value as an unfilled
# placeholder; matching is a plain case-sensitive substring test.
FORBIDDEN_PLACEHOLDERS = [
    "文案由业务定义",
    "待定",
    "自定义",
    "TBD",
    "todo",
    "TODO",
]
def load_fragments():
    """Return the parsed contents of ir_fragments.json.

    The file located at ``config.IR_FRAGMENTS_JSON`` is read through
    ``config.load_json``. When the file does not exist, a failure notice and a
    hint to run step 2 first are printed and the process exits with status 1.
    """
    try:
        fragments = config.load_json(config.IR_FRAGMENTS_JSON)
    except FileNotFoundError:
        print(f"{FAIL} ir_fragments.json 未找到: {config.IR_FRAGMENTS_JSON}")
        print(" 请先运行 step2_ir_extraction.py")
        sys.exit(1)
    else:
        return fragments
def check_non_empty_rules(fragments: list[dict]) -> list[str]:
    """Report every fragment that carries no rules.

    A fragment whose ``rules`` entry is missing or falsy is reported. If the
    fragment also has a truthy ``error`` entry, the message quotes that error
    as a failed extraction; otherwise it states that the rules are empty.
    Fragments without a ``unit_id`` are labelled ``?``.

    Returns:
        One message per offending fragment, in input order.
    """
    problems = []
    for fragment in fragments:
        if fragment.get("rules", []):
            continue  # fragment has at least one rule
        unit = fragment.get("unit_id", "?")
        failure = fragment.get("error")
        reason = f"提取失败 — {failure}" if failure else "rules 为空"
        problems.append(f"{unit}: {reason}")
    return problems
def check_rule_paths(fragments: list[dict]) -> list[str]:
    """Report every rule whose ``path`` is missing, empty, or not a list.

    Rules without a ``rule_id`` are labelled ``rule[<index>]`` using their
    position inside the fragment.

    Args:
        fragments: Parsed fragment dicts, each optionally holding ``rules``.

    Returns:
        One message per offending rule, in input order.
    """
    errors = []
    for fragment in fragments:
        # `"rules": null` in the JSON is loaded as None; treat it as "no rules"
        # instead of letting enumerate(None) raise TypeError.
        for index, rule in enumerate(fragment.get("rules") or []):
            rid = rule.get("rule_id", f"rule[{index}]")
            path = rule.get("path", [])
            if not path:
                errors.append(f"{rid}: path 字段为空或缺失")
            elif not isinstance(path, list):
                errors.append(f"{rid}: path 必须是数组")
    return errors
def check_precondition_fields(fragments: list[dict]) -> list[str]:
    """Report rules whose ``precondition`` lacks required fields.

    A rule is reported when ``precondition`` is missing, empty, or not a dict;
    when ``geographic_scope`` is missing or falsy; or when the ``screen_type``
    key is absent (any value, including null, counts as present).

    Args:
        fragments: Parsed fragment dicts, each optionally holding ``rules``.

    Returns:
        One message per problem, in input order.
    """
    errors = []
    for fragment in fragments:
        # Tolerate `"rules": null`, which is loaded as None.
        for index, rule in enumerate(fragment.get("rules") or []):
            rid = rule.get("rule_id", f"rule[{index}]")
            precond = rule.get("precondition")
            # A non-dict value cannot hold the required keys, so report it as
            # missing rather than crashing on `.get`.
            if not precond or not isinstance(precond, dict):
                errors.append(f"{rid}: precondition 缺失")
                continue
            if not precond.get("geographic_scope"):
                errors.append(f"{rid}: precondition.geographic_scope 缺失")
            if "screen_type" not in precond:
                errors.append(f"{rid}: precondition.screen_type 缺失")
    return errors
def check_user_interaction_content(fragments: list[dict]) -> list[str]:
    """Report ``user_interaction`` actions with empty or placeholder content.

    Only actions whose ``type`` equals ``"user_interaction"`` are inspected.
    An action is reported when its ``content`` is missing or falsy, or when it
    contains any phrase from ``FORBIDDEN_PLACEHOLDERS``.

    Args:
        fragments: Parsed fragment dicts, each optionally holding ``rules``.

    Returns:
        One message per offending action, in input order.
    """
    errors = []
    for fragment in fragments:
        # Null `rules` / `actions` are loaded as None; treat them as empty.
        for index, rule in enumerate(fragment.get("rules") or []):
            rid = rule.get("rule_id", f"rule[{index}]")
            for k, action in enumerate(rule.get("actions") or []):
                if action.get("type") != "user_interaction":
                    continue
                content = action.get("content", "")
                if not content:
                    errors.append(
                        f"{rid}.actions[{k}]: user_interaction 的 content 为空"
                    )
                    continue
                # Non-string content (e.g. a number) has no substring test;
                # search its string form instead of raising TypeError.
                text = content if isinstance(content, str) else str(content)
                if any(ph in text for ph in FORBIDDEN_PLACEHOLDERS):
                    errors.append(
                        f"{rid}.actions[{k}]: content 包含占位符: '{content}'"
                    )
    return errors
def check_sources_have_logic_tree_nodes(fragments: list[dict]) -> list[str]:
    """Report rules that cite no usable source.

    A rule passes when its ``sources`` contain either a ``logic_tree`` entry
    with non-empty ``node_ids`` or a text entry of type ``table`` or ``para``.
    Rules with neither are reported.

    Args:
        fragments: Parsed fragment dicts, each optionally holding ``rules``.

    Returns:
        One message per rule lacking both kinds of reference, in input order.
    """
    errors = []
    for fragment in fragments:
        # Null `rules` / `sources` are loaded as None; treat them as empty.
        for index, rule in enumerate(fragment.get("rules") or []):
            rid = rule.get("rule_id", f"rule[{index}]")
            # Ignore malformed (non-dict) entries rather than crash on `.get`.
            sources = [
                src for src in (rule.get("sources") or [])
                if isinstance(src, dict)
            ]
            has_logic_tree = any(
                src.get("type") == "logic_tree" and src.get("node_ids")
                for src in sources
            )
            has_text = any(
                src.get("type") in ("table", "para") for src in sources
            )
            if not (has_logic_tree or has_text):
                errors.append(f"{rid}: sources 中既无逻辑树引用也无文字引用")
    return errors
def check_trigger_conditions(fragments: list[dict]) -> list[str]:
    """Report trigger conditions missing ``signal``, ``operator`` or ``value``.

    Rules whose trigger has a non-null ``event`` are skipped entirely. For the
    rest, each condition must have a truthy ``signal``, a truthy ``operator``,
    and a ``value`` key (any value, including null or 0, counts as present).

    Args:
        fragments: Parsed fragment dicts, each optionally holding ``rules``.

    Returns:
        One message per missing field, in input order.
    """
    errors = []
    for fragment in fragments:
        # Tolerate `"rules": null`, which is loaded as None.
        for index, rule in enumerate(fragment.get("rules") or []):
            rid = rule.get("rule_id", f"rule[{index}]")
            trigger = rule.get("trigger")
            if not isinstance(trigger, dict):
                # Missing, null or malformed trigger: nothing to inspect.
                continue
            if trigger.get("event") is not None:
                continue
            for k, cond in enumerate(trigger.get("conditions") or []):
                if not cond.get("signal", ""):
                    errors.append(f"{rid}.condition[{k}]: 缺少 signal")
                if not cond.get("operator", ""):
                    errors.append(f"{rid}.condition[{k}]: 缺少 operator")
                if "value" not in cond:
                    errors.append(f"{rid}.condition[{k}]: 缺少 value")
    return errors
def check_duplicate_rule_ids(fragments: list[dict]) -> list[str]:
    """Report ``rule_id`` values that occur more than once across fragments.

    Rules with a missing or falsy ``rule_id`` are ignored.

    Args:
        fragments: Parsed fragment dicts, each optionally holding ``rules``.

    Returns:
        An empty list when all ids are unique; otherwise a single message
        listing the duplicated ids in order of first appearance.
    """
    rule_ids = [
        rule["rule_id"]
        for fragment in fragments
        # Tolerate `"rules": null`, which is loaded as None.
        for rule in (fragment.get("rules") or [])
        if rule.get("rule_id")
    ]
    duplicates = [rid for rid, count in Counter(rule_ids).items() if count > 1]
    return [f"重复 rule_id: {duplicates}"] if duplicates else []
def check_action_types(fragments: list[dict]) -> list[str]:
    """Report actions with an invalid ``type`` or a missing ``content`` key.

    Valid types are ``system`` and ``user_interaction``. A
    ``user_interaction`` action must additionally carry a ``content`` key.

    Args:
        fragments: Parsed fragment dicts, each optionally holding ``rules``.

    Returns:
        One message per problem, in input order.
    """
    # A tuple keeps the membership test safe for unhashable `type` values
    # (a set lookup would raise TypeError) and gives the message a fixed order;
    # interpolating a set made the text vary with hash randomization.
    valid_types = ("system", "user_interaction")
    expected = "{" + ", ".join(repr(name) for name in valid_types) + "}"
    errors = []
    for fragment in fragments:
        # Null `rules` / `actions` are loaded as None; treat them as empty.
        for index, rule in enumerate(fragment.get("rules") or []):
            rid = rule.get("rule_id", f"rule[{index}]")
            for k, action in enumerate(rule.get("actions") or []):
                atype = action.get("type", "")
                if atype not in valid_types:
                    errors.append(
                        f"{rid}.action[{k}]: type='{atype}' 无效, "
                        f"应为 {expected}"
                    )
                if atype == "user_interaction" and "content" not in action:
                    errors.append(
                        f"{rid}.action[{k}]: user_interaction 类型缺少 content 字段"
                    )
    return errors
def _report_check(title, errors, fail_detail, pass_suffix="", limit=None):
    """Print the outcome of a single check.

    Args:
        title: Check name shown after the PASS/FAIL tag.
        errors: Messages produced by the check; empty means the check passed.
        fail_detail: Text shown after the title when the check failed.
        pass_suffix: Extra text appended to the pass line.
        limit: Maximum number of messages to list; ``None`` lists them all.
            When messages are cut off, a trailing line states how many remain.
    """
    if not errors:
        print(f"\n{PASS} {title}: 全部通过{pass_suffix}")
        return
    print(f"\n{FAIL} {title}: {fail_detail}")
    shown = errors if limit is None else errors[:limit]
    for message in shown:
        print(f" - {message}")
    hidden = len(errors) - len(shown)
    if hidden > 0:
        print(f" ... 还有 {hidden}")


def run_all_tests():
    """Run every Step 2 check on ir_fragments.json and print a report.

    Loads the fragments via ``load_fragments``, runs the eight checks in a
    fixed order, prints a PASS/FAIL section for each, then a summary and
    statistics. Suggestions are printed only when at least one check failed.

    Returns:
        True when no check reported an error, False otherwise.
    """
    print("=" * 60)
    print("Step 2 自检测试")
    print("=" * 60)
    fragments = load_fragments()
    total_units = len(fragments)
    # `"rules": null` is loaded as None; count it as zero rules.
    total_rules = sum(len(f.get("rules") or []) for f in fragments)

    # (title, check function, failure text template, pass suffix, list limit)
    checks = [
        ("非空规则检查", check_non_empty_rules,
         "{count} 个错误", f" ({total_units} 个片段)", None),
        ("规则 path 字段", check_rule_paths,
         "{count} 个错误", "", 10),
        ("precondition 字段", check_precondition_fields,
         "{count} 个错误", "", 10),
        ("user_interaction content", check_user_interaction_content,
         "{count} 个错误", "", 10),
        ("来源节点引用", check_sources_have_logic_tree_nodes,
         "{count} 个规则缺少来源引用", "", 10),
        ("触发条件完整性", check_trigger_conditions,
         "{count} 个条件不完整", "", 10),
        ("rule_id 唯一性", check_duplicate_rule_ids,
         "发现重复", "", None),
        # Previously this check cut the list at 10 without saying so; it now
        # prints the "remaining" line like the other truncated checks.
        ("动作类型检查", check_action_types,
         "{count} 个问题", "", 10),
    ]

    all_errors = []
    for title, check, fail_template, pass_suffix, limit in checks:
        errors = check(fragments)
        _report_check(
            title,
            errors,
            fail_template.format(count=len(errors)),
            pass_suffix,
            limit,
        )
        all_errors.extend(errors)

    # Summary
    print(f"\n{'='*60}")
    total_failures = len(all_errors)
    if total_failures == 0:
        print(f"{PASS} 所有测试通过!")
    else:
        print(f"{FAIL} 测试失败: {total_failures} 个错误")
        print("\n建议:")
        print(" 1. 检查 ir_fragments.json 中出错的规则")
        print(" 2. 如果某些功能单元的规则为空,检查上下文包是否丢失了关键信息")
        print(" 3. 调整 Prompt (prompts/step2_ir_extraction.txt) 后重新运行")

    print("\n统计:")
    print(f" 功能单元数: {total_units}")
    print(f" 规则总数: {total_rules}")
    error_units = sum(1 for f in fragments if f.get("error"))
    if error_units:
        print(f" 提取失败的单元: {error_units}")
    return total_failures == 0
if __name__ == "__main__":
    # Exit status mirrors the overall result: 0 when every check passed,
    # 1 otherwise.
    sys.exit(0 if run_all_tests() else 1)