# document_analyzer/skills/ir_generation_skill/tests/test_step2_5.py

"""
Tests for Stage 2.5 (Branch Coverage Auto-Completion).

Validates:
- Path enumeration exists and is non-empty
- Auto-complete fragments have valid structure
- No duplicate unit_ids in autocomplete fragments
- Path coverage improved after autocomplete (if applicable; not yet checked by this script)
"""

import sys
from pathlib import Path

sys.path.insert(0, str(Path(__file__).parent.parent))
import config


# Status tags prefixed to the lines of the printed report.
PASS = "[PASS]"
FAIL = "[FAIL]"
WARN = "[WARN]"


def load_path_enumeration():
    """Return what ``config.load_json`` yields for ``config.PATH_ENUM_JSON``.

    If the load raises ``FileNotFoundError``, print a failure line plus a
    hint to run step2_5_branch_coverage.py first, then terminate via
    ``sys.exit(1)``.
    """
    enum_path = config.PATH_ENUM_JSON
    try:
        enumeration = config.load_json(enum_path)
    except FileNotFoundError:
        print(f"{FAIL} path_enumeration.json 未找到: {enum_path}")
        print("  请先运行 step2_5_branch_coverage.py")
        sys.exit(1)
    return enumeration


def load_autocomplete_fragments() -> list[dict] | None:
    """Load ir_autocomplete_fragments.json if it exists.

    Returns:
        Whatever ``config.load_json`` yields for
        ``config.IR_AUTOCOMPLETE_FRAGMENTS_JSON`` (presumably a list of
        fragment dicts), or ``None`` -- not an empty list -- when the file
        does not exist. ``None`` lets callers distinguish "never generated"
        from "generated but empty".
    """
    fragments_path = config.IR_AUTOCOMPLETE_FRAGMENTS_JSON
    if not Path(fragments_path).exists():
        return None
    return config.load_json(fragments_path)


def check_path_enumeration(data: dict) -> list[str]:
    """Validate the structure of a loaded path enumeration.

    Checks that ``logic_tree_paths`` is a non-empty mapping, that
    ``total_paths`` is a positive number, and that every path entry carries
    non-empty ``path_id``, ``image_id`` and ``node_ids`` fields.

    Malformed shapes (wrong container types, ``null`` values, non-dict path
    entries) are reported as errors rather than raising, so a broken file
    yields a readable report instead of a traceback.

    Args:
        data: The decoded content of path_enumeration.json.

    Returns:
        A list of human-readable error messages; empty when everything is
        valid.
    """
    if not isinstance(data, dict):
        return [f"path_enumeration 顶层应为对象, 实际为 {type(data).__name__}"]

    errors = []

    paths = data.get("logic_tree_paths", {})
    if not paths:
        errors.append("logic_tree_paths 为空")
        paths = {}
    elif not isinstance(paths, dict):
        errors.append(
            f"logic_tree_paths 应为对象, 实际为 {type(paths).__name__}"
        )
        paths = {}

    total = data.get("total_paths", 0)
    # A non-numeric value (e.g. null) cannot be compared with 0; treat it as
    # a failed check instead of letting the comparison raise TypeError.
    if not isinstance(total, (int, float)) or total <= 0:
        errors.append(f"total_paths = {total}, 期望 > 0")

    for image_id, image_paths in paths.items():
        if not image_paths:
            errors.append(f"{image_id}: 路径列表为空")
            continue
        if not isinstance(image_paths, (list, tuple)):
            errors.append(
                f"{image_id}: 路径列表应为数组, "
                f"实际为 {type(image_paths).__name__}"
            )
            continue
        for i, p in enumerate(image_paths):
            if not isinstance(p, dict):
                errors.append(
                    f"{image_id}[{i}]: 路径条目应为对象, "
                    f"实际为 {type(p).__name__}"
                )
                continue
            for field in ("path_id", "image_id", "node_ids"):
                if not p.get(field):
                    errors.append(f"{image_id}[{i}]: 缺少 {field}")

    return errors


def check_autocomplete_fragments(fragments: list[dict] | None) -> list[str]:
    """Check auto-complete fragments have valid structure."""
    if fragments is None:
        return ["ir_autocomplete_fragments.json 未生成 (可能无需补全)"]

    errors = []
    seen_unit_ids = set()

    for frag in fragments:
        uid = frag.get("unit_id", "")
        if not uid:
            errors.append("fragment 缺少 unit_id")
            continue
        if uid in seen_unit_ids:
            errors.append(f"unit_id '{uid}' 重复")
        seen_unit_ids.add(uid)

        if not frag.get("auto_generated"):
            errors.append(f"{uid}: auto_generated 应为 true")

        rules = frag.get("rules", [])
        for j, rule in enumerate(rules):
            rid = rule.get("rule_id", f"rule[{j}]")
            if not rule.get("path"):
                errors.append(f"{rid}: path 字段缺失")
            precond = rule.get("precondition", {})
            if not precond.get("geographic_scope"):
                errors.append(f"{rid}: precondition.geographic_scope 缺失")

    return errors


def run_all_tests():
    """Run the Step 2.5 self-checks and print a report to stdout.

    Returns:
        True when no check recorded an error; False otherwise, including
        when loading the path enumeration terminated with SystemExit.
    """
    banner = "=" * 60
    print(banner)
    print("Step 2.5 自检测试")
    print(banner)

    failures = []

    # Check 1: the path enumeration must load and be well-formed.
    try:
        enumeration = load_path_enumeration()
    except SystemExit:
        # Convert the loader's exit into a plain failure result.
        return False

    enum_errors = check_path_enumeration(enumeration)
    if not enum_errors:
        path_count = enumeration.get("total_paths", 0)
        tree_count = len(enumeration.get("logic_tree_paths", {}))
        print(f"\n{PASS} 路径枚举检查: {path_count} 条路径, {tree_count} 个逻辑树")
    else:
        print(f"\n{FAIL} 路径枚举检查: {len(enum_errors)} 个错误")
        for message in enum_errors:
            print(f"  - {message}")
        failures.extend(enum_errors)

    # Check 2: auto-complete fragments (their absence is only a warning).
    fragments = load_autocomplete_fragments()
    frag_errors = check_autocomplete_fragments(fragments)

    if fragments is None:
        print(f"\n{WARN} 自动补全片段: 未生成 (可能所有路径已覆盖)")
    elif not frag_errors:
        rule_count = sum(len(frag.get("rules", [])) for frag in fragments)
        print(f"\n{PASS} 自动补全片段检查: "
              f"{len(fragments)} 个片段, {rule_count} 条规则")
    else:
        print(f"\n{FAIL} 自动补全片段检查: {len(frag_errors)} 个错误")
        # Only the first ten messages are printed; all of them are counted.
        for message in frag_errors[:10]:
            print(f"  - {message}")
        failures.extend(frag_errors)

    # Summary
    print(f"\n{'='*60}")
    failure_count = len(failures)
    passed = failure_count == 0

    if passed:
        print(f"{PASS} 所有测试通过!")
    else:
        print(f"{FAIL} 测试失败: {failure_count} 个错误")

    return passed


if __name__ == "__main__":
    # Exit status mirrors the outcome: 0 when all checks pass, 1 otherwise.
    sys.exit(0 if run_all_tests() else 1)