sync: update all skills from latest workspace code

doc_parser_skill: - New: verify_flowchart.py (flowchart validation) - Updated: LLM.py (multi-provider: DeepSeek + DashScope) - Updated: image_parser.py (logic tree support, external prompts) - Updated: SKILL.md, prompts/image_prompt.md conflict_detection_skill: - Updated: LLM.py (multi-provider sync) - Updated: detect_conflicts.py (logic tree text conversion) ir_generation_skill: - Replaced old scripts/LLM.py + ir_generator.py with standalone project - New: main.py, config.py, step1-3_*.py, ensemble_merge.py - New: prompts/, tests/ subdirectories tests: - New: acceptance/ test suite with schema validation - Fixed: conftest no longer globally skips non-acceptance tests - Updated: test_sample.py for new ir_generation structure Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-05-30 22:45:08 +08:00
parent db64df2da1
commit fec4c09ee0
35 changed files with 8021 additions and 530 deletions
@@ -0,0 +1,9 @@
+# Generated output
+output/
+
+# Python
+__pycache__/
+*.pyc
+
+# Console log
+Console output.txt
@@ -0,0 +1,137 @@
+"""
+Shared configuration for the IR Generation pipeline.
+Reads API keys from a secrets.yaml file, falling back to environment variables.
+"""
+
+import os
+import json
+import yaml
+
+# ---- Paths ----
+# BASE_DIR is the directory containing this config module; WORKSPACE_DIR is
+# its parent, which is expected to also hold the doc_parser_skill directory.
+BASE_DIR = os.path.dirname(os.path.abspath(__file__))
+WORKSPACE_DIR = os.path.dirname(BASE_DIR)
+DOC_PARSER_OUTPUT = os.path.join(WORKSPACE_DIR, "doc_parser_skill", "output")
+PROMPTS_DIR = os.path.join(BASE_DIR, "prompts")
+TESTS_DIR = os.path.join(BASE_DIR, "tests")
+OUTPUT_DIR = os.path.join(BASE_DIR, "output")
+
+# Input file (the parsed PRD JSON)
+# The IR_INPUT_JSON environment variable, when set, takes precedence over the
+# default file name below; set_input_file() can rebind it again at runtime.
+_DEFAULT_INPUT = os.path.join(
+    DOC_PARSER_OUTPUT,
+    "车机娱乐系统禁止功能文档_脱敏 v0.9_v2_updated.json",
+)
+INPUT_JSON = os.environ.get("IR_INPUT_JSON", _DEFAULT_INPUT)
+
+
+def set_input_file(path: str) -> None:
+    """Override the default input JSON path."""
+    global INPUT_JSON
+    INPUT_JSON = path
+
+# Secrets file (shared with workspace-document-analyzer)
+# .openclaw/workspace/skills/ir_generation_new_skill -> .openclaw/workspace-document-analyzer
+# OPENCLAW_HOME is two directory levels above WORKSPACE_DIR.
+OPENCLAW_HOME = os.path.dirname(os.path.dirname(WORKSPACE_DIR))
+SECRETS_YAML = os.path.join(
+    OPENCLAW_HOME, "workspace-document-analyzer", "config", "secrets.yaml",
+)
+
+# Intermediate outputs
+# R1/R2/R3 are the three per-run semantic indices; the un-suffixed file is
+# the merged result.
+SEMANTIC_INDEX_R1_JSON = os.path.join(OUTPUT_DIR, "semantic_index_r1.json")
+SEMANTIC_INDEX_R2_JSON = os.path.join(OUTPUT_DIR, "semantic_index_r2.json")
+SEMANTIC_INDEX_R3_JSON = os.path.join(OUTPUT_DIR, "semantic_index_r3.json")
+SEMANTIC_INDEX_JSON = os.path.join(OUTPUT_DIR, "semantic_index.json")   # merged final
+IR_FRAGMENTS_JSON = os.path.join(OUTPUT_DIR, "ir_fragments.json")
+PATH_ENUM_JSON = os.path.join(OUTPUT_DIR, "path_enumeration.json")
+IR_AUTOCOMPLETE_FRAGMENTS_JSON = os.path.join(OUTPUT_DIR, "ir_autocomplete_fragments.json")
+
+# Final deliverables (placed in doc_parser output per spec)
+IR_FINAL_JSON = os.path.join(DOC_PARSER_OUTPUT, "ir_final.json")
+IR_AUDIT_REPORT_MD = os.path.join(DOC_PARSER_OUTPUT, "ir_audit_report.md")
+
+# ---- LLM API ----
+# Choose provider: "deepseek" | "dashscope"
+LLM_PROVIDER = os.environ.get("IR_PROVIDER", "deepseek")
+
+# Model names per provider
+# NOTE: a single IR_MODEL environment variable overrides the model name for
+# both providers; the per-provider strings are only the defaults.
+PROVIDER_MODELS = {
+    "deepseek": os.environ.get("IR_MODEL", "deepseek-v4-flash"),
+    "dashscope": os.environ.get("IR_MODEL", "qwen-max"),
+}
+# An unrecognised LLM_PROVIDER value falls back to the deepseek model name.
+MODEL_NAME = PROVIDER_MODELS.get(LLM_PROVIDER, PROVIDER_MODELS["deepseek"])
+
+# Maximum tokens for LLM responses
+# Both values are parsed at import time; a non-numeric environment value
+# raises ValueError when this module is imported.
+MAX_TOKENS = int(os.environ.get("IR_MAX_TOKENS", "16000"))
+TEMPERATURE = float(os.environ.get("IR_TEMPERATURE", "0.1"))
+
+# ---- Iteration & Quality ----
+MAX_RETRIES_PER_STAGE = int(os.environ.get("IR_MAX_RETRIES", "3"))
+COVERAGE_TARGET = float(os.environ.get("IR_COVERAGE_TARGET", "0.95"))
+
+# Stage 1 ensemble temperatures (parallel multi-temperature generation)
+# Listed lowest temperature first by default; each entry can be overridden
+# individually through its own environment variable.
+ENSEMBLE_TEMPERATURES = [
+    float(os.environ.get("IR_ENSEMBLE_T1", "0.0")),
+    float(os.environ.get("IR_ENSEMBLE_T2", "0.3")),
+    float(os.environ.get("IR_ENSEMBLE_T3", "0.7")),
+]
+
+
+def _load_secrets() -> dict[str, dict[str, str]]:
+    """Load provider credentials from secrets.yaml.
+
+    Returns a dict like: {"deepseek": {"apiKey": "...", "baseUrl": "..."}, ...}
+    """
+    if os.path.isfile(SECRETS_YAML):
+        with open(SECRETS_YAML, "r", encoding="utf-8") as f:
+            return yaml.safe_load(f) or {}
+    return {}
+
+
+def _get_provider_config(provider: str) -> dict[str, str]:
+    """Get {apiKey, baseUrl} for a provider from secrets, with env-var fallback."""
+    secrets = _load_secrets()
+    entry = secrets.get(provider, {})
+
+    env_prefix = provider.upper()
+    api_key = (
+        os.environ.get(f"{env_prefix}_API_KEY")
+        or entry.get("apiKey", "")
+    )
+    base_url = (
+        os.environ.get(f"{env_prefix}_BASE_URL")
+        or entry.get("baseUrl", "https://api.deepseek.com/v1")
+    )
+
+    if not api_key:
+        raise RuntimeError(
+            f"No API key found for provider '{provider}'. "
+            f"Check {SECRETS_YAML} or set {env_prefix}_API_KEY."
+        )
+    return {"apiKey": api_key, "baseUrl": base_url}
+
+
+def llm_client():
+    """Return an OpenAI-compatible client configured from secrets.yaml."""
+    from openai import OpenAI
+
+    cfg = _get_provider_config(LLM_PROVIDER)
+    return OpenAI(base_url=cfg["baseUrl"], api_key=cfg["apiKey"])
+
+
+def load_input_document(path: str | None = None) -> dict:
+    """Load the parsed PRD JSON document."""
+    path = path or INPUT_JSON
+    with open(path, "r", encoding="utf-8") as f:
+        return json.load(f)
+
+
+def save_json(data, path: str) -> None:
+    """Save data as formatted JSON."""
+    os.makedirs(os.path.dirname(path), exist_ok=True)
+    with open(path, "w", encoding="utf-8") as f:
+        json.dump(data, f, ensure_ascii=False, indent=2)
+
+
+def load_json(path: str) -> dict:
+    """Load a JSON file."""
+    with open(path, "r", encoding="utf-8") as f:
+        return json.load(f)
@@ -0,0 +1,593 @@
+"""
+Deterministic ensemble merge for semantic index generation.
+
+All functions are pure Python with zero LLM calls. Fully testable with mock data.
+
+Cross-references N semantic_index outputs (generated with different temperatures)
+and produces a single merged index with confidence scores.
+
+Used by: step1_semantic_index.py
+Tested by: tests/test_ensemble_merge.py
+"""
+
+from collections import defaultdict
+from difflib import SequenceMatcher
+
+
+# =============================================================================
+# Concept Name Similarity
+# =============================================================================
+
+def concept_name_similarity(name_a: str, name_b: str) -> float:
+    """Compute similarity between two concept names for cross-version matching.
+
+    Strategy (in order of precedence):
+      1. Exact string match -> 1.0
+      2. Substring containment (one is a substring of the other) -> 0.9
+      3. SequenceMatcher ratio on character sequences -> 0.0-1.0
+
+    Returns:
+        float in [0.0, 1.0] where >= 0.7 means "likely the same concept".
+    """
+    if name_a == name_b:
+        return 1.0
+
+    # Substring containment: one name is contained in the other
+    if name_a in name_b or name_b in name_a:
+        # Only count as similar if they're of comparable length
+        # (avoid matching "国内" with "国内行车娱乐限制")
+        len_ratio = min(len(name_a), len(name_b)) / max(len(name_a), len(name_b))
+        if len_ratio >= 0.5:
+            return 0.85 + 0.05 * len_ratio  # range 0.875-0.90
+        return 0.55  # too different in length → below threshold
+
+    return SequenceMatcher(None, name_a, name_b).ratio()
+
+
+# =============================================================================
+# Concept Clustering & Merging
+# =============================================================================
+
+def cluster_concepts(
+    all_concepts_lists: list[list[dict]],
+    similarity_threshold: float = 0.7,
+) -> list[list[tuple[int, dict]]]:
+    """Group concepts across ensemble versions by name similarity.
+
+    Uses greedy single-pass clustering: for each concept, find the best-matching
+    existing cluster. If max similarity >= threshold, add to it; otherwise,
+    create a new cluster.
+
+    Args:
+        all_concepts_lists: List of concept lists, one per ensemble version.
+                            all_concepts_lists[i] = concepts from version i.
+        similarity_threshold: Minimum name similarity to join a cluster.
+
+    Returns:
+        List of clusters. Each cluster is list of (version_idx, concept_dict).
+    """
+    clusters = []  # type: list[list[tuple[int, dict]]]
+
+    for version_idx, concepts in enumerate(all_concepts_lists):
+        for c in concepts:
+            name = c.get("name", "")
+            if not name:
+                continue
+
+            best_cluster = None
+            best_sim = 0.0
+
+            for cluster in clusters:
+                # Compare against the first member of the cluster (seed)
+                seed_name = cluster[0][1].get("name", "")
+                sim = concept_name_similarity(name, seed_name)
+                if sim > best_sim:
+                    best_sim = sim
+                    best_cluster = cluster
+
+            if best_cluster is not None and best_sim >= similarity_threshold:
+                best_cluster.append((version_idx, c))
+            else:
+                clusters.append([(version_idx, c)])
+
+    return clusters
+
+
+def merge_concept_cluster(
+    cluster: list[tuple[int, dict]],
+    total_versions: int,
+) -> tuple[dict, str]:
+    """Merge a single cluster of matched concepts into one concept dict.
+
+    Rules:
+      - name: Longest name (most specific). Tie-break by lower version_idx.
+      - aliases: Union of all aliases across versions.
+      - defined_in: Union of all defined_in across versions.
+      - parent: Most common non-null parent (voting). Tie-break by lower version_idx.
+
+    Returns:
+        (merged_concept_dict, confidence_level) where confidence is "high"/"medium"/"low".
+    """
+    if not cluster:
+        return {}, "low"
+
+    # --- name: longest (most specific) ---
+    best_name = ""
+    best_name_len = 0
+    for v_idx, c in cluster:
+        n = c.get("name", "")
+        if len(n) > best_name_len:
+            best_name = n
+            best_name_len = len(n)
+        elif len(n) == best_name_len and v_idx < cluster[0][0]:  # lower version idx
+            best_name = n
+
+    # --- aliases: union ---
+    aliases = set()
+    for _, c in cluster:
+        for a in c.get("aliases", []):
+            aliases.add(a)
+
+    # --- defined_in: union ---
+    defined_in = set()
+    for _, c in cluster:
+        for d in c.get("defined_in", []):
+            defined_in.add(d)
+
+    # --- parent: most common non-null parent (vote) ---
+    parent_votes = defaultdict(int)
+    for v_idx, c in cluster:
+        p = c.get("parent")
+        if p is not None:
+            parent_votes[p] += 1
+
+    if parent_votes:
+        best_parent = max(parent_votes, key=lambda p: (parent_votes[p], -1))
+    else:
+        best_parent = None
+
+    # --- confidence ---
+    versions_present = len({v_idx for v_idx, _ in cluster})
+    confidence = compute_confidence_versions(versions_present, total_versions,
+                                              any(v_idx == 0 for v_idx, _ in cluster))
+
+    merged = {
+        "name": best_name,
+        "aliases": sorted(aliases),
+        "defined_in": sorted(defined_in),
+        "parent": best_parent,
+        "confidence": confidence,
+    }
+    return merged, confidence
+
+
+# =============================================================================
+# Unit Similarity Functions
+# =============================================================================
+
+def _collect_logic_tree_nodes(unit: dict) -> set[str]:
+    """Extract the flattened set of all logic tree node IDs from a function_unit."""
+    nodes = set()
+    for src in unit.get("sources", []):
+        if src.get("type") == "logic_tree":
+            nodes.update(src.get("logic_tree_nodes", []))
+    return nodes
+
+
+def unit_node_jaccard(unit_a: dict, unit_b: dict) -> float:
+    """Compute Jaccard similarity on logic tree node sets between two units.
+
+    Jaccard(A, B) = |A ∩ B| / |A ∪ B|. Returns 0.0 if both have no nodes.
+    """
+    nodes_a = _collect_logic_tree_nodes(unit_a)
+    nodes_b = _collect_logic_tree_nodes(unit_b)
+
+    if not nodes_a and not nodes_b:
+        return 0.0
+    if not nodes_a or not nodes_b:
+        return 0.0
+
+    intersection = nodes_a & nodes_b
+    union = nodes_a | nodes_b
+    return len(intersection) / len(union)
+
+
+def path_similarity(path_a: list[str], path_b: list[str]) -> float:
+    """Compute similarity between two path arrays.
+
+    Hybrid approach:
+      - Sequential similarity (order-aware): SequenceMatcher on joined strings.
+      - Set similarity (order-independent): Jaccard on path element sets.
+      - Final score: 0.5 * seq_sim + 0.5 * set_sim
+
+    Returns:
+        float in [0.0, 1.0].
+    """
+    if not path_a and not path_b:
+        return 1.0
+    if not path_a or not path_b:
+        return 0.0
+
+    # Sequential similarity
+    joined_a = "|".join(path_a)
+    joined_b = "|".join(path_b)
+    seq_sim = SequenceMatcher(None, joined_a, joined_b).ratio()
+
+    # Set similarity
+    set_a = set(path_a)
+    set_b = set(path_b)
+    set_sim = len(set_a & set_b) / len(set_a | set_b)
+
+    return 0.5 * seq_sim + 0.5 * set_sim
+
+
+def unit_similarity(unit_a: dict, unit_b: dict) -> float:
+    """Combined similarity between two function_units.
+
+    Weighted combination:
+      - 0.6 * unit_node_jaccard  (primary signal: same logic tree nodes = same rule)
+      - 0.4 * path_similarity    (secondary signal: semantic agreement)
+
+    Returns:
+        float in [0.0, 1.0]. >= 0.5 means "likely the same function_unit".
+    """
+    return 0.6 * unit_node_jaccard(unit_a, unit_b) + 0.4 * path_similarity(
+        unit_a.get("path", []), unit_b.get("path", [])
+    )
+
+
+# =============================================================================
+# Function Unit Clustering & Merging
+# =============================================================================
+
+def cluster_function_units(
+    all_units_lists: list[list[dict]],
+    similarity_threshold: float = 0.5,
+) -> list[list[tuple[int, dict]]]:
+    """Group function_units across ensemble versions by content similarity.
+
+    Lowest-temperature versions are processed first (most stable → cluster seeds).
+    Higher-temperature variants join existing clusters if similar enough.
+
+    Args:
+        all_units_lists: List of unit lists, one per ensemble version.
+        similarity_threshold: Minimum unit_similarity to join a cluster.
+
+    Returns:
+        List of clusters. Each cluster is list of (version_idx, unit_dict).
+    """
+    clusters = []  # type: list[list[tuple[int, dict]]]
+
+    for version_idx, units in enumerate(all_units_lists):
+        for unit in units:
+            best_cluster = None
+            best_sim = 0.0
+
+            for cluster in clusters:
+                # Compare against all members already in the cluster
+                cluster_sim = max(
+                    unit_similarity(unit, existing_unit)
+                    for (_, existing_unit) in cluster
+                )
+                if cluster_sim > best_sim:
+                    best_sim = cluster_sim
+                    best_cluster = cluster
+
+            if best_cluster is not None and best_sim >= similarity_threshold:
+                best_cluster.append((version_idx, unit))
+            else:
+                clusters.append([(version_idx, unit)])
+
+    return clusters
+
+
+def pick_best_representative(
+    cluster: list[tuple[int, dict]],
+) -> dict:
+    """Select the best function_unit from a cluster as the merged representative.
+
+    Scoring formula (all normalized to [0, 1]):
+      - 0.35: Node count (more logic_tree_nodes = more complete trace)
+      - 0.25: Source count (more sources = more evidence)
+      - 0.20: Description length (longer = more detail, capped at 500 chars)
+      - 0.20: Temperature rank (lower version_idx = lower temp = more stable)
+
+    Returns a deep copy of the winning unit dict.
+    """
+    if not cluster:
+        return {}
+
+    # Compute max values for normalization
+    max_nodes = max(
+        len(_collect_logic_tree_nodes(unit)) for _, unit in cluster
+    )
+    max_sources = max(
+        len(unit.get("sources", [])) for _, unit in cluster
+    )
+    max_desc_len = max(
+        len(unit.get("description", "")) for _, unit in cluster
+    )
+    max_version_idx = max(v_idx for v_idx, _ in cluster)
+    num_versions = len(cluster)
+
+    def score(v_idx: int, unit: dict) -> float:
+        nodes = len(_collect_logic_tree_nodes(unit))
+        sources = len(unit.get("sources", []))
+        desc_len = min(len(unit.get("description", "")), 500)
+        temp_rank = 1.0 - (v_idx / max(num_versions, max_version_idx + 1))
+
+        return (
+            0.35 * (nodes / max(1, max_nodes))
+            + 0.25 * (sources / max(1, max_sources))
+            + 0.20 * (desc_len / max(1, max_desc_len))
+            + 0.20 * temp_rank
+        )
+
+    best = max(cluster, key=lambda x: score(x[0], x[1]))
+    return dict(best[1])  # deep-ish copy (1 level)
+
+
+def merge_unit_sources(
+    cluster: list[tuple[int, dict]],
+) -> list[dict]:
+    """Union all sources from units in a cluster, deduplicating by (type, image_id, section).
+
+    When the same source key appears in multiple versions, keeps the one with
+    the most logic_tree_nodes.
+    """
+    # Group by dedup key
+    source_groups = defaultdict(list)
+
+    for v_idx, unit in cluster:
+        for src in unit.get("sources", []):
+            # Build a dedup key
+            src_type = src.get("type", "")
+            if src_type == "logic_tree":
+                key = ("logic_tree", src.get("image_id", ""))
+            else:
+                key = (src_type, src.get("section", ""), src.get("row", ""))
+
+            source_groups[key].append(src)
+
+    # Pick best per group
+    result = []
+    for key, sources in source_groups.items():
+        # Pick the source with the most logic_tree_nodes (if any)
+        best = max(sources, key=lambda s: len(s.get("logic_tree_nodes", [])))
+        result.append(dict(best))
+
+    return result
+
+
+def compute_confidence_versions(
+    versions_present: int,
+    total_versions: int,
+    includes_lowest_temp: bool = False,
+) -> str:
+    """Compute 3-level confidence based on cross-version agreement.
+
+    - "high": Appears in all versions, OR >= 2/3 with lowest-temp version (T=0.0).
+    - "medium": Appears in >= half the versions but not all.
+    - "low": Appears in fewer than half (singleton in ensemble).
+
+    Args:
+        versions_present: Number of versions this item appeared in.
+        total_versions: Total number of ensemble versions.
+        includes_lowest_temp: Whether the item appeared in the T=0.0 version.
+    """
+    ratio = versions_present / total_versions
+
+    if ratio >= 1.0:
+        return "high"
+    if ratio >= 0.5 and includes_lowest_temp:
+        return "high"
+    if ratio >= 0.5:
+        return "medium"
+    return "low"
+
+
+def ensemble_merge_concepts(
+    all_concepts_lists: list[list[dict]],
+) -> list[dict]:
+    """Merge concepts across all ensemble versions.
+
+    Returns:
+        List of merged concept dicts, each with added "confidence" field.
+    """
+    total = len(all_concepts_lists)
+    clusters = cluster_concepts(all_concepts_lists)
+    merged = []
+    seen_names = set()
+
+    for cluster in clusters:
+        concept, confidence = merge_concept_cluster(cluster, total)
+        name = concept.get("name", "")
+        if name and name not in seen_names:
+            concept["ensemble_support"] = f"{len({v for v, _ in cluster})}/{total}"
+            merged.append(concept)
+            seen_names.add(name)
+
+    # Sort: high confidence first, then by name
+    conf_order = {"high": 0, "medium": 1, "low": 2}
+    merged.sort(key=lambda c: (conf_order.get(c.get("confidence", "low"), 3), c.get("name", "")))
+
+    # Validate and fix parent references
+    merged = _validate_concept_parents(merged)
+
+    return merged
+
+
+def _validate_concept_parents(concepts: list[dict]) -> list[dict]:
+    """Post-merge: validate that every concept's parent exists in the list.
+
+    Strategy for dangling parents:
+      1. Fuzzy match (concept_name_similarity >= 0.7) → fix reference
+      2. No match → set parent to null, downgrade confidence to "low"
+    """
+    concept_names = {c["name"] for c in concepts}
+    conf_order = {"high": 0, "medium": 1, "low": 2}
+
+    for c in concepts:
+        parent = c.get("parent")
+        if parent is None:
+            continue
+        if parent in concept_names:
+            continue
+
+        # Dangling parent — try fuzzy match
+        best_match = None
+        best_sim = 0.0
+        for name in concept_names:
+            sim = concept_name_similarity(parent, name)
+            if sim > best_sim:
+                best_sim = sim
+                best_match = name
+
+        if best_match and best_sim >= 0.7:
+            c["parent"] = best_match
+            # Downgrade if match was fuzzy (not exact)
+            if best_sim < 1.0:
+                current_conf = c.get("confidence", "low")
+                c["confidence"] = _downgrade_confidence(current_conf)
+        else:
+            c["parent"] = None
+            c["confidence"] = _downgrade_confidence(c.get("confidence", "low"))
+
+    # Re-sort after confidence changes
+    concepts.sort(key=lambda c: (conf_order.get(c.get("confidence", "low"), 3), c.get("name", "")))
+    return concepts
+
+
+def _downgrade_confidence(current: str) -> str:
+    """Drop confidence one level."""
+    if current == "high":
+        return "medium"
+    return "low"
+
+
+def ensemble_merge_function_units(
+    all_units_lists: list[list[dict]],
+) -> list[dict]:
+    """Merge function_units across all ensemble versions.
+
+    1. Cluster units across versions.
+    2. For each cluster: pick best, merge sources, compute confidence.
+    3. Reassign stable unit_ids: FU-ENS-001, FU-ENS-002, ...
+
+    Returns:
+        List of merged function_unit dicts with added "confidence",
+        "ensemble_support", "source_versions" fields.
+    """
+    total = len(all_units_lists)
+    clusters = cluster_function_units(all_units_lists)
+
+    merged = []
+    for cluster in clusters:
+        # Pick best representative
+        best = pick_best_representative(cluster)
+
+        # Merge sources from all cluster members
+        best["sources"] = merge_unit_sources(cluster)
+
+        # Compute confidence
+        versions_present = len({v_idx for v_idx, _ in cluster})
+        includes_t0 = any(v_idx == 0 for v_idx, _ in cluster)
+        confidence = compute_confidence_versions(
+            versions_present, total, includes_t0
+        )
+
+        best["confidence"] = confidence
+        best["ensemble_support"] = f"{versions_present}/{total}"
+        best["source_versions"] = versions_present
+
+        merged.append(best)
+
+    # Sort by confidence desc, then by unit_id
+    conf_order = {"high": 0, "medium": 1, "low": 2}
+    merged.sort(key=lambda u: (conf_order.get(u.get("confidence", "low"), 3),
+                                 u.get("unit_id", "")))
+
+    # Reassign stable unit_ids
+    for i, unit in enumerate(merged):
+        # Preserve original unit_id for traceability
+        if "original_unit_id" not in unit:
+            unit["original_unit_id"] = unit.get("unit_id", "")
+        unit["unit_id"] = f"FU-ENS-{i + 1:03d}"
+
+    return merged
+
+
+# =============================================================================
+# Top-Level Ensemble Merge
+# =============================================================================
+
+def ensemble_merge(
+    semantic_indices: list[dict],
+) -> dict:
+    """Merge N semantic index outputs into one ensemble result.
+
+    Args:
+        semantic_indices: List of semantic_index dicts from each temperature run.
+                          semantic_indices[0] should be the lowest-temperature version.
+
+    Returns:
+        Merged semantic_index dict with structure:
+        {
+            "feature_name": str,
+            "ensemble_versions": int,
+            "concepts": [...],
+            "function_units": [...],
+            "confidence_summary": {...},
+        }
+    """
+    if not semantic_indices:
+        return {
+            "feature_name": "",
+            "ensemble_versions": 0,
+            "concepts": [],
+            "function_units": [],
+            "confidence_summary": {},
+        }
+
+    total = len(semantic_indices)
+
+    # Extract concepts and function_units from each version
+    all_concepts = [si.get("concepts", []) for si in semantic_indices]
+    all_units = [si.get("function_units", []) for si in semantic_indices]
+
+    # Merge
+    merged_concepts = ensemble_merge_concepts(all_concepts)
+    merged_units = ensemble_merge_function_units(all_units)
+
+    # Feature name: majority vote across versions
+    feature_names = [si.get("feature_name", "") for si in semantic_indices]
+    name_counts = defaultdict(int)
+    for fn in feature_names:
+        if fn:
+            name_counts[fn] += 1
+    feature_name = max(name_counts, key=name_counts.get) if name_counts else ""
+
+    # Confidence summary
+    unit_conf = defaultdict(int)
+    for u in merged_units:
+        unit_conf[u.get("confidence", "low")] += 1
+    concept_conf = defaultdict(int)
+    for c in merged_concepts:
+        concept_conf[c.get("confidence", "low")] += 1
+
+    return {
+        "feature_name": feature_name,
+        "ensemble_versions": total,
+        "concepts": merged_concepts,
+        "function_units": merged_units,
+        "confidence_summary": {
+            "total_units": len(merged_units),
+            "high": unit_conf.get("high", 0),
+            "medium": unit_conf.get("medium", 0),
+            "low": unit_conf.get("low", 0),
+            "total_concepts": len(merged_concepts),
+            "concept_high": concept_conf.get("high", 0),
+            "concept_medium": concept_conf.get("medium", 0),
+            "concept_low": concept_conf.get("low", 0),
+        },
+    }
@@ -0,0 +1,157 @@
+"""
+IR Generation Pipeline Orchestrator.
+
+Run all four stages sequentially:
+  python main.py [--skip-step1] [--skip-step2] [--skip-step2.5] [--skip-step3] [--test-only]
+
+The pipeline reads the parsed PRD JSON from doc_parser and produces:
+  - ir_final.json:      the final IR rules
+  - ir_audit_report.md: completeness audit report for human review
+"""
+
+import argparse
+import os
+import subprocess
+import sys
+from pathlib import Path
+
+import config
+
+BASE_DIR = Path(__file__).parent
+
+
+def _subprocess_env(extra: dict | None = None) -> dict:
+    """Build environment dict for subprocesses, carrying forward overrides."""
+    env = os.environ.copy()
+    env.update(extra or {})
+    return env
+
+
+def run_step(script_name: str, description: str, extra_env: dict | None = None) -> bool:
+    """Run a single pipeline step script, return True if it succeeded."""
+    print(f"\n{'#' * 60}")
+    print(f"# {description}")
+    print(f"{'#' * 60}")
+    script_path = BASE_DIR / script_name
+    if not script_path.exists():
+        print(f"错误: 脚本不存在 {script_path}")
+        return False
+    result = subprocess.run(
+        [sys.executable, str(script_path)],
+        cwd=str(BASE_DIR),
+        env=_subprocess_env(extra_env),
+    )
+    return result.returncode == 0
+
+
+def run_test(test_name: str, description: str, extra_env: dict | None = None) -> bool:
+    """Run a test script, return True if all tests passed."""
+    print(f"\n{'='*60}")
+    print(f"测试: {description}")
+    print(f"{'='*60}")
+    test_path = BASE_DIR / "tests" / test_name
+    if not test_path.exists():
+        print(f"错误: 测试脚本不存在 {test_path}")
+        return False
+    result = subprocess.run(
+        [sys.executable, str(test_path)],
+        cwd=str(BASE_DIR),
+        env=_subprocess_env(extra_env),
+    )
+    return result.returncode == 0
+
+
+def main():
+    parser = argparse.ArgumentParser(description="IR Generation Pipeline")
+    parser.add_argument("--skip-step1", action="store_true",
+                        help="跳过阶段一（语义索引）")
+    parser.add_argument("--skip-step2", action="store_true",
+                        help="跳过阶段二（IR 提取）")
+    parser.add_argument("--skip-step2.5", "--skip-step2-5", action="store_true",
+                        dest="skip_step2_5",
+                        help="跳过阶段2.5（分支覆盖自动补全）")
+    parser.add_argument("--skip-step3", action="store_true",
+                        help="跳过阶段三（合并与审计）")
+    parser.add_argument("--test-only", action="store_true",
+                        help="仅运行测试，不调用 LLM")
+    parser.add_argument(
+        "--input", "-i", type=str, default=None,
+        help="输入 JSON 文件路径（覆盖默认的 doc_parser 输出）"
+    )
+    parser.add_argument(
+        "--provider", "-p", type=str, default=None,
+        help="LLM provider: deepseek | dashscope（覆盖 IR_PROVIDER 环境变量）"
+    )
+    args = parser.parse_args()
+
+    # Build extra env vars for subprocesses
+    extra_env = {}
+    if args.input:
+        extra_env["IR_INPUT_JSON"] = args.input
+        print(f"输入文件: {args.input}")
+    if args.provider:
+        extra_env["IR_PROVIDER"] = args.provider
+        print(f"LLM Provider: {args.provider}")
+
+    if args.test_only:
+        all_ok = True
+        all_ok &= run_test("test_step1.py", "Step 1 验证", extra_env)
+        all_ok &= run_test("test_step2.py", "Step 2 验证", extra_env)
+        all_ok &= run_test("test_step2_5.py", "Step 2.5 验证", extra_env)
+        all_ok &= run_test("test_step3.py", "Step 3 验证", extra_env)
+        sys.exit(0 if all_ok else 1)
+
+    failures = []
+
+    # Stage 1
+    if not args.skip_step1:
+        ok = run_step("step1_semantic_index.py",
+                       "阶段一：宏观语义索引", extra_env)
+        if not ok:
+            failures.append("阶段一")
+            print("\n阶段一失败，停止流水线。修复后重试。")
+            sys.exit(1)
+        run_test("test_step1.py", "Step 1 验证", extra_env)
+
+    # Stage 2
+    if not args.skip_step2:
+        ok = run_step("step2_ir_extraction.py",
+                       "阶段二：逐功能单元 IR 提取", extra_env)
+        if not ok:
+            failures.append("阶段二")
+            print("\n阶段二失败，停止流水线。修复后重试。")
+            sys.exit(1)
+        run_test("test_step2.py", "Step 2 验证", extra_env)
+
+    # Stage 2.5
+    if not args.skip_step2_5:
+        ok = run_step("step2_5_branch_coverage.py",
+                       "阶段2.5：分支覆盖自动补全", extra_env)
+        if not ok:
+            failures.append("阶段2.5")
+            print("\n阶段2.5失败，停止流水线。修复后重试。")
+            sys.exit(1)
+        run_test("test_step2_5.py", "Step 2.5 验证", extra_env)
+
+    # Stage 3
+    if not args.skip_step3:
+        ok = run_step("step3_merge_and_audit.py",
+                       "阶段三：确定性合并与完整性校验", extra_env)
+        if not ok:
+            failures.append("阶段三")
+            sys.exit(1)
+        run_test("test_step3.py", "Step 3 验证", extra_env)
+
+    if failures:
+        print(f"\n失败阶段: {', '.join(failures)}")
+        sys.exit(1)
+
+    print(f"\n{'='*60}")
+    print("流水线全部完成!")
+    print(f"最终 IR: {config.IR_FINAL_JSON}")
+    print(f"审计报告: {config.IR_AUDIT_REPORT_MD}")
+    print(f"{'='*60}")
+
+
+if __name__ == "__main__":
+    main()
@@ -0,0 +1,46 @@
+## 上一轮遗漏分析
+
+上一轮生成的语义索引经过自动校验，发现以下问题需要修正：
+
+### 遗漏的逻辑树路径
+以下逻辑树决策路径未被任何 function_unit 覆盖，请为每条路径生成对应的 function_unit：
+{missing_paths}
+
+### 遗漏的概念
+以下关键概念未在 concepts 列表中出现，请补充：
+{missing_concepts}
+
+### 格式问题
+以下 function_unit 或 concept 的格式不符合要求：
+{format_issues}
+
+### concept parent 问题
+以下概念的 parent 引用有问题（悬空引用或缺少 parent）：
+{parent_issues}
+
+---
+
+请在本次生成中针对以上问题进行修正。注意：
+1. 你不需要从头生成完整的语义索引，只需要输出**补充和修正**的部分
+2. function_units 的输出应只包含本次新增或修正的单元（已有的正确单元不需要重复）
+3. concepts 的输出应只包含本次新增或修正的概念
+4. 如果格式问题中提到"空壳单元"：删除该 unit，或将其合并到包含实际 action 的 unit 中。纯开关状态不是独立的功能行为
+5. 如果格式问题中提到"不构成有效路径"：说明你引用了互斥分支上的节点。检查 logic_tree_nodes，确保它们都落在逻辑树的**同一条分支路径**上（例如 n4 是关闭分支，n8 是开启分支，不能共存）
+6. 如果格式问题中提到"缺少 path"或"缺少 sources"：补充对应字段
+
+## 输出格式
+
+只输出 JSON：
+
+{
+  "feature_name": "（与之前相同）",
+  "supplemental_function_units": [
+    // 只放新增的或修正的 function_unit
+  ],
+  "supplemental_concepts": [
+    // 只放新增的或修正的 concept
+  ],
+  "corrections": {
+    // 需要修正的已有项: { "unit_id或concept_name": { 修正后的字段 }, ... }
+  }
+}
@@ -0,0 +1,123 @@
+你是吉利汽车车机系统（XX Auto）的产品需求分析师。你的任务是从行车娱乐限制功能 PRD 文档中提取"语义索引"——一份结构化、有层级的功能清单，而不是逐字翻译。
+
+## 文档结构说明
+
+下面是一份 Word 文档的解析结果，包含：
+
+1. **sections**：按章节组织的混合内容（段落 + 表格），每个 section 有 `source`（章节标题）、`blocks`（`para` 文本段落和 `table` 结构表格）、`images`（引用的图片 ID 列表）
+2. **image_analysis**：文档中流程图的程序化分析结果，其中 `logic_tree` 是由节点组成的决策树：
+   - `state` 节点：状态说明
+   - `decision` 节点：判断条件 + `branches`（分支值 → 目标节点 ID）
+   - `action` 节点：系统或用户交互动作
+   - `end` 节点：路径终点（叶子节点），其后不再有分支
+3. **resolved_conflicts**：文档中图文冲突的仲裁结果，明确指出应以文字还是图片为准
+
+## 文档全文
+
+{document_json}
+
+## 你的任务
+
+阅读整份文档后，输出一份 **语义索引 JSON**，包含：
+
+### 1. feature_name
+功能名称，如"行车娱乐限制"
+
+### 2. concepts（带层级）
+文档中定义或使用的关键概念列表。每个概念包含：
+- `name`：概念的标准名称（必填）
+- `aliases`：同义词/别名列表，仅收录同一概念的不同写法（如"P档"与"P挡"）；注意"行车娱乐限制"与"行车娱乐禁止"是两个不同的概念，不能互为别名
+- `defined_in`：定义该概念的章节号列表（如 ["3.1", "3.1.1"]）
+- `parent`：父概念名称（字符串或 null）（必填）
+
+**概念层级规则（重要）**：
+你必须按照以下 4 层结构组织概念，并为每个概念指定正确的 `parent`：
+- **Level 0（地理范围）**: "国内"、"海外" — parent 为 null
+- **Level 1（功能）**: "行车娱乐限制"、"行车娱乐禁止" — parent 为对应的 scope（如 "国内"）
+- **Level 2（限制方式）**: "系统限制"、"SDK限制"、"其他应用" — parent 为对应的 feature
+- **Level 3（具体行为）**: "前台打断"、"后台限制启动"、"后台暂停功能"、"无限制" — parent 为对应的 method
+
+除了以上层级，还可以有"行车娱乐限制开关"、"车速条件"、"档位条件"、"Toast提示"等辅助概念，它们应有合理的 parent。
+
+**重要约束：每个 concept 的 parent 值必须是 concepts 列表中已存在的另一个 concept 的 name，或者是 null。禁止引用不存在的概念名。**
+
+### 3. function_units（带路径）
+文档中描述的所有主要功能行为的列表。**每个 function_unit 对应逻辑树中的一条叶子路径**。每个 function_unit 包含：
+
+- `unit_id`：唯一标识，格式 "FU-001", "FU-002"...
+- `name`：简短名称，如"国内-系统限制-前台-行车打断"
+- `description`：1-3 句描述该规则的行为
+- `path`：层级路径数组，从高到低，如 `["国内", "系统限制", "前台打断"]`（必填）。**path 中的每个元素必须是 concepts 列表中已存在的概念名。**
+- `sources`：该规则在文档中的来源锚点列表，每项包含：
+  - `section`：章节号
+  - `type`：来源类型，`"table"` 或 `"para"` 或 `"logic_tree"`
+  - `row`：如果来源是表格，填写对应的行号（从 1 开始）
+  - `text_snippet`：前 200 字的关键文字
+  - `image_id`：如果是逻辑树来源，填写图片 rId
+  - `logic_tree_nodes`：如果是逻辑树来源，列出相关节点 ID 列表
+
+## function_units 分解策略（重要）
+
+**按逻辑树的每条叶子路径生成一个 function_unit**：
+
+1. **叶子路径 = 从根节点到叶子节点（end 类型）的完整决策链**，包含路径上所有中间节点和叶子节点的最终动作
+2. **每条叶子路径对应一个 function_unit**：不同决策分支导向不同叶子节点 → 不同的 function_unit
+3. **"不受限"叶子节点也必须建模**：即使 action 是"不执行任何限制操作"，也要创建对应的 function_unit
+4. **禁止合并不同叶子节点**：不要将多个不同叶子节点的结果合并到一个 function_unit（除非它们触发完全相同的动作且属于同一父分支）
+5. **文字描述中的功能单独列出**：对于无法对应到逻辑树节点的功能（如纯文字描述的功能行为），用 table/para 类型 source，path 用语义路径
+6. **非流程图的图片也可能包含功能行为**：rId18 等图片的描述文本中可能包含功能规则（如"使用语音打开受限应用"），同样需要提取为 function_unit
+
+**重要：不要创建纯开关/状态的空壳 unit**。"开关开启"本身不是一个功能行为（它没有 action），它是其他单元的 precondition。如果一个 function_unit 的 path 只有 `["国内", "开关开启"]` 且 sources 中只有 n1/n2/n3 这样的根/开关节点，说明它不是真正的功能单元，不应该输出。
+
+{feedback}
+
+## 权威性规则
+
+1. **逻辑树（流程图）是权威来源**：逻辑树定义了功能的确切行为。识别 function_unit 时必须优先按逻辑树路径建模。文字和表格用于补充描述、提供确切措辞（如 Toast 文案），但不应覆盖或曲解逻辑树路径。
+
+2. **logic_tree_nodes 必须构成有效路径**：每个 function_unit 引用的 logic_tree_nodes 列表，必须对应逻辑树中的**一条连通路径**。禁止将互斥分支上的节点混入同一个 source（例如 n4 是"开关关闭"分支，n8 是"开关开启"分支的下游节点，它们不能出现在同一 function_unit 中）。
+
+3. **resolved_conflicts 中的仲裁是最终决定**：如果文档有图文冲突且已仲裁，严格按仲裁结果处理。
+
+4. **逻辑树路径应全部覆盖**：下面是程序从文档逻辑树中枚举的全部决策路径，请逐一确认每条路径都有对应的 function_unit：
+
+{logic_tree_paths}
+
+## 关键要求
+
+1. **必须覆盖所有逻辑树路径**：上面列出的每条路径必须被至少一个 function_unit 的 sources 引用。
+
+2. **必须覆盖表格中的所有规则**：表格中列出的每种"限制方法"、"限制规则"都要有对应的 function_unit。
+
+3. **区分"限制"与"禁止"**：文档中"行车娱乐限制"（前台应用打断）和"行车娱乐禁止"（后台应用启动限制）是两个不同的子场景，必须分别建模。
+
+4. **区分不同应用类型**：系统限制、SDK 限制、其他应用的行为路径不同，必须分别建模。
+
+5. **包含开关状态**：开关"开启"和"关闭"两种状态下的行为都要覆盖。
+
+6. **概念和路径必须有层级**：每个 concept 指定正确的 parent；每个 function_unit 输出 path 数组。
+
+## 输出格式
+
+**只输出 JSON，不要有 markdown 代码块标记或其他文字**：
+
+{
+  "feature_name": "...",
+  "concepts": [
+    {"name": "国内", "aliases": [], "defined_in": ["2.7", "3.1"], "parent": null},
+    {"name": "行车娱乐限制", "aliases": [], "defined_in": ["3.1", "3.1.1"], "parent": "国内"},
+    ...
+  ],
+  "function_units": [
+    {
+      "unit_id": "FU-001",
+      "name": "国内-系统限制-前台-行车打断",
+      "description": "...",
+      "path": ["国内", "系统限制", "前台打断"],
+      "sources": [
+        {"section": "3.1.1", "type": "table", "row": 2, "text_snippet": "打断：车速>=15km/h且持续5秒后..."},
+        {"image_id": "rId16", "type": "logic_tree", "logic_tree_nodes": ["n2","n3","n8","n19","n21","n23","n25","n26"]}
+      ]
+    },
+    ...
+  ]
+}
@@ -0,0 +1,200 @@
+你是吉利汽车车机系统的需求分析专家。你的任务是基于给定的精准上下文包，为单个功能单元（Function Unit）提取详细的 **IR 规则（Intermediate Representation Rule）**。
+
+## 上下文
+
+下面是一个功能单元的精准上下文包，包含了从原始需求文档中提取的相关文字、表格和逻辑树：
+
+### 功能单元概要
+- **unit_id**: {unit_id}
+- **unit_name**: {unit_name}
+- **unit_description**: {unit_description}
+
+### 相关文字段落
+{texts}
+
+### 相关表格
+{tables}
+
+### 相关逻辑树
+{logic_trees}
+
+### 图文冲突仲裁（如有）
+{resolved_conflicts}
+
+## IR Schema
+
+你需要为这个功能单元输出一个 **规则数组（rules）**。每条规则遵循以下 schema：
+
+```json
+{{
+  "rule_id": "{unit_id}-DOMESTIC-SYS-FG-INTERRUPT-01",
+  "path": ["国内", "系统限制", "前台打断"],
+  "description": "国内车型，开关开启，系统限制类应用在前台，车速>=15km/h且持续>5秒且非P档时，系统打断应用前台进程、将应用调入后台，显示Toast'在行车状态下无法使用该应用'",
+  "priority": "P0",
+  "sources": [
+    {{"type": "table", "section": "3.1.1", "row": 2, "text_snippet": "打断：车速>=15km/h且持续5秒后...", "priority": "supplementary"}},
+    {{"type": "logic_tree", "image_id": "rId16", "node_ids": ["n2","n3","n8","n19","n21","n23","n25","n26"], "priority": "primary_source"}}
+  ],
+  "precondition": {{
+    "geographic_scope": "国内",
+    "screen_type": "any",
+    "switch": "开启",
+    "app_type": "系统限制",
+    "app_state": "前台"
+  }},
+  "trigger": {{
+    "operator": "AND",
+    "conditions": [
+      {{"signal": "车速", "operator": ">=", "value": 15, "unit": "km/h"}},
+      {{"signal": "车速_持续时间", "operator": ">", "value": 5, "unit": "秒"}},
+      {{"signal": "档位", "operator": "!=", "value": "P"}}
+    ]
+  }},
+  "actions": [
+    {{"type": "system", "description": "打断应用前台进程"}},
+    {{"type": "system", "description": "将应用调入后台"}},
+    {{"type": "user_interaction", "description": "显示Toast", "content": "在行车状态下无法使用该应用"}}
+  ]
+}}
+```
+
+## 字段说明（必读）
+
+1. **rule_id**: 格式为 `{unit_id}-SCOPE-METHOD-BEHAVIOR-NN`，其中：
+   - SCOPE: DOMESTIC（国内）| OVERSEAS（海外）
+   - METHOD: SYS（系统限制）| SDK（SDK限制）| OTHER（其他应用）
+   - BEHAVIOR: FG-INTERRUPT（前台打断）| BG-BLOCK（后台限制启动）| BG-PAUSE（后台暂停功能）| NO-RESTRICT（无限制）| SWITCH-OFF（开关关闭）
+   - NN: 序号从 01 开始
+
+2. **path**: 层级路径数组（必填）。从 scope 到 behavior 逐级列出，如 `["国内", "系统限制", "前台打断"]`。此字段用于程序化遍历所有功能点。
+
+3. **description**: 完整但简洁地描述整个规则，必须包含：地理范围 + 开关状态 + 应用类型 + 前后台状态 + 触发条件 + 所有动作。人读取此字段即可设计测试用例。
+
+4. **priority**: P0（核心安全规则）、P1（重要规则）、P2（边界情况）。
+
+5. **sources**: 每条规则必须列出所有数据来源。逻辑树类型的 source 必须标记 `"priority": "primary_source"`。文字/表格类型的 source 标记 `"priority": "supplementary"`。**node_ids 必须列举该规则在逻辑树中经历的所有 decision 和 action 节点。**
+
+6. **precondition**: 规则生效的前置状态条件。必须包含以下字段：
+   - `geographic_scope`（必填）："国内" | "海外"
+   - `screen_type`（必填）："CSD" | "PSD" | "RFD" | "any"（如文档未区分屏幕类型则填 "any"）
+   - `switch`：开关状态（"开启" | "关闭"）
+   - `app_type`：应用类型
+   - `app_state`：应用前后台状态（"前台" | "后台"）
+   除标注"必填"的字段外，如某字段不适用，可省略。
+
+7. **trigger**: 触发条件对象：
+   - `operator`: "AND" | "OR"
+   - `conditions`: 条件数组，每个条件必须有 `signal`、`operator`、`value`。有单位加 `unit`。
+   - 如为瞬时事件（用户点击），用 `event` 字段。
+
+8. **actions**: 每个动作必须有 `type`（"system" | "user_interaction"）和 `description`。
+   - `"user_interaction"` 类型必须有 `content` 字段，填写**确切的提示文案**。
+   - **禁止使用占位符**：content 不能是"文案由业务定义"、"待定"、"自定义"等。如果文档中给出了文案，必须原样填入。如果文档确实未给出文案，填写 `"（文档未指定）"` 并标注。
+
+## Few-shot 示例
+
+### 示例 1：行车娱乐限制（前台打断）
+
+**输入上下文**：国内车型，开关开启，系统限制类应用在前台，车速>=15km/h且持续>5秒且非P档时，打断应用并显示Toast"在行车状态下无法使用该应用"。
+
+**期望输出**：
+
+```json
+{{
+  "rule_id": "FU-001-DOMESTIC-SYS-FG-INTERRUPT-01",
+  "path": ["国内", "系统限制", "前台打断"],
+  "description": "国内车型，开关开启，系统限制类应用在前台，当车速>=15km/h且持续超过5秒且非P档时，系统打断应用前台进程、将应用调入后台，并弹出Toast提示'在行车状态下无法使用该应用'",
+  "priority": "P0",
+  "sources": [
+    {{"type": "table", "section": "3.1.1", "row": 2, "text_snippet": "行车娱乐限制：目标应用/功能处于前台时 ○ 打断：车速>=15km/h且持续5秒后...", "priority": "supplementary"}},
+    {{"type": "logic_tree", "image_id": "rId16", "node_ids": ["n2","n3","n8","n19","n21","n23","n25","n26"], "priority": "primary_source"}}
+  ],
+  "precondition": {{
+    "geographic_scope": "国内",
+    "screen_type": "any",
+    "switch": "开启",
+    "app_type": "系统限制",
+    "app_state": "前台"
+  }},
+  "trigger": {{
+    "operator": "AND",
+    "conditions": [
+      {{"signal": "车速", "operator": ">=", "value": 15, "unit": "km/h"}},
+      {{"signal": "车速_持续时间", "operator": ">", "value": 5, "unit": "秒"}},
+      {{"signal": "档位", "operator": "!=", "value": "P"}}
+    ]
+  }},
+  "actions": [
+    {{"type": "system", "description": "打断应用前台进程"}},
+    {{"type": "system", "description": "将应用调入后台"}},
+    {{"type": "user_interaction", "description": "显示Toast", "content": "在行车状态下无法使用该应用"}}
+  ]
+}}
+```
+
+### 示例 2：行车娱乐禁止（后台启动拦截）
+
+**输入上下文**：国内车型，开关开启，应用在后台，非P档时阻止应用启动，提示"请在P挡时使用该功能/应用"。
+
+**期望输出**：
+
+```json
+{{
+  "rule_id": "FU-002-DOMESTIC-SYS-BG-BLOCK-01",
+  "path": ["国内", "系统限制", "后台限制启动"],
+  "description": "国内车型，开关开启，目标应用处于后台，当用户尝试启动应用且档位非P档时，系统限制应用/功能启用，并弹出Toast提示'请在P挡时使用该功能/应用'",
+  "priority": "P0",
+  "sources": [
+    {{"type": "table", "section": "3.1.1", "row": 2, "text_snippet": "行车娱乐禁止：目标应用/功能处于后台时 ○ 限制：非P挡时，限制目标应用/功能启用...", "priority": "supplementary"}},
+    {{"type": "logic_tree", "image_id": "rId17", "node_ids": ["n1","n2","n5","n7"], "priority": "primary_source"}}
+  ],
+  "precondition": {{
+    "geographic_scope": "国内",
+    "screen_type": "any",
+    "switch": "开启",
+    "app_state": "后台"
+  }},
+  "trigger": {{
+    "operator": "AND",
+    "conditions": [
+      {{"signal": "应用请求启动", "operator": "==", "value": true}},
+      {{"signal": "档位", "operator": "!=", "value": "P"}}
+    ]
+  }},
+  "actions": [
+    {{"type": "system", "description": "限制应用/功能启用"}},
+    {{"type": "user_interaction", "description": "显示Toast", "content": "请在P挡时使用该功能/应用"}}
+  ]
+}}
+```
+
+## 关键要求
+
+1. **逻辑树为唯一权威来源**：触发条件和动作序列必须严格按逻辑树路径建模。文字/表格描述仅用于补充确切措辞（如 Toast 文案），不得覆盖或曲解逻辑树路径。在 sources 中，逻辑树类型标记 `"priority": "primary_source"`，文字/表格标记 `"priority": "supplementary"`。
+
+2. **信号和数值必须精确**：禁止写"车速超过阈值"，必须写 `{{"signal": "车速", "operator": ">=", "value": 15, "unit": "km/h"}}`。
+
+3. **条件必须完整**：逻辑树中的每个 decision 条件必须对应 trigger.conditions 中的一条。如果文档说"车速>=15km/h 且持续超过5秒 且非P档"，这三个条件必须全部出现。
+
+4. **每条规则必须自包含**：人仅凭一条 rule JSON 就能设计出对应的测试用例。必须包含：geographic_scope、screen_type、开关状态、应用类型、前后台状态、完整触发条件、所有动作及确切 Toast 文案、来源引用。
+
+5. **禁止占位符**：`"user_interaction"` 类型的 `content` 不能是"文案由业务定义"、"待定"、"自定义"。如文档确实未给出文案，填 `"（文档未指定）"`。
+
+6. **逻辑树节点必须追踪**：在 sources 中列出该规则在逻辑树中经历的所有 decision 节点和 action 节点。
+
+7. **多条规则**：如果一个功能单元包含多个独立行为分支，输出多条规则分别描述。
+
+8. **开关关闭状态**：开关关闭时所有限制失效，这也必须作为一条规则输出（path: ["...", "开关关闭", "无限制"]）。
+
+{format_feedback}
+
+## 输出格式
+
+**只输出 JSON 数组，不要有任何其他文字或 markdown 标记**：
+
+[
+  {{ ... }},
+  {{ ... }}
+]
+
+注意：即使只有一个规则，也必须用数组格式 `[...]`。
@@ -1,105 +0,0 @@
-import logging
-import os
-import time
-from typing import Optional
-
-from openai import OpenAI
-
-logger = logging.getLogger(__name__)
-
-
-class LLMClient:
-    """Low-level OpenAI-compatible LLM client with retry and token tracking.
-
-    Usage::
-
-        llm = LLMClient()
-        content = llm.chat("qwen3.5-flash", [{"role": "user", "content": "Hello"}])
-        print(llm.usage)
-    """
-
-    IMAGE_MODEL = "qwen3-vl-plus"
-    TEXT_MODEL = "qwen3.5-flash-2026-02-23"
-    TIMEOUT = 120
-    MAX_RETRIES = 3
-
-    def __init__(
-        self,
-        *,
-        base_url: str = "https://dashscope.aliyuncs.com/compatible-mode/v1",
-        timeout: int | None = None,
-    ):
-        key = os.environ.get("DASHSCOPE_API_KEY", "")
-        if not key:
-            raise ValueError("DASHSCOPE_API_KEY environment variable is not set.")
-        self._client = OpenAI(api_key=key, base_url=base_url)
-        self._timeout = timeout or self.TIMEOUT
-        self._prompt_tokens = 0
-        self._completion_tokens = 0
-
-    @property
-    def usage(self) -> dict:
-        """Return accumulated token counts as ``{prompt, completion, total}``."""
-        return {
-            "prompt_tokens": self._prompt_tokens,
-            "completion_tokens": self._completion_tokens,
-            "total_tokens": self._prompt_tokens + self._completion_tokens,
-        }
-
-    @staticmethod
-    def estimate_tokens(text: str) -> int:
-        """Quick token estimate.  CJK ≈1.7/token, others ≈3.0/token."""
-        cjk = sum(1 for c in text if '一' <= c <= '鿿' or '　' <= c <= '〿')
-        other = len(text) - cjk
-        return max(1, int(cjk / 1.7 + other / 3.0))
-
-    @staticmethod
-    def estimate_image_tokens() -> int:
-        """Fixed estimate for one vision-model image (~500 tokens)."""
-        return 500
-
-    def chat(
-        self, model: str, messages: list[dict], *, timeout: int | None = None,
-        response_format: dict | None = None,
-    ) -> str:
-        """Send a chat completion request and return the response content.
-
-        Automatically retries on failure and accumulates token usage.
-        """
-        label = f"chat({model})"
-
-        def _call():
-            t0 = time.time()
-            kwargs = dict(model=model, messages=messages, timeout=timeout or self._timeout)
-            if response_format is not None:
-                kwargs["response_format"] = response_format
-            kwargs["temperature"] = 0
-            resp = self._client.chat.completions.create(**kwargs)
-            content = resp.choices[0].message.content
-            usg = resp.usage
-            if usg:
-                self._prompt_tokens += usg.prompt_tokens
-                self._completion_tokens += usg.completion_tokens
-            elapsed = time.time() - t0
-            logger.info("%s: %d chars in %.1fs", label, len(content) if content else 0, elapsed)
-            if not content:
-                raise RuntimeError("Empty response from LLM")
-            return content
-
-        return self._retry(_call, label)
-
-    def _retry(self, fn, label: str) -> str:
-        """Call *fn()* with exponential-backoff retry."""
-        last_error: Optional[Exception] = None
-        for attempt in range(self.MAX_RETRIES):
-            try:
-                return fn()
-            except Exception as e:
-                last_error = e
-                logger.warning(
-                    "%s error (attempt %d/%d): %s",
-                    label, attempt + 1, self.MAX_RETRIES, e,
-                )
-                if attempt < self.MAX_RETRIES - 1:
-                    time.sleep(2 ** attempt)
-        raise RuntimeError(f"{label}: all retries exhausted") from last_error
@@ -1,359 +0,0 @@
-#!/usr/bin/env python3
-"""Generate JSON intermediate representation from ``_parsed.json`` or ``_updated.json``.
-
-Sends the JSON document directly to the LLM for analysis. If the document exceeds
-``MAX_ANALYSIS_TOKENS``, sections are batched greedily without splitting any
-individual section. Conflict corrections from ``resolved_conflicts`` are included
-so the output respects user arbitration decisions.
-
-Usage::
-
-    python scripts/ir_generator.py output/<basename>_updated.json [output_dir] [--dry-run]
-
-Output: ``<basename>_ir.json``
-"""
-
-import argparse
-import json
-import logging
-import os
-import sys
-import time
-
-sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
-
-from LLM import LLMClient
-
-logging.basicConfig(
-    level=logging.INFO,
-    format="%(asctime)s [%(levelname)s] %(message)s",
-    datefmt="%Y-%m-%d %H:%M:%S",
-)
-
-logger = logging.getLogger(__name__)
-
-# ---------------------------------------------------------------------------
-# Configuration
-# ---------------------------------------------------------------------------
-
-RATE_LIMIT_DELAY = 0.5
-MAX_ANALYSIS_TOKENS = 6000  # max content size per LLM call
-
-
-# ---------------------------------------------------------------------------
-# Prompt
-# ---------------------------------------------------------------------------
-
-PROMPT = """你是一个需求文档分析助手。请分析以下需求文档的JSON内容，输出结构化JSON。
-
-## 已知修正（来自冲突检测）
-以下内容已确认修正，生成JSON时请**使用修正后的值**，不要同时输出两个版本。
-{conflict_context}
-
-## 待分析内容（JSON格式）
-
-{content}
-
-## JSON字段说明
- sections: 文档章节列表，每个章节含 source（章节标题）和 blocks（内容块数组）
- blocks: 类型含 para（段落，字段 text）和 table（表格，字段 rows，每行含 columns 数组）
- image_sources: 图片所在章节映射，key 为图片 rid
- image_analysis: 图片分析结果，每个含 rid、type（流程图/架构图/状态图等）、description
- resolved_conflicts: 已知修正列表，每个含 section、conflict_type、correction、source
-
-## 功能点定义
-
-只有满足以下**全部条件**的才视为功能点：
-1. 描述了一个**系统或软件要实现的具体行为**（有触发条件、执行动作、状态变化或逻辑规则）
-2. 该行为直接由**系统或框架**执行（不是人的操作流程、管理流程）
-3. 对用户或系统有**可观察的效果**
-
-**以下内容不是功能点，不要输出：**
- 术语/缩略词定义（
- 文档背景、范围说明（如 "本文档涵盖xxx"）
- 变更日志、版本记录、编制人信息
- 文档结构描述（如 "产品简介用户场景说明"）
- 纯文本的概述、没有具体行为的介绍
-
-## 决策树/流程图分解规则（重要）
-
-图片分析（image_analysis）中的流程图和决策树描述包含丰富的功能逻辑，**必须完全分解**：
-
-1. **每个叶子路径 = 一个独立 function**：从根节点到每个最终结果的完整路径，都拆成一个 function
-2. **每个判断分支 = 一个独立 function**：菱形判断节点的每个分支方向和对应的结果，单独作为一个 function
-3. **不同约束条件 = 不同 function**：例如"通过接入SDK限制"和"通过系统限制"是不同约束机制，必须分别列出
-4. **不要合并不同路径**：即使最终结果相同，只要到达路径不同，就是不同的 function
-
-## 输出格式
-
-只输出功能点，每个功能点格式如下：
-
-{
-  "function": "功能名称",
-  "source": {
-    "section": "章节名",
-    "location": "原文位置（如：正文第1段、表格1第2行、图片rId13）"
-  },
-  "trigger": {
-    "type": "AND或者OR",
-    "conditions": [
-      "触发条件1",
-      "触发条件2"
-    ]
-  },
-  "actions": {
-    "场景/角色": [
-      "动作1",
-      "动作2"
-    ]
-  }
-}
-
-## 输出原则
-
-1. **只输出功能点**，没有功能点就输出空数组 []
-2. 每个功能点**必须**包含 source.section 和 source.location
-3. location 必须是具体的原文位置标签（如 "正文第1段"、"表格1"、"图片rId13"）
-4. **一个 function 只对应一种行为逻辑（一条完整路径）**。决策树中的每个分支路径（从根到叶子）必须拆成独立 function，conditions 中明确写出该路径上的所有判断条件和分支方向。
-5. **穷举所有分支**：流程图/决策树中的每一条分支路径都要输出对应的 function，不能遗漏任何子逻辑。
-6. 没有 trigger 或 actions 的字段直接**省略**，不要写 null 或空列表/空对象
-7. 所有功能点全部列出，**宁多勿漏**
-8. **已知修正**中确认的信息，使用修正后的值
-9. 输出一个JSON数组，不要用 ```json 代码块包裹，直接输出纯JSON
-"""
-
-
-# ---------------------------------------------------------------------------
-# Helpers
-# ---------------------------------------------------------------------------
-
-def _parse_llm_response(raw: str) -> list | dict | str | None:
-    """Parse JSON from LLM response, handling markdown code fences."""
-    if raw is None:
-        return None
-    stripped = raw.strip()
-    if stripped.startswith("```"):
-        nl = stripped.find("\n")
-        stripped = stripped[nl + 1:] if nl != -1 else stripped[3:]
-    if stripped.endswith("```"):
-        stripped = stripped[:-3]
-    try:
-        return json.loads(stripped)
-    except json.JSONDecodeError:
-        logger.warning("  Failed to parse JSON, returning raw text")
-        return raw
-
-
-def _build_conflict_context(
-    section_name: str | None,
-    resolved_conflicts: list[dict],
-) -> str:
-    """Build conflict correction context for a section, or all if section_name is None."""
-    if section_name is None:
-        relevant = resolved_conflicts
-    else:
-        relevant = [c for c in resolved_conflicts if c.get("section", "") == section_name]
-    if not relevant:
-        return "没有"
-
-    lines: list[str] = []
-    for c in relevant:
-        correction = c.get("correction", "")
-        conflict_type = c.get("conflict_type", "")
-        source = c.get("source", "")
-        lines.append(f"- 冲突类型：{conflict_type}，依据：{source}")
-        lines.append(f"  修正后的值：{correction}")
-
-    return "\n".join(lines)
-
-
-# ---------------------------------------------------------------------------
-# LLM analysis
-# ---------------------------------------------------------------------------
-
-def _analyze_content(
-    content: str,
-    conflict_context: str,
-    llm: LLMClient,
-    *,
-    dry_run: bool = False,
-) -> list[dict]:
-    """Send content to the LLM and return IR entries."""
-    prompt = PROMPT.replace("{conflict_context}", conflict_context).replace("{content}", content)
-
-    if dry_run:
-        est = llm.estimate_tokens(prompt)
-        logger.info("  [DRY RUN] prompt ~%d tokens", est)
-        return []
-
-    try:
-        raw = llm.chat(
-            model=LLMClient.TEXT_MODEL,
-            messages=[{"role": "user", "content": prompt}],
-            response_format={"type": "json_object"},
-        )
-        logger.info("  Response: %d chars", len(raw))
-    except RuntimeError as e:
-        logger.error("  Analysis failed: %s", e)
-        return []
-
-    parsed = _parse_llm_response(raw)
-    if isinstance(parsed, list):
-        return parsed
-    elif isinstance(parsed, dict):
-        return [parsed]
-    else:
-        logger.warning("  Unparseable response, raw length: %d", len(raw))
-        return []
-
-
-# ---------------------------------------------------------------------------
-# Main
-# ---------------------------------------------------------------------------
-
-def generate_ir(
-    parsed_path: str,
-    output_dir: str = "output",
-    *,
-    dry_run: bool = False,
-) -> dict:
-    """Read parsed/updated JSON and generate JSON IR.
-
-    Produces ``<basename>_ir.json`` in *output_dir*.
-    """
-    with open(parsed_path, "r", encoding="utf-8") as f:
-        data = json.load(f)
-
-    basename = os.path.splitext(os.path.basename(parsed_path))[0]
-    for suffix in ("_parsed", "_updated"):
-        if basename.endswith(suffix):
-            basename = basename[:-len(suffix)]
-            break
-    os.makedirs(output_dir, exist_ok=True)
-
-    llm = LLMClient()
-    ir_output: list[dict] = []
-
-    sections = data.get("sections", [])
-    image_sources = data.get("image_sources", {})
-    image_analysis = data.get("image_analysis", [])
-    resolved_conflicts = data.get("resolved_conflicts", [])
-
-    # Build full document JSON to measure size
-    full_doc = {
-        "sections": sections,
-        "image_sources": image_sources,
-        "image_analysis": image_analysis,
-    }
-    full_json = json.dumps(full_doc, ensure_ascii=False)
-    total_chars = len(full_json)
-    logger.info("Total document JSON chars: %d", total_chars)
-
-    if total_chars < MAX_ANALYSIS_TOKENS:
-        logger.info("Document fits in one request (< %d chars)", MAX_ANALYSIS_TOKENS)
-        conflict_ctx = _build_conflict_context(None, resolved_conflicts)
-        entries = _analyze_content(full_json, conflict_ctx, llm, dry_run=dry_run)
-        ir_output.extend(entries)
-    else:
-        logger.info("Document is large (>= %d chars), batching sections", MAX_ANALYSIS_TOKENS)
-
-        # Filter to non-empty sections, measure effective size per section
-        # (section JSON + image_sources + image_analysis for images in that section)
-        sec_sizes = []
-        for sec in sections:
-            if not sec.get("blocks"):
-                continue
-            sec_json = json.dumps(sec, ensure_ascii=False)
-            sec_chars = len(sec_json)
-            # Add image overhead for this section
-            sec_name = sec.get("source", "")
-            sec_rids = [rid for rid, src in image_sources.items()
-                        if src.get("section", "") == sec_name]
-            if sec_rids:
-                overhead_doc = {
-                    "image_sources": {rid: image_sources[rid] for rid in sec_rids},
-                    "image_analysis": [img for img in image_analysis
-                                       if img.get("rid", "") in sec_rids],
-                }
-                sec_chars += len(json.dumps(overhead_doc, ensure_ascii=False))
-            sec_sizes.append((sec, sec_chars))
-
-        # Greedy batch: never split a section, keep adding until next exceeds limit
-        i = 0
-        while i < len(sec_sizes):
-            batch = []
-            batch_size = 0
-            while i < len(sec_sizes) and batch_size + sec_sizes[i][1] <= MAX_ANALYSIS_TOKENS:
-                batch.append(sec_sizes[i][0])
-                batch_size += sec_sizes[i][1]
-                i += 1
-
-            if not batch:
-                i += 1
-                continue
-
-            # Collect sections and their images for this batch
-            batch_names = [s.get("source", "") for s in batch]
-            batch_image_sources = {
-                rid: src for rid, src in image_sources.items()
-                if src.get("section", "") in batch_names
-            }
-            batch_images = [
-                img for img in image_analysis
-                if image_sources.get(img.get("rid", ""), {}).get("section", "") in batch_names
-            ]
-
-            batch_doc = {
-                "sections": batch,
-                "image_sources": batch_image_sources,
-                "image_analysis": batch_images,
-            }
-            batch_json = json.dumps(batch_doc, ensure_ascii=False)
-
-            # Merge conflict contexts
-            ctx_parts = []
-            for sn in batch_names:
-                ctx = _build_conflict_context(sn, resolved_conflicts)
-                if ctx != "没有":
-                    ctx_parts.append(ctx)
-            conflict_ctx = "\n".join(ctx_parts) if ctx_parts else "没有"
-
-            label = " + ".join(batch_names)
-            logger.info("Batch [%s]: %d sections, %d chars", label, len(batch), len(batch_json))
-            entries = _analyze_content(batch_json, conflict_ctx, llm, dry_run=dry_run)
-            ir_output.extend(entries)
-            time.sleep(RATE_LIMIT_DELAY)
-
-    # ---- save ----------------------------------------------------------------
-    ir_path = os.path.join(output_dir, f"{basename}_ir.json")
-    os.makedirs(os.path.dirname(ir_path) or ".", exist_ok=True)
-    with open(ir_path, "w", encoding="utf-8") as f:
-        json.dump(ir_output, f, ensure_ascii=False, indent=2)
-    logger.info("Saved: %s (%d entries)", ir_path, len(ir_output))
-
-    # ---- summary -------------------------------------------------------------
-    usg = llm.usage
-    logger.info("Tokens: %d prompt + %d completion = %d total",
-                usg["prompt_tokens"], usg["completion_tokens"], usg["total_tokens"])
-    logger.info("Output: %s", ir_path)
-
-    return {"ir": ir_output, "path": ir_path}
-
-
-# ---------------------------------------------------------------------------
-# CLI
-# ---------------------------------------------------------------------------
-
-if __name__ == "__main__":
-    parser = argparse.ArgumentParser(
-        description="Generate JSON intermediate representation from parsed/updated JSON.",
-    )
-    parser.add_argument("input", metavar="parsed.json",
-                        help="Path to _parsed.json or _updated.json")
-    parser.add_argument("output_dir", nargs="?", default="output", metavar="output_dir",
-                        help="Directory for output files (default: output/)")
-    parser.add_argument("--dry-run", action="store_true",
-                        help="Print token estimates without calling the API.")
-
-    args = parser.parse_args()
-    generate_ir(args.input, args.output_dir, dry_run=args.dry_run)
@@ -0,0 +1,717 @@
+"""
+Stage 1: Ensemble Semantic Index Generation.
+
+Generates N parallel LLM calls with different temperatures (e.g., 0.0, 0.3, 0.7),
+then deterministically merges the results via ensemble_merge (pure Python, no LLM).
+The merged output includes confidence scores for each concept and function_unit.
+
+Outputs:
+  - output/semantic_index_r1.json (T=0.0 raw)
+  - output/semantic_index_r2.json (T=0.3 raw)
+  - output/semantic_index_r3.json (T=0.7 raw)
+  - output/semantic_index.json (ensemble-merged final)
+"""
+
+import concurrent.futures
+import json
+import re
+import sys
+import time
+from pathlib import Path
+
+import config
+from ensemble_merge import ensemble_merge
+
+
+# ---- Path Enumeration (for prompt embedding) ----
+
+
+def _traverse_nested(node: dict, image_id: str, path_nodes: list,
+                     branch_taken: str | None) -> list[dict]:
+    """Walk a logic_tree_nested node depth-first and collect leaf path records.
+
+    Args:
+        node: Current tree node; "id", "type", "name" and "children" are read.
+        image_id: Identifier of the image the tree belongs to.
+        path_nodes: Entries for the ancestors of ``node`` (not modified).
+        branch_taken: Label of the edge that led to ``node`` (None for the root).
+
+    Returns:
+        One record per leaf reachable from ``node``, in child order.
+    """
+    kind = node.get("type", "?")
+    entry = {
+        "id": node.get("id", "?"),
+        "type": kind,
+        "label": node.get("name", ""),
+        "branch_taken": branch_taken,
+    }
+    # Build a new list so sibling branches never share the same chain object.
+    trail = path_nodes + [entry]
+
+    kids = node.get("children", [])
+    # An "end" node, or any node without children, terminates the path.
+    if kind == "end" or not kids:
+        return [_make_path_record(trail, image_id)]
+
+    is_decision = kind == "decision"
+    collected = []
+    for kid in kids:
+        if is_decision:
+            # Decision children are {condition, node} wrappers; fall back to
+            # the child itself when the wrapper has no "node" key.
+            edge_label = kid.get("condition", "")
+            target = kid.get("node", kid)
+        else:
+            edge_label = "(implicit)"
+            target = kid
+        collected.extend(_traverse_nested(target, image_id, trail, edge_label))
+
+    return collected
+
+
+def _make_path_record(path_nodes: list, image_id: str) -> dict:
+    """Assemble the record that describes one finished root-to-leaf chain.
+
+    The record carries the full node chain, its readable description, the
+    action and decision entries on it, and a path id built from the image id
+    and the joined node ids.
+    """
+    def _of_kind(kind: str) -> list:
+        return [entry for entry in path_nodes if entry["type"] == kind]
+
+    actions = _of_kind("action")
+    decisions = _of_kind("decision")
+    ids = [entry["id"] for entry in path_nodes]
+
+    record = {"path_id": f"PATH-{image_id}-{'-'.join(ids)}"}
+    record["nodes"] = path_nodes
+    record["meaning"] = _describe_path(path_nodes)
+    record["image_id"] = image_id
+    record["action_nodes"] = actions
+    record["decision_nodes"] = decisions
+    record["node_ids"] = ids
+    return record
+
+
+def enumerate_logic_tree_paths(nested_tree: dict, image_id: str = "") -> list[dict]:
+    """Return every root-to-leaf path of a logic_tree_nested structure.
+
+    The nested tree is walked directly (no adjacency table). An empty or
+    missing tree yields an empty list.
+    """
+    if nested_tree:
+        return _traverse_nested(nested_tree, image_id, [], None)
+    return []
+
+
+def _describe_path(path_nodes: list[dict]) -> str:
+    """Render a node chain as a single arrow-separated, human-readable line.
+
+    Each node contributes its label; when the node was reached through an
+    explicit branch, the branch value is appended after the label.
+    """
+    def _render(entry: dict) -> str:
+        text = entry["label"]
+        taken = entry["branch_taken"]
+        if not taken or taken == "(implicit)":
+            return text
+        return f"{text} → {taken}"
+
+    return " → ".join(_render(entry) for entry in path_nodes)
+
+
+def enumerate_all_paths(doc: dict) -> dict[str, list[dict]]:
+    """Enumerate paths for all logic trees in the document.
+
+    Uses logic_tree_nested when available (proper tree), falling back to
+    the flat logic_tree. Images without a "rid" are skipped. An image whose
+    flat tree has no nodes maps to an empty list; an image with no tree at
+    all is omitted.
+
+    Args:
+        doc: Parsed document; only its "image_analysis" list is read. The
+            document is not modified.
+
+    Returns:
+        {image_id: [path, ...]}.
+    """
+    result = {}
+    for img in doc.get("image_analysis", []):
+        rid = img.get("rid", "")
+        if not rid:
+            continue
+
+        nested = img.get("logic_tree_nested")
+        if nested:
+            result[rid] = enumerate_logic_tree_paths(nested, image_id=rid)
+            continue
+
+        lt = img.get("logic_tree")
+        if not lt:
+            continue
+        if lt.get("nodes"):
+            # Tag a shallow copy with the image id instead of writing the key
+            # into the caller's document.
+            result[rid] = _enumerate_flat_tree({**lt, "image_id": rid})
+        else:
+            result[rid] = []
+    return result
+
+
+def _enumerate_flat_tree(tree: dict) -> list[dict]:
+    """Fallback: enumerate root-to-leaf paths of a flat logic_tree.
+
+    Edges come from _build_adjacency, so start/process/action/state nodes
+    act as implicit chain links. The root is the first start/state node,
+    else the first process node, else the first node in the list.
+    """
+    nodes = tree.get("nodes", [])
+    if not nodes:
+        return []
+    by_id = {n["id"]: n for n in nodes}
+    image_id = tree.get("image_id", "")
+
+    root = next((n for n in nodes if n["type"] in ("start", "state")), None)
+    if root is None:
+        root = next((n for n in nodes if n["type"] == "process"), None)
+    if root is None:
+        root = nodes[0]
+
+    edges = _build_adjacency(nodes, by_id)
+    found = []
+
+    def walk(node_id, seen, trail, via):
+        # A revisited id (cycle) or an id with no node ends this branch
+        # without recording a path.
+        if node_id in seen:
+            return
+        node = by_id.get(node_id)
+        if node is None:
+            return
+
+        trail = trail + [{
+            "id": node_id,
+            "type": node["type"],
+            "label": node.get("description") or node.get("condition", ""),
+            "branch_taken": via,
+        }]
+
+        outgoing = edges.get(node_id, [])
+        if not outgoing:
+            # Leaf: same record layout as the nested-tree traversal.
+            found.append(_make_path_record(trail, image_id))
+            return
+
+        seen_here = seen | {node_id}
+        for label, target in outgoing:
+            walk(target, seen_here, trail, label)
+
+    walk(root["id"], set(), [], None)
+    return found
+
+
+def _build_adjacency(nodes, node_map):
+    """Build {node_id: [(branch_value, target_id)]} adjacency for flat trees.
+
+    Handles: decision branches (explicit), non-branching nodes (implicit sequential).
+
+    Args:
+        nodes: Flat node dicts in list order; each is read for "id", "type"
+            and an optional "branches" list of {"value", "target"} dicts.
+        node_map: Not used by this function.
+
+    Returns:
+        Dict with an entry for every node id. Explicit edges carry the branch
+        value as label; inferred edges carry the label "(implicit)".
+    """
+    NON_BRANCHING = {"start", "process", "state", "action"}
+
+    adj = {}
+    # Ids that are the target of some explicit branch. Implicit linking stops
+    # at these. The set also grows during the scan below.
+    has_explicit_incoming = set()
+    for n in nodes:
+        for br in n.get("branches", []):
+            has_explicit_incoming.add(br["target"])
+
+    for i, node in enumerate(nodes):
+        nid = node["id"]
+        adj.setdefault(nid, [])
+
+        # Explicit edges from decision nodes
+        for br in node.get("branches", []):
+            adj[nid].append((br["value"], br["target"]))
+
+        # Implicit edges for non-branching nodes (start/process/state/action)
+        if node["type"] in NON_BRANCHING and not node.get("branches"):
+            j = i + 1
+            targets = []
+            while j < len(nodes):
+                next_node = nodes[j]
+                next_nid = next_node["id"]
+                # Stop at the first later node that already has an incoming edge.
+                if next_nid in has_explicit_incoming:
+                    break
+                if next_node["type"] in NON_BRANCHING | {"end"}:
+                    # Link it and mark it as reached so no later node links to
+                    # it again. The scan continues, so a run of consecutive
+                    # such nodes all become targets of this one source.
+                    # NOTE(review): confirm this fan-out (rather than a chain)
+                    # is the intended reading of "implicit sequential".
+                    targets.append(next_nid)
+                    has_explicit_incoming.add(next_nid)
+                    j += 1
+                    continue
+                elif next_node["type"] == "decision":
+                    # A decision is linked only when nothing else was linked
+                    # first; either way the scan ends here.
+                    if not targets:
+                        targets.append(next_nid)
+                    break
+                # Any other node type is skipped.
+                j += 1
+            for t in targets:
+                adj[nid].append(("(implicit)", t))
+
+    return adj
+
+
+def format_paths_for_prompt(all_paths: dict[str, list[dict]]) -> str:
+    """Turn the enumerated paths into a readable listing for the LLM prompt.
+
+    Each image gets a heading with its path count, followed by one entry per
+    path (id, meaning, node ids, decision ids, action ids). An empty mapping
+    produces a fixed placeholder line.
+    """
+    if not all_paths:
+        return "（无逻辑树路径）"
+
+    out = []
+    for image_id, paths in all_paths.items():
+        out.append(f"\n### {image_id} 的全部决策路径（共 {len(paths)} 条）：")
+        for number, path in enumerate(paths, start=1):
+            out += [
+                f"\n**路径 {number}** (ID: {path['path_id']})",
+                f"  含义: {path['meaning']}",
+                f"  节点: {path['node_ids']}",
+                f"  决策节点: {[n['id'] for n in path['decision_nodes']]}",
+                f"  动作节点: {[n['id'] for n in path['action_nodes']]}",
+            ]
+    return "\n".join(out)
+
+
+# ---- Document Formatting ----
+
+
+def format_document_for_prompt(doc: dict) -> str:
+    """Render the full parsed document as a readable string for the LLM prompt.
+
+    Three parts are emitted in order: the sections (paragraphs, tables and
+    image references), the image analysis (description plus flat logic-tree
+    nodes and branches), and, when present, the resolved conflicts.
+
+    Missing or null fields are rendered as placeholders instead of raising:
+    blocks without a "type" are skipped, and a null image description is
+    treated as empty.
+
+    Args:
+        doc: Parsed document; "sections", "image_analysis" and
+            "resolved_conflicts" are read.
+
+    Returns:
+        The rendered text, lines joined with newlines.
+    """
+    lines = []
+
+    lines.append("=== SECTIONS ===")
+    for i, section in enumerate(doc.get("sections", [])):
+        source = section.get("source", f"(无标题-章节{i})")
+        lines.append(f"\n--- Section: {source} ---")
+
+        for block in section.get("blocks", []):
+            block_type = block.get("type")
+            if block_type == "para":
+                lines.append(
+                    f"[段落 {block.get('index', '?')}] {block.get('text', '')}"
+                )
+            elif block_type == "table":
+                lines.append(f"[表格 {block.get('table', '?')}]")
+                headers = block.get("headers", [])
+                lines.append(f"  表头: {' | '.join(headers)}")
+                for row in block.get("rows", []):
+                    cell_texts = [
+                        f"[行{c.get('row','?')}]{c.get('name','')}: {c.get('text','')}"
+                        for c in row.get("columns", [])
+                    ]
+                    lines.append(f"  {'; '.join(cell_texts)}")
+
+        images = section.get("images", [])
+        if images:
+            lines.append(f"  图片引用: {', '.join(images)}")
+
+    lines.append("\n\n=== IMAGE_ANALYSIS (流程图逻辑树) ===")
+    for img in doc.get("image_analysis", []):
+        rid = img.get("rid", "?")
+        img_type = img.get("type", "?")
+        lines.append(f"\n--- Image: {rid} (type={img_type}) ---")
+        # "description" may be present but null; slice only a real string.
+        description = img.get("description") or ""
+        lines.append(f"  描述: {description[:300]}")
+
+        lt = img.get("logic_tree")
+        if lt:
+            lines.append(f"  逻辑树根节点: {lt.get('root', '?')}")
+            lines.append("  节点详情:")
+            for node in lt.get("nodes", []):
+                nid = node.get("id", "?")
+                ntype = node.get("type", "?")
+                desc = node.get("description", "") or node.get("condition", "")
+                lines.append(f"    [{ntype}] {nid}: {desc}")
+                for br in node.get("branches") or []:
+                    lines.append(
+                        f"      → {br.get('value', '?')} → {br.get('target', '?')}"
+                    )
+
+    conflicts = doc.get("resolved_conflicts", [])
+    if conflicts:
+        lines.append("\n\n=== RESOLVED_CONFLICTS (图文冲突仲裁) ===")
+        for c in conflicts:
+            lines.append(
+                f"  [{c.get('conflict_type','?')}] {c.get('section','?')}: "
+                f"以{c.get('source','?')}为准 — {c.get('correction','')}"
+            )
+
+    return "\n".join(lines)
+
+
+# ---- Prompt Building ----
+
+
+def build_prompt(doc: dict, feedback: str = "", all_paths: dict | None = None) -> str:
+    """Fill the step-1 prompt template with the document, paths and feedback.
+
+    The template is read from PROMPTS_DIR/step1_semantic_index.txt and its
+    {document_json}, {logic_tree_paths} and {feedback} placeholders are
+    replaced in that order. Paths are enumerated from ``doc`` when
+    ``all_paths`` is None; falsy feedback becomes an empty string.
+    """
+    template_file = Path(config.PROMPTS_DIR) / "step1_semantic_index.txt"
+    prompt = template_file.read_text(encoding="utf-8")
+
+    prompt = prompt.replace("{document_json}", format_document_for_prompt(doc))
+
+    paths = enumerate_all_paths(doc) if all_paths is None else all_paths
+    prompt = prompt.replace("{logic_tree_paths}", format_paths_for_prompt(paths))
+
+    return prompt.replace("{feedback}", feedback if feedback else "")
+
+
+# ---- Validation ----
+
+
+def _quick_validate(
+    semantic_index: dict, doc: dict, all_paths: dict | None = None
+) -> tuple[bool, dict]:
+    """Validate semantic index and return (passed, gaps).
+
+    Uses a single COVERAGE_TARGET threshold (read from config).
+
+    Checks, in order: function_units is non-empty; each unit has "path" and
+    "sources"; per-image coverage of decision/action nodes; (with
+    ``all_paths``) each unit's logic-tree nodes lie on one enumerated path;
+    (with ``all_paths``) units that reference a tree but contain no action
+    and at most one decision; concept parent references; presence of the
+    scope concepts.
+
+    Args:
+        semantic_index: Index with "function_units" and "concepts".
+        doc: Parsed document the index was generated from.
+        all_paths: Output of enumerate_all_paths; the two path-based checks
+            are skipped when None.
+
+    Returns:
+        (passed, gaps). ``gaps`` has the keys missing_paths, missing_concepts,
+        format_issues and parent_issues. ``passed`` ignores missing_concepts.
+    """
+    gaps = {
+        "missing_paths": [],
+        "missing_concepts": [],
+        "format_issues": [],
+        "parent_issues": [],
+    }
+
+    units = semantic_index.get("function_units", [])
+    concepts = semantic_index.get("concepts", [])
+
+    # --- Check function_units non-empty ---
+    if not units:
+        gaps["format_issues"].append("function_units 为空")
+        return False, gaps
+
+    # --- Check each function_unit has path ---
+    for fu in units:
+        uid = fu.get("unit_id", "?")
+        if not fu.get("path"):
+            gaps["format_issues"].append(f"{uid}: 缺少 path 字段")
+        if not fu.get("sources"):
+            gaps["format_issues"].append(f"{uid}: 缺少 sources")
+
+    # --- Logic tree node coverage ---
+    all_nodes = _collect_logic_tree_nodes(doc)
+    referenced = _collect_referenced_nodes(units)
+
+    threshold = config.COVERAGE_TARGET
+
+    for image_id, node_set in all_nodes.items():
+        ref_set = referenced.get(image_id, set())
+        checkable = {
+            nid for nid, ntype in node_set.items()
+            if ntype in ("decision", "action")
+        }
+        if not checkable:
+            continue
+        coverage = len(checkable & ref_set) / len(checkable)
+
+        if coverage < threshold:
+            missing = checkable - ref_set
+            gaps["missing_paths"].append(
+                f"{image_id}: 覆盖率 {coverage:.0%} < {threshold:.0%}, "
+                f"未覆盖节点: {sorted(missing)}"
+            )
+
+    # --- Check logic tree path consistency ---
+    # A unit's logic_tree_nodes must all lie on one enumerated path.
+    if all_paths is not None:
+        for fu in units:
+            uid = fu.get("unit_id", "?")
+            for src in fu.get("sources", []):
+                if src.get("type") != "logic_tree":
+                    continue
+                image_id = src.get("image_id", "")
+                unit_nodes = set(src.get("logic_tree_nodes", []))
+                if not unit_nodes:
+                    continue
+                valid = any(
+                    unit_nodes.issubset(set(path.get("node_ids", [])))
+                    for path in all_paths.get(image_id, [])
+                )
+                if not valid:
+                    gaps["format_issues"].append(
+                        f"{uid}: logic_tree_nodes 不构成有效路径 "
+                        f"(image={image_id}, nodes={sorted(unit_nodes)})"
+                    )
+
+    # --- Check for trivial units (only state/switch nodes, no actions) ---
+    if all_paths is not None:
+        for fu in units:
+            uid = fu.get("unit_id", "?")
+            has_logic_ref = False
+            has_action = False
+            has_non_trivial_decision = False
+            for src in fu.get("sources", []):
+                if src.get("type") != "logic_tree":
+                    continue
+                has_logic_ref = True
+                node_ids = src.get("logic_tree_nodes", [])
+
+                # Resolve node types within the source's own image: ids such
+                # as "n1" can repeat across images, so a cross-image lookup
+                # may report the type from the wrong tree.
+                node_types = all_nodes.get(src.get("image_id", ""))
+                if node_types is None:
+                    # Unknown image id: fall back to searching every tree.
+                    node_types = {}
+                    for nset in all_nodes.values():
+                        for nid in node_ids:
+                            if nid in nset:
+                                node_types[nid] = nset[nid]
+
+                if any(node_types.get(nid, "") == "action" for nid in node_ids):
+                    has_action = True
+                # Count decisions beyond first level (e.g., n1/n2 are just root+switch)
+                decisions = [nid for nid in node_ids
+                             if node_types.get(nid, "") == "decision"]
+                if len(decisions) > 1:
+                    has_non_trivial_decision = True
+            if has_logic_ref and not has_action and not has_non_trivial_decision:
+                gaps["format_issues"].append(
+                    f"{uid}: 可能为空壳单元（仅有state/开关节点，无action或深层decision）"
+                )
+
+    # --- Concept parent validity ---
+    # Concepts without a "name" are tolerated here, matching the .get()
+    # access used in the loops below.
+    concept_names = {c["name"] for c in concepts if "name" in c}
+    for c in concepts:
+        name = c.get("name", "?")
+        parent = c.get("parent")  # can be None for scope-level
+        if parent is not None and parent not in concept_names:
+            gaps["parent_issues"].append(
+                f"concept '{name}' 的 parent '{parent}' 不存在"
+            )
+    # Warn about scope-level concepts without parent=null
+    for c in concepts:
+        if c.get("parent") is not None:
+            continue
+        name = c.get("name", "")
+        # Scope-level concepts (国内/海外) should have parent=null
+        if name not in ("国内", "海外", ""):
+            gaps["parent_issues"].append(
+                f"concept '{name}' 的 parent 为 null，但它可能不是 scope 概念"
+            )
+
+    # --- Check for missing scope concepts ---
+    if "国内" not in concept_names:
+        gaps["missing_concepts"].append("缺少 scope 概念: 国内")
+    if "海外" not in concept_names and any(
+        "海外" in s.get("source", "") for s in doc.get("sections", [])
+    ):
+        gaps["missing_concepts"].append("缺少 scope 概念: 海外")
+
+    passed = (
+        not gaps["missing_paths"]
+        and not gaps["format_issues"]
+        and not gaps["parent_issues"]
+    )
+    return passed, gaps
+
+
+def _collect_logic_tree_nodes(doc: dict) -> dict[str, dict[str, str]]:
+    """Map each image id to the {node_id: node_type} table of its flat logic tree.
+
+    Images lacking either a "rid" or a "logic_tree" are left out.
+    """
+    collected = {}
+    for image in doc.get("image_analysis", []):
+        tree = image.get("logic_tree")
+        image_id = image.get("rid", "")
+        if not (tree and image_id):
+            continue
+        collected[image_id] = {
+            node["id"]: node["type"] for node in tree.get("nodes", [])
+        }
+    return collected
+
+
+def _collect_referenced_nodes(units: list[dict]) -> dict[str, set[str]]:
+    """Gather, per image id, every logic-tree node id cited by any function_unit.
+
+    Only sources whose "type" is "logic_tree" contribute; a source without an
+    "image_id" is filed under the empty string.
+    """
+    refs = {}
+    for unit in units:
+        for source in unit.get("sources", []):
+            if source.get("type") != "logic_tree":
+                continue
+            bucket = refs.setdefault(source.get("image_id", ""), set())
+            bucket.update(source.get("logic_tree_nodes", []))
+    return refs
+
+
+# ---- LLM Calls ----
+
+
+def extract_json_from_response(text: str) -> str:
+    """Robustly extract JSON from LLM response.
+
+    The content of the first ``` fenced block (optionally tagged "json") is
+    returned when one exists. Otherwise the first balanced {...} object is
+    returned, starting at the first "{". Braces inside double-quoted strings
+    (including escaped quotes) are ignored while matching, so a value such
+    as "}" does not end the object early.
+
+    Args:
+        text: Raw LLM response.
+
+    Returns:
+        The extracted JSON text (not parsed).
+
+    Raises:
+        ValueError: If there is no "{" at all or the object is never closed.
+    """
+    m = re.search(r"```(?:json)?\s*([\s\S]*?)```", text)
+    if m:
+        return m.group(1).strip()
+
+    start = text.find("{")
+    if start == -1:
+        raise ValueError("No JSON object found in LLM response")
+
+    depth = 0
+    in_string = False
+    escaped = False
+    for i in range(start, len(text)):
+        ch = text[i]
+        if in_string:
+            # Inside a string only an unescaped quote matters.
+            if escaped:
+                escaped = False
+            elif ch == "\\":
+                escaped = True
+            elif ch == '"':
+                in_string = False
+            continue
+        if ch == '"':
+            in_string = True
+        elif ch == "{":
+            depth += 1
+        elif ch == "}":
+            depth -= 1
+            if depth == 0:
+                return text[start : i + 1]
+
+    raise ValueError("Unclosed JSON object in LLM response")
+
+
+def call_llm(prompt: str, max_retries: int = 2,
+             temperature: float | None = None) -> dict:
+    """Send prompt to LLM, return parsed JSON dict.
+
+    The call is attempted up to ``max_retries + 1`` times. An attempt is
+    retried (after a 2-second pause) when the response is empty, contains no
+    extractable JSON, fails to parse, or parses to something other than a
+    JSON object.
+
+    Args:
+        prompt: User message sent to the model.
+        max_retries: Number of retries after the first attempt.
+        temperature: Override config.TEMPERATURE. If None, uses config default.
+
+    Returns:
+        The parsed JSON object.
+
+    Raises:
+        RuntimeError: If no attempt produced a JSON object; the last parsing
+            error is attached as the cause.
+    """
+    client = config.llm_client()
+    temp = temperature if temperature is not None else config.TEMPERATURE
+    last_error = None
+
+    for attempt in range(max_retries + 1):
+        print(f"  LLM 调用 T={temp} (尝试 {attempt + 1}/{max_retries + 1})...", flush=True)
+        try:
+            resp = client.chat.completions.create(
+                model=config.MODEL_NAME,
+                messages=[
+                    {
+                        "role": "system",
+                        "content": "你是一个精确的 JSON 输出引擎。只输出合法的 JSON。",
+                    },
+                    {"role": "user", "content": prompt},
+                ],
+                temperature=temp,
+                max_tokens=config.MAX_TOKENS,
+            )
+            content = resp.choices[0].message.content
+            if content is None:
+                # Raised as ValueError so an empty reply is retried like any
+                # other unusable response instead of aborting the loop.
+                raise ValueError("LLM returned empty response")
+
+            parsed = json.loads(extract_json_from_response(content))
+            if not isinstance(parsed, dict):
+                raise ValueError(
+                    f"expected a JSON object, got {type(parsed).__name__}"
+                )
+            return parsed
+
+        # json.JSONDecodeError is a subclass of ValueError.
+        except ValueError as e:
+            last_error = e
+            print(f"  JSON 解析失败: {e}")
+            if attempt < max_retries:
+                time.sleep(2)
+
+    raise RuntimeError("无法从 LLM 响应中解析 JSON") from last_error
+
+
+# ---- Ensemble Orchestration ----
+
+
+def run_ensemble_semantic_index(doc: dict) -> dict:
+    """Run N parallel LLM calls at different temperatures, then ensemble-merge.
+
+    1. Enumerate all logic tree paths (once).
+    2. Build the prompt (once — no iterative feedback needed).
+    3. Launch len(ENSEMBLE_TEMPERATURES) parallel LLM calls via ThreadPoolExecutor.
+    4. Collect all results; a failed call contributes an empty placeholder
+       index so the number of merged versions stays fixed.
+    5. Save the individual version outputs (first three, in temperature order).
+    6. Call ensemble_merge() for deterministic merge.
+    7. Validate final output with _quick_validate().
+
+    Args:
+        doc: Parsed input document.
+
+    Returns:
+        The merged index, extended with "ensemble_temperatures" (in merge
+        order), "validation_passed" and "validation_gaps".
+
+    Raises:
+        ValueError: If config.ENSEMBLE_TEMPERATURES is empty.
+        RuntimeError: If every LLM call failed.
+    """
+    all_paths = enumerate_all_paths(doc)
+    print(f"  已枚举逻辑树路径: {sum(len(v) for v in all_paths.values())} 条")
+
+    prompt = build_prompt(doc, "", all_paths)
+    print(f"  Prompt 长度: {len(prompt)} 字符")
+
+    temperatures = config.ENSEMBLE_TEMPERATURES
+    print(f"  集成温度: {temperatures}")
+    if not temperatures:
+        # ThreadPoolExecutor rejects max_workers=0; fail with a clearer message.
+        raise ValueError("config.ENSEMBLE_TEMPERATURES 不能为空")
+
+    # Parallel LLM calls
+    raw_results: list[tuple[int, float, dict]] = []
+    n_succeeded = 0
+
+    with concurrent.futures.ThreadPoolExecutor(
+        max_workers=len(temperatures)
+    ) as executor:
+        future_to_meta = {
+            executor.submit(call_llm, prompt, 2, temp): (i, temp)
+            for i, temp in enumerate(temperatures)
+        }
+
+        for future in concurrent.futures.as_completed(future_to_meta):
+            idx, temp = future_to_meta[future]
+            try:
+                si = future.result()
+                n_units = len(si.get("function_units", []))
+                n_concepts = len(si.get("concepts", []))
+            except Exception as e:
+                print(f"  T={temp}: FAIL — {e}")
+                si = {"feature_name": "", "concepts": [], "function_units": []}
+            else:
+                n_succeeded += 1
+                print(f"  T={temp}: {n_concepts} 概念, {n_units} 功能单元")
+            raw_results.append((idx, temp, si))
+
+    # Placeholders are appended for failures, so success must be counted
+    # explicitly to detect the case where nothing usable came back.
+    if n_succeeded == 0:
+        raise RuntimeError("所有集成的 LLM 调用均失败")
+
+    # Sort by temperature, then submission index: completion order is
+    # arbitrary, so the index breaks ties between equal temperatures.
+    raw_results.sort(key=lambda r: (r[1], r[0]))
+    semantic_indices = [r[2] for r in raw_results]
+    ordered_temperatures = [r[1] for r in raw_results]
+
+    # Save individual version outputs
+    version_paths = {
+        0: config.SEMANTIC_INDEX_R1_JSON,
+        1: config.SEMANTIC_INDEX_R2_JSON,
+        2: config.SEMANTIC_INDEX_R3_JSON,
+    }
+    for i, si in enumerate(semantic_indices):
+        out_path = version_paths.get(i)
+        if out_path:
+            config.save_json(si, out_path)
+            # Label with the temperature that produced this version.
+            print(f"  保存版本 {i} (T={ordered_temperatures[i]}): {out_path}")
+
+    # Ensemble merge
+    print(f"\n  集成合并 {len(semantic_indices)} 个版本...")
+    merged = ensemble_merge(semantic_indices)
+    merged["ensemble_temperatures"] = ordered_temperatures
+
+    # Validate
+    passed, gaps = _quick_validate(merged, doc, all_paths)
+    merged["validation_passed"] = passed
+    merged["validation_gaps"] = {
+        k: v for k, v in gaps.items() if v
+    }
+
+    # Print summary
+    cs = merged.get("confidence_summary", {})
+    print(f"  合并后: {cs.get('total_concepts', 0)} 概念, "
+          f"{cs.get('total_units', 0)} 功能单元")
+    print(f"  置信度: high={cs.get('high', 0)}, medium={cs.get('medium', 0)}, "
+          f"low={cs.get('low', 0)}")
+    print(f"  验证: {'PASS' if passed else 'GAPS FOUND'}")
+    if not passed:
+        for k, v in gaps.items():
+            if v:
+                print(f"    {k}: {len(v)} 个问题")
+
+    return merged
+
+
+# ---- Main ----
+
+
+def main():
+    """Run stage 1 end to end: load the input, build the merged index, save it.
+
+    Also writes the enumerated logic-tree paths to config.PATH_ENUM_JSON for
+    downstream use and prints a short summary.
+    """
+    banner = "=" * 60
+    print(banner)
+    print("阶段一：集成语义索引 (Ensemble Semantic Index)")
+    print(banner)
+
+    # Step 1: load the parsed document.
+    print(f"\n[1/3] 加载输入文档: {config.INPUT_JSON}")
+    doc = config.load_input_document()
+    section_count = len(doc.get('sections', []))
+    image_count = len(doc.get('image_analysis', []))
+    print(f"  已加载 {section_count} 个 section, {image_count} 张图片分析")
+
+    # Step 2: ensemble generation and merge.
+    print(f"\n[2/3] 运行集成语义索引 ({len(config.ENSEMBLE_TEMPERATURES)} 个温度版本)...")
+    merged_index = run_ensemble_semantic_index(doc)
+
+    # Step 3: persist the merged index and the path enumeration.
+    print(f"\n[3/3] 保存最终语义索引: {config.SEMANTIC_INDEX_JSON}")
+    config.save_json(merged_index, config.SEMANTIC_INDEX_JSON)
+
+    path_payload = {"logic_tree_paths": dict(enumerate_all_paths(doc))}
+    config.save_json(path_payload, config.PATH_ENUM_JSON)
+    print(f"  路径枚举: {config.PATH_ENUM_JSON}")
+
+    summary = merged_index.get("confidence_summary", {})
+    fallback_concepts = len(merged_index.get("concepts", []))
+    fallback_units = len(merged_index.get("function_units", []))
+    n_concepts = summary.get("total_concepts", fallback_concepts)
+    n_units = summary.get("total_units", fallback_units)
+    n_versions = merged_index.get("ensemble_versions", len(config.ENSEMBLE_TEMPERATURES))
+    print(f"\n完成! {n_versions} 版本集成, {n_concepts} 个概念, {n_units} 个功能单元.")
+    print(f"输出: {config.SEMANTIC_INDEX_JSON}")
+
+# Run the stage only when this file is executed as a script.
+if __name__ == "__main__":
+    main()
@@ -0,0 +1,399 @@
+"""
+Stage 2.5: Branch Coverage Auto-Completion.
+
+1. Enumerates all root-to-leaf paths in every logic tree
+2. Compares paths against existing IR rules to find uncovered paths
+3. Generates synthetic function_units for uncovered paths
+4. Calls LLM (same extract_rules_for_unit) to produce rules for synthetic units
+5. Iterates up to MAX_RETRIES_PER_STAGE rounds to reach COVERAGE_TARGET
+
+Outputs:
+  - output/path_enumeration.json
+  - output/ir_autocomplete_fragments.json
+"""
+
+import concurrent.futures
+import json
+import time
+from pathlib import Path
+
+import config
+
+
+# ---- Path Enumeration (thin wrapper that delegates to step1) ----
+
+
+def enumerate_all_paths(doc: dict) -> dict[str, list[dict]]:
+    """Return every root-to-leaf path of every logic tree in ``doc``.
+
+    Delegates to step1_semantic_index.enumerate_all_paths; the import is
+    done inside the function.
+    """
+    import step1_semantic_index
+
+    return step1_semantic_index.enumerate_all_paths(doc)
+
+
+# ---- Coverage Analysis ----
+
+
+def find_referenced_path_ids(rules: list[dict]) -> dict[str, set[str]]:
+    """Collect, per rule, the logic-tree nodes named in its sources.
+
+    Returns {rule_id: set of "image_id:node_id" strings}. Rules without a
+    "rule_id" are stored under "?"; a later rule with the same id replaces
+    an earlier one.
+    """
+    by_rule = {}
+    for rule in rules:
+        rule_id = rule.get("rule_id", "?")
+        node_refs = {
+            f"{src.get('image_id', '')}:{nid}"
+            for src in rule.get("sources", [])
+            if src.get("type") == "logic_tree"
+            for nid in src.get("node_ids", [])
+        }
+        by_rule[rule_id] = node_refs
+    return by_rule
+
+
+def compute_path_coverage(
+    all_paths: dict[str, list[dict]], rules: list[dict]
+) -> tuple[list[dict], list[dict], dict]:
+    """Compute coverage of enumerated paths by existing rules.
+
+    A path is "covered" if at least one rule's node_ids (for the same image)
+    form a superset of the path's decision+action nodes. Paths with no
+    decision or action node count as covered.
+
+    Every rule is evaluated on its own, so rules with a missing or repeated
+    "rule_id" still contribute their coverage.
+
+    Args:
+        all_paths: {image_id: [path, ...]}; each path's "nodes" entries are
+            read for "id" and "type".
+        rules: Rules whose logic_tree sources list "image_id" and "node_ids".
+
+    Returns:
+        (covered_paths, uncovered_paths, stats), where stats holds
+        total_paths, covered_paths, uncovered_paths and coverage_pct
+        (one decimal; 100.0 when there are no paths).
+    """
+    # One {image_id: set(node_ids)} mapping per rule. Kept as a list rather
+    # than keyed by rule_id so equal ids cannot overwrite each other.
+    rule_node_sets = []
+    for rule in rules:
+        per_image = {}
+        for src in rule.get("sources", []):
+            if src.get("type") == "logic_tree":
+                per_image.setdefault(src.get("image_id", ""), set()).update(
+                    src.get("node_ids", [])
+                )
+        rule_node_sets.append(per_image)
+
+    covered = []
+    uncovered = []
+
+    for image_id, paths in all_paths.items():
+        for path in paths:
+            # Only decision and action nodes have to be accounted for.
+            checkable = {
+                n["id"] for n in path["nodes"]
+                if n["type"] in ("decision", "action")
+            }
+            if not checkable:
+                # Path with no decision/action nodes — trivially covered
+                covered.append(path)
+                continue
+
+            path_covered = any(
+                checkable.issubset(img_sets.get(image_id, set()))
+                for img_sets in rule_node_sets
+            )
+            (covered if path_covered else uncovered).append(path)
+
+    total = len(covered) + len(uncovered)
+    stats = {
+        "total_paths": total,
+        "covered_paths": len(covered),
+        "uncovered_paths": len(uncovered),
+        "coverage_pct": round(len(covered) / total * 100, 1) if total > 0 else 100.0,
+    }
+    return covered, uncovered, stats
+
+
+# ---- Synthetic Function Unit Generation ----
+
+
+def generate_synthetic_unit(path: dict, unit_seq: int) -> dict:
+    """Create a synthetic function_unit from an uncovered logic tree path.
+
+    Scope, switch state, app type and app state are inferred from the path's
+    nodes by the _infer_* helpers and used to build the unit's "path" labels
+    and description.
+
+    Args:
+        path: Enumerated path record; "image_id" is required, "meaning",
+            "action_nodes" and "node_ids" are optional.
+        unit_seq: Sequence number, zero-padded to three digits in the unit id.
+
+    Returns:
+        A function_unit dict flagged with "auto_generated": True and a single
+        logic_tree source that lists the path's node ids.
+    """
+    switch = _infer_switch_state(path)
+    app_type = _infer_app_type(path)
+    app_state = _infer_app_state(path)
+    scope = _infer_scope(path)
+
+    # Build description from path meaning
+    description = f"自动补全: {path.get('meaning', '')}"
+    if switch:
+        description = f"开关{switch}, {description}"
+
+    # Build path list: scope, switch, app type, app state, then behavior.
+    path_labels = []
+    if scope:
+        path_labels.append(scope)
+    if switch:
+        path_labels.append(f"开关{switch}")
+    if app_type:
+        path_labels.append(app_type)
+    if app_state:
+        path_labels.append(app_state)
+    # Add behavior from terminal action (label truncated to 20 characters)
+    action_nodes = path.get("action_nodes", [])
+    if action_nodes:
+        last_action = action_nodes[-1].get("label", "")
+        path_labels.append(last_action[:20])
+
+    unit_id = f"FU-AUTO-{path['image_id']}-{unit_seq:03d}"
+
+    return {
+        "unit_id": unit_id,
+        "name": f"自动补全-{path.get('meaning', '')[:60]}",
+        "description": description,
+        "path": path_labels,
+        "auto_generated": True,
+        "sources": [
+            {
+                "section": "",
+                "type": "logic_tree",
+                "image_id": path["image_id"],
+                "logic_tree_nodes": path.get("node_ids", []),
+            }
+        ],
+    }
+
+
+def _infer_switch_state(path: dict) -> str:
+    """Return "开启" or "关闭" from the first switch decision that took such a branch.
+
+    A node qualifies when its label contains "开关" and its type is
+    "decision". An empty string is returned when no node qualifies.
+    """
+    for entry in path["nodes"]:
+        text = entry.get("label", "")
+        taken = entry.get("branch_taken", "")
+        if "开关" not in text or entry["type"] != "decision":
+            continue
+        if taken == "开启" or taken == "关闭":
+            return "开启" if taken == "开启" else "关闭"
+    return ""
+
+
+def _infer_app_type(path: dict) -> str:
+    """Return the app type named by the first matching state node on the path.
+
+    State-node labels are searched for known keywords, in the order listed
+    below; an empty string is returned when nothing matches.
+    """
+    keyword_to_type = (
+        ("其他应用", "其他应用"),
+        ("SDK限制", "SDK限制"),
+        ("通过接入SDK限制的应用", "SDK限制"),
+        ("系统限制", "系统限制"),
+        ("通过系统限制应用", "系统限制"),
+    )
+    for entry in path["nodes"]:
+        if entry["type"] != "state":
+            continue
+        for needle, app_type in keyword_to_type:
+            if needle in entry.get("label", ""):
+                return app_type
+    return ""
+
+
+def _infer_app_state(path: dict) -> str:
+    """Return "前台" or "后台" based on the first decisive foreground node.
+
+    Every node of ``path["nodes"]`` whose label contains "前台" is checked in
+    order (the node type is not inspected): a ``branch_taken`` of "是" yields
+    "前台" and "否" yields "后台". Any other branch value is skipped. Returns
+    an empty string when no node decides.
+    """
+    for node in path["nodes"]:
+        if "前台" not in node.get("label", ""):
+            continue
+        answer = node.get("branch_taken", "")
+        if answer == "是":
+            return "前台"
+        if answer == "否":
+            return "后台"
+    return ""
+
+
+def _infer_scope(path: dict) -> str:
+    """Return the geographic scope for ``path``.
+
+    The path is not inspected: every path is reported as "国内".
+    """
+    default_scope = "国内"
+    return default_scope
+
+
+# ---- LLM Extraction for Synthetic Units ----
+
+
+def extract_rules_for_synthetic_units(
+    synthetic_units: list[dict], doc: dict, max_retries: int | None = None
+) -> list[dict]:
+    """Extract IR rules for synthetic function units using step2's LLM logic.
+
+    Units are processed one after another. A unit whose extraction raises is
+    still reported, with an empty ``rules`` list and an ``error`` message, so
+    a failure is visible to the caller instead of looking like "0 rules".
+
+    Args:
+        synthetic_units: Function units to extract rules for; each needs a
+            ``unit_id`` and may carry ``name``, ``path`` and ``description``.
+        doc: Parsed document passed to the step2 lookup/context helpers.
+        max_retries: Retry budget handed to ``extract_rules_for_unit``;
+            defaults to ``config.MAX_RETRIES_PER_STAGE``.
+
+    Returns:
+        One fragment per unit with keys ``unit_id``, ``unit_name``, ``rules``
+        and ``auto_generated`` (always True), plus ``error`` on failure.
+    """
+    # Imported inside the function rather than at module level.
+    from step2_ir_extraction import (
+        build_document_lookup,
+        extract_context_package,
+        extract_rules_for_unit,
+    )
+
+    if max_retries is None:
+        max_retries = config.MAX_RETRIES_PER_STAGE
+
+    sections_by_source, image_by_rid, conflicts_by_section = build_document_lookup(doc)
+
+    fragments = []
+    for unit in synthetic_units:
+        pkg = extract_context_package(
+            unit, doc, sections_by_source, image_by_rid, conflicts_by_section
+        )
+        # Enrich pkg with the unit's own path and description.
+        pkg["unit_path"] = unit.get("path", [])
+        pkg["unit_description"] = unit.get("description", pkg["unit_description"])
+
+        fragment = {
+            "unit_id": unit["unit_id"],
+            "unit_name": unit.get("name", ""),
+            "rules": [],
+            "auto_generated": True,
+        }
+        try:
+            fragment["rules"] = extract_rules_for_unit(pkg, max_retries)
+        except Exception as e:
+            # Best effort: one failing unit must not abort the batch, but the
+            # reason is recorded and printed rather than silently dropped.
+            fragment["error"] = str(e)
+            print(f"    [FAIL] {unit['unit_id']}: 失败 — {e}")
+        else:
+            print(f"    {unit['unit_id']}: {len(fragment['rules'])} 条规则")
+
+        fragments.append(fragment)
+
+    return fragments
+
+
+# ---- Iterative Auto-Completion ----
+
+
+def run_autocomplete(
+    all_paths: dict[str, list[dict]],
+    existing_rules: list[dict],
+    doc: dict,
+) -> tuple[list[dict], dict]:
+    """Run iterative auto-completion of uncovered logic-tree paths.
+
+    Each round generates one synthetic unit per uncovered path, extracts
+    rules for those units and recomputes coverage. Coverage is always
+    computed from ``existing_rules`` plus the rules of *all* rounds so far,
+    so a path covered in an earlier round stays covered. Synthetic unit
+    sequence numbers come from one running counter, so ids never repeat
+    across rounds.
+
+    The loop ends when nothing is uncovered, when the coverage target
+    ``config.COVERAGE_TARGET`` is reached, when no uncovered path has any
+    decision node left, or after ``config.MAX_RETRIES_PER_STAGE`` rounds.
+
+    Args:
+        all_paths: Enumerated paths, passed to ``compute_path_coverage``.
+        existing_rules: Rules already extracted before auto-completion.
+        doc: Parsed document, forwarded to the rule extraction.
+
+    Returns:
+        ``(autocomplete_fragments, stats)`` where ``stats`` is the coverage
+        statistics dict with the highest ``coverage_pct`` observed.
+    """
+    print("\n  初始路径覆盖率分析...")
+    _, uncovered, stats = compute_path_coverage(all_paths, existing_rules)
+    print(f"  覆盖: {stats['covered_paths']}/{stats['total_paths']} "
+          f"({stats['coverage_pct']}%)")
+
+    if not uncovered:
+        print("  所有路径已覆盖，无需自动补全")
+        return [], stats
+
+    print(f"  未覆盖路径: {len(uncovered)} 条")
+
+    all_fragments = []
+    best_stats = stats
+    # Running sequence number for synthetic unit ids. Deriving it from
+    # len(uncovered) would reuse numbers once the uncovered list shrinks.
+    next_seq = 1
+
+    for round_n in range(1, config.MAX_RETRIES_PER_STAGE + 1):
+        if not uncovered:
+            break
+
+        print(f"\n--- 自动补全 第 {round_n} 轮 ---")
+        print(f"  为 {len(uncovered)} 条未覆盖路径生成合成单元...")
+
+        # Generate synthetic units
+        synthetic_units = [
+            generate_synthetic_unit(path, next_seq + i)
+            for i, path in enumerate(uncovered)
+        ]
+        next_seq += len(synthetic_units)
+
+        # Extract rules via LLM (sequential, to avoid flooding the API)
+        fragments = extract_rules_for_synthetic_units(synthetic_units, doc)
+        all_fragments.extend(fragments)
+
+        # Re-compute coverage over the rules of every round so far.
+        all_rules = existing_rules + [
+            rule for f in all_fragments for rule in f.get("rules", [])
+        ]
+        _, uncovered, stats = compute_path_coverage(all_paths, all_rules)
+        print(f"  第 {round_n} 轮后覆盖: {stats['covered_paths']}/{stats['total_paths']} "
+              f"({stats['coverage_pct']}%)")
+
+        if stats["coverage_pct"] > best_stats["coverage_pct"]:
+            best_stats = stats
+
+        if stats["coverage_pct"] >= config.COVERAGE_TARGET * 100:
+            print(f"  达到目标覆盖率 {config.COVERAGE_TARGET:.0%}，停止")
+            break
+
+        # Stop when no remaining uncovered path carries a decision node.
+        if not any(p.get("decision_nodes", []) for p in uncovered):
+            print("  无更多可补全路径，停止")
+            break
+
+    return all_fragments, best_stats
+
+
+# ---- Main ----
+
+
+def main():
+    """Run stage 2.5: branch-coverage auto-completion.
+
+    Loads the input document and the existing IR fragments, enumerates the
+    logic-tree paths and saves that enumeration, runs the auto-completion,
+    saves the resulting fragments and prints a coverage summary.
+    """
+    banner = "=" * 60
+    print(banner)
+    print("阶段 2.5：分支覆盖自动补全")
+    print(banner)
+
+    # 1. Load inputs
+    print("\n[1/5] 加载输入...")
+    doc = config.load_input_document()
+    fragments = config.load_json(config.IR_FRAGMENTS_JSON)
+    all_rules = [
+        rule for fragment in fragments for rule in fragment.get("rules", [])
+    ]
+    print(f"  已有规则: {len(all_rules)} 条")
+
+    # 2. Enumerate paths
+    print("\n[2/5] 枚举逻辑树路径...")
+    all_paths = enumerate_all_paths(doc)
+    total_paths = sum(len(paths) for paths in all_paths.values())
+    print(f"  共 {total_paths} 条路径")
+
+    # Save the enumeration for the downstream audit, without the "nodes" field.
+    slim_paths = {}
+    for tree_key, paths in all_paths.items():
+        slim_paths[tree_key] = [
+            {field: value for field, value in p.items() if field != "nodes"}
+            for p in paths
+        ]
+    config.save_json(
+        {"logic_tree_paths": slim_paths, "total_paths": total_paths},
+        config.PATH_ENUM_JSON,
+    )
+
+    # 3. Run auto-completion
+    print("\n[3/5] 运行自动补全...")
+    autocomplete_fragments, final_stats = run_autocomplete(
+        all_paths, all_rules, doc
+    )
+
+    # 4. Save
+    print("\n[4/5] 保存自动补全片段...")
+    output_path = config.IR_AUTOCOMPLETE_FRAGMENTS_JSON
+    config.save_json(autocomplete_fragments, output_path)
+    print(f"  输出: {config.IR_AUTOCOMPLETE_FRAGMENTS_JSON}")
+    print(f"  生成 {len(autocomplete_fragments)} 个补全片段")
+
+    # 5. Summary
+    print("\n[5/5] 完成!")
+    print(f"  最终路径覆盖: {final_stats['covered_paths']}/{final_stats['total_paths']} "
+          f"({final_stats['coverage_pct']}%)")
+
+    target_pct = config.COVERAGE_TARGET * 100
+    if final_stats["coverage_pct"] < target_pct:
+        remaining = final_stats["total_paths"] - final_stats["covered_paths"]
+        print(f"  WARN: {remaining} 条路径仍未覆盖，将在审计报告中列出")
+
+if __name__ == "__main__":
+    main()
@@ -0,0 +1,508 @@
+"""
+Stage 2: Per Function Unit IR Extraction.
+
+For each function unit from the semantic index, constructs a precision context
+package and calls the LLM to extract detailed IR rules.
+
+Runs multiple LLM calls in parallel (up to MAX_CONCURRENCY).
+
+Output: output/ir_fragments.json
+"""
+
+import concurrent.futures
+import json
+import re
+import sys
+import time
+from pathlib import Path
+
+import config
+
+
+MAX_CONCURRENCY = 3  # Max parallel LLM calls
+
+
+def load_semantic_index() -> dict:
+    """Read the Stage 1 semantic index from ``config.SEMANTIC_INDEX_JSON``."""
+    index_path = config.SEMANTIC_INDEX_JSON
+    return config.load_json(index_path)
+
+
+def _leading_token(value) -> str:
+    """Return the first whitespace-separated token of ``value``, or ""."""
+    parts = (value or "").split()
+    return parts[0] if parts else ""
+
+
+def build_document_lookup(doc: dict) -> tuple[dict, dict, dict]:
+    """Build lookup structures for fast context extraction from the document.
+
+    Section and conflict keys are the leading token of the section reference
+    (e.g. "3.1.1" for "3.1.1 Title"). A missing, null or whitespace-only
+    reference is handled without raising.
+
+    Args:
+        doc: Parsed document; reads ``sections``, ``image_analysis`` and
+            ``resolved_conflicts`` (each optional).
+
+    Returns:
+        ``(sections_by_source, image_by_rid, conflicts_by_section)``:
+        - sections_by_source: section key -> section dict; sections without
+          a key are left out, a later section with the same key wins.
+        - image_by_rid: ``rid`` -> image_analysis entry; entries with an
+          empty ``rid`` are left out.
+        - conflicts_by_section: section key -> list of conflicts; conflicts
+          without a usable section are grouped under "".
+    """
+    # sections_by_source: "3.1.1" -> section dict
+    sections_by_source = {}
+    for section in doc.get("sections", []):
+        key = _leading_token(section.get("source", ""))
+        if key:
+            sections_by_source[key] = section
+
+    # image_by_rid: "rId16" -> image_analysis entry
+    image_by_rid = {
+        img["rid"]: img
+        for img in doc.get("image_analysis", [])
+        if img.get("rid", "")
+    }
+
+    # Conflicts indexed by section
+    conflicts_by_section = {}
+    for conflict in doc.get("resolved_conflicts", []):
+        key = _leading_token(conflict.get("section", ""))
+        conflicts_by_section.setdefault(key, []).append(conflict)
+
+    return sections_by_source, image_by_rid, conflicts_by_section
+
+
+def _flatten_table_cells(block: dict) -> list[dict]:
+    """Flatten every cell of a table block into ``{row, name, text}`` records."""
+    return [
+        {
+            "row": col.get("row"),
+            "name": col.get("name"),
+            "text": col.get("text"),
+        }
+        for row in block.get("rows", [])
+        for col in row.get("columns", [])
+    ]
+
+
+def _select_table_rows(block: dict, row_num) -> list[dict]:
+    """Return the table rows that contain a cell tagged with ``row_num``."""
+    headers = block.get("headers", [])
+    return [
+        {
+            "headers": headers,
+            "cells": {col["name"]: col["text"] for col in r["columns"]},
+            "row": row_num,
+        }
+        for r in block.get("rows", [])
+        if any(c.get("row") == row_num for c in r.get("columns", []))
+    ]
+
+
+def extract_context_package(
+    fu: dict, doc: dict, sections_by_source: dict, image_by_rid: dict,
+    conflicts_by_section: dict
+) -> dict:
+    """Build a precision context package for a single function unit.
+
+    Each source of the unit contributes at most once per section (for
+    ``table``/``para`` sources) or once per image (for ``logic_tree``
+    sources). Resolved conflicts are collected in the order in which their
+    sections were first referenced, so the package is reproducible between
+    runs.
+
+    Args:
+        fu: Function unit; ``unit_id`` is required, ``name``,
+            ``description``, ``path`` and ``sources`` are optional.
+        doc: Parsed document. Not read here; kept in the signature for
+            existing callers.
+        sections_by_source: Section key -> section dict.
+        image_by_rid: Image id -> image analysis entry.
+        conflicts_by_section: Section key -> list of resolved conflicts.
+
+    Returns:
+        Dict with ``unit_id``, ``unit_name``, ``unit_description``,
+        ``unit_path``, ``texts``, ``tables``, ``logic_trees`` and
+        ``resolved_conflicts``.
+    """
+    texts = []
+    tables = []
+    logic_trees = []
+    # A dict serves as an insertion-ordered set: iterating a plain set of
+    # strings would make the conflict order vary from run to run.
+    seen_sections: dict[str, None] = {}
+    seen_images = set()
+
+    for src in fu.get("sources", []):
+        src_type = src.get("type", "")
+        # Only the leading token (e.g. "3.1.1") identifies the section; a
+        # missing or whitespace-only reference yields an empty key.
+        section_parts = (src.get("section") or "").split()
+        section_key = section_parts[0] if section_parts else ""
+
+        # --- Text source ---
+        if src_type in ("table", "para") and section_key:
+            if section_key in seen_sections:
+                continue
+            seen_sections[section_key] = None
+
+            section = sections_by_source.get(section_key)
+            if section is None:
+                # Fuzzy match: first known section whose key has this prefix.
+                section = next(
+                    (
+                        candidate
+                        for key, candidate in sections_by_source.items()
+                        if key.startswith(section_key)
+                    ),
+                    None,
+                )
+
+            if section:
+                for block in section.get("blocks", []):
+                    if block["type"] == "para":
+                        texts.append({
+                            "section": section_key,
+                            "text": block["text"]
+                        })
+                    elif block["type"] == "table":
+                        row_num = src.get("row") if src_type == "table" else None
+                        table = {
+                            "section": section_key,
+                            "headers": block.get("headers", []),
+                        }
+                        if row_num is not None:
+                            # Highlight only the specific row; the full
+                            # table is still attached below for context.
+                            table["rows"] = _select_table_rows(block, row_num)
+                        table["all_rows"] = _flatten_table_cells(block)
+                        tables.append(table)
+
+        # --- Logic tree source ---
+        if src_type == "logic_tree":
+            image_id = src.get("image_id", "")
+            if not image_id or image_id in seen_images:
+                continue
+            seen_images.add(image_id)
+
+            img = image_by_rid.get(image_id)
+            tree = img.get("logic_tree") if img else None
+            if tree:
+                logic_trees.append({
+                    "image_id": image_id,
+                    "description": img.get("description", ""),
+                    "tree": tree
+                })
+
+    # Include relevant resolved conflicts, in first-reference order.
+    relevant_conflicts = [
+        conflict
+        for key in seen_sections
+        for conflict in conflicts_by_section.get(key, [])
+    ]
+
+    return {
+        "unit_id": fu["unit_id"],
+        "unit_name": fu.get("name", ""),
+        "unit_description": fu.get("description", ""),
+        "unit_path": fu.get("path", []),
+        "texts": texts,
+        "tables": tables,
+        "logic_trees": logic_trees,
+        "resolved_conflicts": relevant_conflicts
+    }
+
+
+def format_context_package(pkg: dict) -> str:
+    """Render a context package as newline-joined, human-readable text.
+
+    The output has a texts section, a tables section and a logic-tree
+    section, each showing "（无）" when empty, followed by a conflicts
+    section that is emitted only when conflicts are present.
+    """
+    empty_marker = "（无）"
+
+    # Texts
+    out = ["【文字段落】"]
+    out.extend(
+        f"[{item.get('section', '?')}] {item.get('text', '')}"
+        for item in pkg.get("texts", [])
+    )
+    if not pkg.get("texts"):
+        out.append(empty_marker)
+
+    # Tables
+    out.append("\n【表格数据】")
+    for number, table in enumerate(pkg.get("tables", []), start=1):
+        out.append(f"表格 {number} (section={table.get('section', '?')})")
+        out.append(f"  表头: {table.get('headers', [])}")
+        out.append("  全部行数据:")
+        out.extend(
+            f"    行{cell.get('row','?')}[{cell.get('name','?')}]: {cell.get('text','')}"
+            for cell in table.get("all_rows", [])
+        )
+        # Highlight matched rows if any
+        focus_rows = table.get("rows", [])
+        if focus_rows:
+            out.append("  <重点关注行>:")
+            out.extend(
+                f"    行{focus.get('row','?')}: {focus.get('cells', {})}"
+                for focus in focus_rows
+            )
+    if not pkg.get("tables"):
+        out.append(empty_marker)
+
+    # Logic trees
+    out.append("\n【逻辑树】")
+    for number, entry in enumerate(pkg.get("logic_trees", []), start=1):
+        out.append(f"逻辑树 {number} (image_id={entry.get('image_id', '?')})")
+        out.append(f"  描述: {entry.get('description', '')[:200]}")
+        tree = entry.get("tree", {})
+        out.append(f"  根: {tree.get('root', '?')}")
+        out.append("  节点:")
+        for node in tree.get("nodes", []):
+            # Fall back to the condition when the node has no description.
+            summary = node.get("description", "") or node.get("condition", "")
+            out.append(f"    [{node.get('type', '?')}] {node.get('id', '?')}: {summary}")
+            out.extend(
+                f"      → {branch['value']} → {branch['target']}"
+                for branch in node.get("branches", [])
+            )
+    if not pkg.get("logic_trees"):
+        out.append(empty_marker)
+
+    # Conflicts
+    conflicts = pkg.get("resolved_conflicts", [])
+    if conflicts:
+        out.append("\n【图文冲突仲裁】")
+        out.extend(
+            f"  [{item.get('conflict_type', '?')}] 以{item.get('source', '?')}为准: "
+            f"{item.get('correction', '')}"
+            for item in conflicts
+        )
+
+    return "\n".join(out)
+
+
+def _escape_json_for_format(s: str) -> str:
+    """Double every curly brace in ``s``.
+
+    The result can be embedded in a ``str.format()`` template, where ``{{``
+    and ``}}`` stand for literal braces.
+    """
+    brace_escapes = str.maketrans({"{": "{{", "}": "}}"})
+    return s.translate(brace_escapes)
+
+
+def build_prompt(pkg: dict, format_feedback: str = "") -> str:
+    """Build the LLM prompt for a single function unit.
+
+    Reads the ``step2_ir_extraction.txt`` template from
+    ``config.PROMPTS_DIR`` and fills its fields with ``str.format``.
+
+    Values are passed unescaped on purpose: ``str.format`` does not rescan
+    substituted values, so doubling their braces would put ``{{`` / ``}}``
+    into the prompt instead of the intended JSON. Only literal braces in
+    the template file itself need to be doubled.
+
+    Args:
+        pkg: Context package; ``unit_id``, ``unit_name`` and
+            ``unit_description`` are required, ``texts``, ``tables``,
+            ``logic_trees`` and ``resolved_conflicts`` default to empty.
+        format_feedback: Optional fix instructions from a previous attempt.
+
+    Returns:
+        The rendered prompt text.
+    """
+    template_path = Path(config.PROMPTS_DIR) / "step2_ir_extraction.txt"
+    template = template_path.read_text(encoding="utf-8")
+
+    def as_json(key: str) -> str:
+        return json.dumps(pkg.get(key, []), ensure_ascii=False, indent=2)
+
+    return template.format(
+        unit_id=pkg["unit_id"],
+        unit_name=pkg["unit_name"],
+        unit_description=pkg["unit_description"],
+        texts=as_json("texts"),
+        tables=as_json("tables"),
+        logic_trees=as_json("logic_trees"),
+        resolved_conflicts=as_json("resolved_conflicts"),
+        format_feedback=format_feedback,
+    )
+
+
+def extract_json_from_response(text: str) -> str:
+    """Extract the text of a JSON array from an LLM response.
+
+    A fenced code block (```` ``` ```` or ```` ```json ````) holding an array
+    is preferred. Otherwise the array starting at the first ``[`` is
+    returned, found by bracket matching that ignores brackets inside JSON
+    string literals (including strings with escaped quotes).
+
+    Args:
+        text: Raw LLM response.
+
+    Returns:
+        The substring holding the JSON array (not yet parsed).
+
+    Raises:
+        ValueError: If the text has no ``[`` or the array is never closed.
+    """
+    m = re.search(r"```(?:json)?\s*(\[[\s\S]*?\])\s*```", text)
+    if m:
+        return m.group(1).strip()
+
+    # Find outermost [ ... ]
+    start = text.find("[")
+    if start == -1:
+        raise ValueError("No JSON array found in LLM response")
+
+    depth = 0
+    in_string = False
+    escaped = False
+    for i in range(start, len(text)):
+        ch = text[i]
+        if in_string:
+            # Inside a string only an unescaped quote is significant.
+            if escaped:
+                escaped = False
+            elif ch == "\\":
+                escaped = True
+            elif ch == '"':
+                in_string = False
+            continue
+        if ch == '"':
+            in_string = True
+        elif ch == "[":
+            depth += 1
+        elif ch == "]":
+            depth -= 1
+            if depth == 0:
+                return text[start : i + 1]
+
+    raise ValueError("Unclosed JSON array in LLM response")
+
+
+def _check_rule_fields(rules: list[dict]) -> tuple[bool, list[dict]]:
+    """Validate that each rule has the required fields.
+
+    The rules come from LLM output, so wrongly typed values are reported as
+    failures instead of raising: a ``precondition`` that is not a dict
+    counts as a missing ``geographic_scope``, a ``trigger`` that is not a
+    dict is reported on its own, and ``actions`` / ``conditions`` that are
+    not lists are treated as empty.
+
+    Args:
+        rules: Parsed rule objects.
+
+    Returns:
+        ``(passed, failures)`` where ``passed`` is True when there are no
+        failures and each failure is ``{rule_id, field, issue}``.
+    """
+    placeholders = ("文案由业务定义", "待定", "自定义")
+    failures = []
+
+    def report(rule_id: str, field: str, issue: str) -> None:
+        failures.append({"rule_id": rule_id, "field": field, "issue": issue})
+
+    for j, rule in enumerate(rules):
+        if not isinstance(rule, dict):
+            report(f"rule[{j}]", "-", "规则不是 dict")
+            continue
+        rid = rule.get("rule_id") or f"rule[{j}]"
+
+        if not rule.get("path"):
+            report(rid, "path", "缺少 path 字段（必填）")
+
+        precond = rule.get("precondition")
+        if not isinstance(precond, dict) or not precond.get("geographic_scope"):
+            report(rid, "precondition.geographic_scope", "缺少 geographic_scope（必填）")
+
+        actions = rule.get("actions")
+        if not isinstance(actions, list):
+            actions = []
+        for k, action in enumerate(actions):
+            if not isinstance(action, dict):
+                continue
+            if action.get("type") != "user_interaction":
+                continue
+            content = action.get("content") or ""
+            if not isinstance(content, str):
+                # Keep the placeholder search well defined for non-string content.
+                content = str(content)
+            if not content:
+                report(rid, f"actions[{k}].content", "user_interaction 的 content 为空")
+            elif any(ph in content for ph in placeholders):
+                report(rid, f"actions[{k}].content", f"content 包含占位符: '{content}'")
+
+        trigger = rule.get("trigger") or {}
+        if not isinstance(trigger, dict):
+            report(rid, "trigger", "trigger 不是 dict")
+            continue
+        conditions = trigger.get("conditions")
+        if not isinstance(conditions, list):
+            conditions = []
+        for k, cond in enumerate(conditions):
+            if not isinstance(cond, dict):
+                continue
+            if not cond.get("signal"):
+                report(rid, f"trigger.conditions[{k}].signal", "缺少 signal")
+            if not cond.get("operator"):
+                report(rid, f"trigger.conditions[{k}].operator", "缺少 operator")
+            # A falsy value such as 0 is valid; only an absent key is a failure.
+            if "value" not in cond:
+                report(rid, f"trigger.conditions[{k}].value", "缺少 value")
+
+    return len(failures) == 0, failures
+
+
+def _build_fix_prompt(failures: list[dict]) -> str:
+    """Render validation failures as a fix-instruction block for the prompt.
+
+    Returns an empty string when there are no failures; otherwise a header,
+    one bullet per failure (``rule_id.field: issue``) and a closing request
+    to output the complete rule array again.
+    """
+    if not failures:
+        return ""
+
+    header = "\n## 上一轮格式问题修正\n"
+    intro = "上一轮输出的规则存在以下格式问题，请修正后重新输出：\n"
+    footer = "\n请修正以上所有问题，重新输出完整的规则数组。"
+    bullets = [
+        f"- **{item['rule_id']}.{item['field']}**: {item['issue']}"
+        for item in failures
+    ]
+    return "\n".join([header, intro, *bullets, footer])
+
+
+def extract_rules_for_unit(pkg: dict, max_retries: int | None = None) -> list[dict]:
+    """Call the LLM for one function unit and return its IR rules.
+
+    The response is parsed and validated with ``_check_rule_fields``. When
+    the response is empty, is not a JSON array, or fails validation, the
+    call is repeated with the problems appended to the prompt, up to
+    ``max_retries`` extra attempts.
+
+    Args:
+        pkg: Context package for the unit (see ``build_prompt``).
+        max_retries: Number of retries after the first attempt; defaults to
+            ``config.MAX_RETRIES_PER_STAGE``.
+
+    Returns:
+        The validated rules, or an empty list when no attempt produced
+        rules that passed validation.
+
+    Raises:
+        Exception: Errors raised by the LLM client call are not caught here.
+    """
+    if max_retries is None:
+        max_retries = config.MAX_RETRIES_PER_STAGE
+    client = config.llm_client()
+    prompt = build_prompt(pkg)
+    last_failures = []
+
+    for attempt in range(max_retries + 1):
+        # Append format feedback on retry
+        if attempt > 0 and last_failures:
+            fix_text = _build_fix_prompt(last_failures)
+            prompt = build_prompt(pkg, format_feedback=fix_text)
+
+        try:
+            resp = client.chat.completions.create(
+                model=config.MODEL_NAME,
+                messages=[
+                    {
+                        "role": "system",
+                        "content": "你是一个精确的 JSON 输出引擎。只输出合法的 JSON 数组。",
+                    },
+                    {"role": "user", "content": prompt},
+                ],
+                temperature=config.TEMPERATURE,
+                max_tokens=config.MAX_TOKENS,
+            )
+            content = resp.choices[0].message.content
+            if content is None:
+                # Raised as ValueError so that an empty response uses the
+                # retry budget like any other unparsable response.
+                raise ValueError("LLM returned empty response")
+
+            json_str = extract_json_from_response(content)
+            rules = json.loads(json_str)
+            if not isinstance(rules, list):
+                raise ValueError(f"Expected JSON array, got {type(rules).__name__}")
+
+            # Format validation
+            passed, failures = _check_rule_fields(rules)
+            if passed:
+                return rules
+
+            # Format issues found — retry with fix instructions
+            print(f"    格式问题 ({len(failures)} 个): {[f['field'] for f in failures[:5]]}")
+            last_failures = failures
+            if attempt < max_retries:
+                time.sleep(1)
+
+        except ValueError as e:
+            # json.JSONDecodeError is a ValueError subclass, so this also
+            # covers malformed JSON.
+            print(f"    JSON 解析失败 (尝试 {attempt + 1}): {e}")
+            last_failures = [{"rule_id": "?", "field": "json", "issue": str(e)}]
+            if attempt < max_retries:
+                time.sleep(2)
+
+    # Retry budget exhausted: no attempt passed validation.
+    # NOTE(review): rules that parsed but failed validation are discarded
+    # here — confirm that returning nothing is the intended fallback.
+    print(f"    WARN: {pkg['unit_id']} 格式修复耗尽了 {max_retries} 次重试")
+    return []
+
+
+def extract_all_rules(
+    semantic_index: dict, doc: dict
+) -> list[dict]:
+    """Extract IR rules for every function unit of the semantic index.
+
+    Context packages are built serially; the LLM calls then run on a thread
+    pool of ``MAX_CONCURRENCY`` workers. A unit whose call raises yields a
+    fragment with empty ``rules`` and an ``error`` message. The fragments
+    are returned sorted by ``unit_id``.
+    """
+    lookups = build_document_lookup(doc)
+    function_units = semantic_index.get("function_units", [])
+
+    print(f"  共 {len(function_units)} 个功能单元待处理")
+    print(f"  最大并发: {MAX_CONCURRENCY}")
+
+    # Build context packages (serial — fast)
+    packages = [
+        extract_context_package(fu, doc, *lookups) for fu in function_units
+    ]
+
+    # Run LLM calls in parallel
+    fragments = []
+    with concurrent.futures.ThreadPoolExecutor(max_workers=MAX_CONCURRENCY) as pool:
+        pending = {
+            pool.submit(extract_rules_for_unit, pkg): (pkg["unit_id"], pkg["unit_name"])
+            for pkg in packages
+        }
+
+        for done in concurrent.futures.as_completed(pending):
+            uid, uname = pending[done]
+            try:
+                rules = done.result()
+                fragments.append({
+                    "unit_id": uid,
+                    "unit_name": uname,
+                    "rules": rules
+                })
+                print(f"  [OK] {uid} ({uname}): {len(rules)} 条规则")
+            except Exception as e:
+                print(f"  [FAIL] {uid} ({uname}): 失败 — {e}")
+                fragments.append({
+                    "unit_id": uid,
+                    "unit_name": uname,
+                    "rules": [],
+                    "error": str(e)
+                })
+
+    # Completion order is arbitrary; sort by unit_id for stable output.
+    return sorted(fragments, key=lambda fragment: fragment["unit_id"])
+
+
+def main():
+    """Run stage 2: per-function-unit IR extraction.
+
+    Loads the semantic index and the input document, extracts rules for all
+    function units, saves the fragments to ``config.IR_FRAGMENTS_JSON`` and
+    prints a summary that lists the units whose extraction failed.
+    """
+    banner = "=" * 60
+    print(banner)
+    print("阶段二：逐功能单元 IR 提取")
+    print(banner)
+
+    # 1. Load inputs
+    print("\n[1/3] 加载输入...")
+    semantic_index = load_semantic_index()
+    doc = config.load_input_document()
+    unit_count = len(semantic_index.get("function_units", []))
+    print(f"  语义索引: {unit_count} 个功能单元")
+
+    # 2. Extract rules
+    print("\n[2/3] 逐单元提取 IR 规则...")
+    fragments = extract_all_rules(semantic_index, doc)
+
+    # 3. Save
+    print("\n[3/3] 保存 IR 片段...")
+    config.save_json(fragments, config.IR_FRAGMENTS_JSON)
+
+    total_rules = 0
+    failed_ids = []
+    for fragment in fragments:
+        total_rules += len(fragment["rules"])
+        if fragment.get("error"):
+            failed_ids.append(fragment["unit_id"])
+
+    print(f"\n完成! {len(fragments)} 个功能单元, 共 {total_rules} 条规则")
+    if failed_ids:
+        print(f"  [WARN] {len(failed_ids)} 个单元提取失败: "
+              f"{failed_ids}")
+    print(f"输出: {config.IR_FRAGMENTS_JSON}")
+
+
+if __name__ == "__main__":
+    main()
@@ -0,0 +1,472 @@
+"""
+Tests for ensemble_merge.py — all pure Python, no LLM calls, no file I/O.
+
+Each test uses hardcoded mock data to verify one piece of the merge logic.
+"""
+
+import sys
+from pathlib import Path
+
+sys.path.insert(0, str(Path(__file__).parent.parent))
+
+from ensemble_merge import (
+    concept_name_similarity,
+    cluster_concepts,
+    merge_concept_cluster,
+    unit_node_jaccard,
+    path_similarity,
+    unit_similarity,
+    cluster_function_units,
+    pick_best_representative,
+    compute_confidence_versions,
+    ensemble_merge_concepts,
+    ensemble_merge_function_units,
+    ensemble_merge,
+    _collect_logic_tree_nodes,
+)
+
+PASS = "[PASS]"
+FAIL = "[FAIL]"
+
+# ---- Mock helpers ----
+
+def _mk_unit(unit_id, name, path, logic_tree_nodes, description="", sources=None):
+    """Build a minimal function_unit dict for the tests.
+
+    If ``sources`` is given (even an empty list) it is used as-is. Otherwise
+    one source is synthesized: a ``logic_tree`` source on image ``rId16``
+    when ``logic_tree_nodes`` is non-empty, or a ``table`` source for
+    section 3.1 when it is empty. An empty ``description`` is replaced by
+    ``"desc for <name>"``.
+    """
+    if sources is not None:
+        srcs = sources
+    elif logic_tree_nodes:
+        srcs = [{
+            "image_id": "rId16",
+            "type": "logic_tree",
+            "logic_tree_nodes": logic_tree_nodes,
+        }]
+    else:
+        srcs = [{
+            "section": "3.1",
+            "type": "table",
+            "text_snippet": "test",
+        }]
+
+    unit = {"unit_id": unit_id, "name": name}
+    unit["description"] = description if description else f"desc for {name}"
+    unit["path"] = path
+    unit["sources"] = srcs
+    return unit
+
+
+def _mk_concept(name, parent=None, aliases=None, defined_in=None):
+    """Build a minimal concept dict for the tests.
+
+    Falsy ``aliases`` become an empty list and falsy ``defined_in`` becomes
+    ``["3.1"]``; ``parent`` is stored unchanged.
+    """
+    concept = {"name": name}
+    concept["aliases"] = aliases if aliases else []
+    concept["defined_in"] = defined_in if defined_in else ["3.1"]
+    concept["parent"] = parent
+    return concept
+
+
+# =============================================================================
+# Test 1: concept_name_similarity
+# =============================================================================
+
+def test_concept_name_similarity_exact():
+    """A name compared with itself must score exactly 1.0."""
+    for name in ("国内", "行车娱乐限制"):
+        assert concept_name_similarity(name, name) == 1.0
+
+def test_concept_name_similarity_substring():
+    """A name and a substring of it must score at least 0.85."""
+    longer, shorter = "国内行车娱乐限制", "行车娱乐限制"
+    score = concept_name_similarity(longer, shorter)
+    assert score >= 0.85, f"expected >= 0.85, got {score}"
+
+def test_concept_name_similarity_different():
+    """Two distinct scope names must score below 0.7."""
+    pair = ("国内", "海外")
+    score = concept_name_similarity(*pair)
+    assert score < 0.7, f"expected < 0.7, got {score}"
+
+def test_concept_name_similarity_seq_matcher():
+    """A name with extra characters inserted must score strictly between 0.6 and 0.95."""
+    base, expanded = "前台打断", "前台应用打断"
+    score = concept_name_similarity(base, expanded)
+    assert 0.6 < score < 0.95, f"expected 0.6-0.95, got {score}"
+
+
+# =============================================================================
+# Test 2: _collect_logic_tree_nodes
+# =============================================================================
+
+def test_collect_logic_tree_nodes():
+    """All node ids of a logic_tree source are returned as a set."""
+    node_ids = ["n1", "n2", "n3"]
+    unit = _mk_unit("U1", "test", ["A"], node_ids)
+    collected = _collect_logic_tree_nodes(unit)
+    assert collected == {"n1", "n2", "n3"}
+
+def test_collect_logic_tree_nodes_empty():
+    """A unit whose only source is a table yields an empty node set."""
+    table_only = [{"section": "3.1", "type": "table"}]
+    unit = _mk_unit("U2", "test", ["A"], [], sources=table_only)
+    collected = _collect_logic_tree_nodes(unit)
+    assert collected == set()
+
+
+# =============================================================================
+# Test 3: unit_node_jaccard
+# =============================================================================
+
+def test_unit_node_jaccard_identical():
+    """Units referencing the same nodes must score exactly 1.0."""
+    shared_nodes = ["n1", "n2", "n3"]
+    first = _mk_unit("U1", "a", ["A"], list(shared_nodes))
+    second = _mk_unit("U2", "b", ["A"], list(shared_nodes))
+    assert unit_node_jaccard(first, second) == 1.0
+
+def test_unit_node_jaccard_partial():
+    """Three shared nodes out of four in total must score about 0.75."""
+    superset = _mk_unit("U1", "a", ["A"], ["n1", "n2", "n3", "n4"])
+    subset = _mk_unit("U2", "b", ["A"], ["n1", "n2", "n3"])
+    # intersection = 3 nodes, union = 4 nodes
+    score = unit_node_jaccard(superset, subset)
+    assert abs(score - 0.75) < 0.01
+
+def test_unit_node_jaccard_disjoint():
+    """Units with no node in common must score exactly 0.0."""
+    left = _mk_unit("U1", "a", ["A"], ["n1", "n2"])
+    right = _mk_unit("U2", "b", ["B"], ["n3", "n4"])
+    score = unit_node_jaccard(left, right)
+    assert score == 0.0
+
+def test_unit_node_jaccard_both_empty():
+    """Two units without any logic-tree nodes must score 0.0."""
+    left = _mk_unit("U1", "a", ["A"], [], sources=[{"section": "3.1", "type": "table"}])
+    right = _mk_unit("U2", "b", ["B"], [], sources=[{"section": "3.1", "type": "table"}])
+    score = unit_node_jaccard(left, right)
+    assert score == 0.0
+
+
+# =============================================================================
+# Test 4: path_similarity
+# =============================================================================
+
+def test_path_similarity_identical():
+    """Two equal paths must score exactly 1.0."""
+    path = ["国内", "系统限制", "前台打断"]
+    score = path_similarity(list(path), list(path))
+    assert score == 1.0
+
+def test_path_similarity_partial():
+    """Paths that share their first two segments must score between 0.4 and 0.9."""
+    foreground = ["国内", "系统限制", "前台打断"]
+    background = ["国内", "系统限制", "后台限制启动"]
+    # Original author's estimate: 2/3 set overlap, sequential 3/5 ≈ 0.6
+    score = path_similarity(foreground, background)
+    assert 0.4 < score < 0.9, f"expected 0.4-0.9, got {score}"
+
+def test_path_similarity_different():
+    """Single-segment paths with different segments must score below 0.7."""
+    domestic, overseas = ["国内"], ["海外"]
+    score = path_similarity(domestic, overseas)
+    assert score < 0.7, f"expected < 0.7, got {score}"
+
+
+# =============================================================================
+# Test 5: unit_similarity
+# =============================================================================
+
+def test_unit_similarity_identical():
+    """A unit compared with itself must score above 0.99."""
+    unit = _mk_unit(
+        "U1",
+        "国内-系统限制-前台打断",
+        ["国内", "系统限制", "前台打断"],
+        ["n1", "n2", "n3", "n8", "n19"],
+    )
+    score = unit_similarity(unit, unit)
+    assert score > 0.99
+
+def test_unit_similarity_different():
+    """Units with different paths and disjoint nodes must score below 0.3."""
+    domestic = _mk_unit("U1", "a", ["国内", "系统限制", "前台打断"], ["n1", "n2", "n3"])
+    overseas = _mk_unit("U2", "b", ["海外", "SDK限制"], ["n10", "n11", "n12"])
+    score = unit_similarity(domestic, overseas)
+    assert score < 0.3
+
+
+# =============================================================================
+# Test 6: cluster_concepts
+# =============================================================================
+
+def test_cluster_concepts_identical():
+    """Three identical versions of three concepts give three clusters of three members."""
+    def make_version():
+        return [
+            _mk_concept("国内"),
+            _mk_concept("海外"),
+            _mk_concept("系统限制", parent="国内"),
+        ]
+
+    clusters = cluster_concepts([make_version(), make_version(), make_version()])
+    # One cluster per concept: 国内, 海外, 系统限制
+    assert len(clusters) == 3, f"expected 3 clusters, got {len(clusters)}"
+    for members in clusters:
+        assert len(members) == 3, f"expected each cluster to have 3 members, got {len(members)}"
+
+def test_cluster_concepts_name_variation():
+    """Name variants of one concept across three versions end up in a single cluster."""
+    names = ["国内行车娱乐限制", "行车娱乐限制", "国内行车娱乐限制"]
+    versions = [[_mk_concept(name, parent="国内")] for name in names]
+    clusters = cluster_concepts(versions)
+    assert len(clusters) == 1, f"expected 1 cluster, got {len(clusters)}"
+    assert len(clusters[0]) == 3, f"expected 3 members, got {len(clusters[0])}"
+
+
+# =============================================================================
+# Test 7: merge_concept_cluster
+# =============================================================================
+
+def test_merge_concept_cluster():
+    """Merging a 3-member cluster keeps the parent, unions the aliases, and rates it high or medium."""
+    members = [
+        ("国内行车娱乐限制", ["限制"]),
+        ("行车娱乐限制", ["行车限制"]),
+        ("行车娱乐限制", ["限制"]),
+    ]
+    cluster = [
+        (version_idx, _mk_concept(name, parent="国内", aliases=aliases))
+        for version_idx, (name, aliases) in enumerate(members)
+    ]
+    merged, conf = merge_concept_cluster(cluster, 3)
+    assert "行车娱乐限制" in merged["name"]
+    assert merged["parent"] == "国内"
+    assert set(merged["aliases"]) == {"限制", "行车限制"}
+    assert conf in ("high", "medium")
+
+
+# =============================================================================
+# Test 8: cluster_function_units
+# =============================================================================
+
+def test_cluster_function_units_all_agree():
+    """Three versions of the same unit (differing only in description) form one cluster of three."""
+    full_desc = "switch ON, system app, foreground, speed>=15, non-P, interrupt + toast"
+    short_desc = "switch ON, system app, foreground, interrupt"
+
+    def make_unit(description):
+        return _mk_unit(
+            "U-001",
+            "国内-系统限制-前台打断",
+            ["国内", "系统限制", "前台打断"],
+            ["n1", "n2", "n3", "n8", "n19", "n21", "n23", "n25", "n26"],
+            description,
+        )
+
+    versions = [[make_unit(desc)] for desc in (full_desc, full_desc, short_desc)]
+    clusters = cluster_function_units(versions)
+    assert len(clusters) == 1, f"expected 1 cluster, got {len(clusters)}"
+    assert len(clusters[0]) == 3
+
+def test_cluster_function_units_partial_agree():
+    """Two matching units and one unrelated unit give cluster sizes [1, 2]."""
+    def interrupt_unit():
+        return _mk_unit("U-001", "打断", ["国内", "系统限制", "前台打断"],
+                        ["n1", "n2", "n3", "n8", "n19"])
+
+    outlier = _mk_unit("U-002", "禁止", ["国内", "系统限制", "后台限制启动"],
+                       ["n5", "n6"])
+    clusters = cluster_function_units([[interrupt_unit()], [interrupt_unit()], [outlier]])
+    # The two interrupt units share a cluster; the outlier stands alone.
+    assert len(clusters) == 2, f"expected 2 clusters, got {len(clusters)}"
+    cluster_sizes = sorted(len(members) for members in clusters)
+    assert cluster_sizes == [1, 2], f"expected cluster sizes [1,2], got {cluster_sizes}"
+
+def test_cluster_function_units_all_disagree():
+    """Three unrelated units, one per version, stay in three separate clusters."""
+    versions = [
+        [_mk_unit("U-001", "打断", ["国内", "系统限制", "前台打断"], ["n1", "n2", "n3"])],
+        [_mk_unit("U-002", "禁止", ["国内", "系统限制", "后台限制启动"], ["n5", "n6"])],
+        [_mk_unit("U-003", "SDK", ["国内", "SDK限制"], ["n10", "n11"])],
+    ]
+    clusters = cluster_function_units(versions)
+    assert len(clusters) == 3, f"expected 3 clusters, got {len(clusters)}"
+
+
+# =============================================================================
+# Test 9: pick_best_representative
+# =============================================================================
+
+def test_pick_best_representative_prefers_rich():
+    """The unit with more nodes and a longer description is chosen as representative."""
+    sparse = _mk_unit(
+        "U-001", "short", ["国内", "系统限制"],
+        ["n1", "n2", "n3"],
+        description="short desc",
+    )
+    rich = _mk_unit(
+        "U-001", "detailed", ["国内", "系统限制", "前台打断"],
+        ["n1", "n2", "n3", "n8", "n19", "n21", "n23", "n25", "n26"],
+        description="very detailed description of the full rule behavior " * 5,
+    )
+    best = pick_best_representative([(0, sparse), (1, rich)])
+    # The richer unit is expected to win even though the sparse one comes
+    # from version 0 (the lower-temperature run, per the original comment).
+    assert best["name"] == "detailed"
+
+
+# =============================================================================
+# Test 10: compute_confidence_versions
+# =============================================================================
+
+def test_confidence_high_unanimous():
+    """Arguments (3, 3, True) must be rated "high"."""
+    level = compute_confidence_versions(3, 3, True)
+    assert level == "high"
+
+def test_confidence_high_two_of_three_with_t0():
+    """Arguments (2, 3, True) must be rated "high"."""
+    level = compute_confidence_versions(2, 3, True)
+    assert level == "high"
+
+def test_confidence_medium_two_of_three_without_t0():
+    """Arguments (2, 3, False) must be rated "medium"."""
+    level = compute_confidence_versions(2, 3, False)
+    assert level == "medium"
+
+def test_confidence_low_one_of_three():
+    """Arguments (1, 3, False) must be rated "low"."""
+    level = compute_confidence_versions(1, 3, False)
+    assert level == "low"
+
+def test_confidence_high_all_two_versions():
+    """Arguments (2, 2, True) must be rated "high"."""
+    level = compute_confidence_versions(2, 2, True)
+    assert level == "high"
+
+
+# =============================================================================
+# Test 11: ensemble_merge_concepts
+# =============================================================================
+
+def test_ensemble_merge_concepts():
+    """Three versions of three concepts merge into three concepts, each with 3/3 support."""
+    def scope_concepts():
+        return [_mk_concept("国内"), _mk_concept("海外")]
+
+    versions = [
+        scope_concepts() + [_mk_concept("国内行车娱乐限制", parent="国内")],
+        scope_concepts() + [_mk_concept("行车娱乐限制", parent="国内",
+                                        aliases=["限制"], defined_in=["3.1", "3.1.1"])],
+        scope_concepts() + [_mk_concept("行车娱乐限制", parent="国内")],
+    ]
+
+    merged = ensemble_merge_concepts(versions)
+    # The restriction concept is named differently per version but must still collapse.
+    assert len(merged) == 3, f"expected 3 merged concepts, got {len(merged)}"
+    for concept in merged:
+        assert "confidence" in concept
+        assert "ensemble_support" in concept
+        assert concept["ensemble_support"] == "3/3"
+
+
+# =============================================================================
+# Test 12: ensemble_merge_function_units
+# =============================================================================
+
+def test_ensemble_merge_function_units():
+    """Three near-identical versions merge into one high-confidence unit that keeps the richest description."""
+    path = ["国内", "系统限制", "前台打断"]
+    all_nodes = ["n1", "n2", "n3", "n8", "n19", "n21", "n23", "n25", "n26"]
+    variants = [
+        (all_nodes, "full description A"),
+        (all_nodes, "full description B (more detail)"),
+        (all_nodes[:-1], "partial description"),  # the third version lacks n26
+    ]
+    versions = [
+        [_mk_unit("U-001", "打断", list(path), list(nodes), description=desc)]
+        for nodes, desc in variants
+    ]
+
+    merged = ensemble_merge_function_units(versions)
+    assert len(merged) == 1, f"expected 1 unit, got {len(merged)}"
+    unit = merged[0]
+    assert unit["confidence"] == "high"
+    assert unit["ensemble_support"] == "3/3"
+    assert unit["source_versions"] == 3
+    assert unit["unit_id"].startswith("FU-ENS-")
+    # The second version's description (the one with more detail) must survive.
+    assert "more detail" in unit["description"]
+
+
+# =============================================================================
+# Test 13: ensemble_merge full integration
+# =============================================================================
+
+def test_ensemble_merge_full():
+    """End-to-end merge of three versions: one unit shared by all, two units seen only once."""
+    feature = "行车娱乐限制"
+
+    def base_concepts():
+        return [_mk_concept("国内"), _mk_concept("系统限制", parent="国内")]
+
+    def interrupt_unit():
+        return _mk_unit("U-001", "打断", ["国内", "系统限制", "前台打断"],
+                        ["n1", "n2", "n3", "n8", "n19", "n25", "n26"])
+
+    def make_version(*extra_units):
+        return {
+            "feature_name": feature,
+            "concepts": base_concepts(),
+            "function_units": [interrupt_unit(), *extra_units],
+        }
+
+    v0 = make_version(
+        _mk_unit("U-002", "后台禁止", ["国内", "系统限制", "后台限制启动"],
+                 ["n5", "n6"]),
+    )
+    v1 = make_version(
+        _mk_unit("U-003", "SDK自定义", ["国内", "SDK限制", "自定义限制"],
+                 ["n10", "n11"]),
+    )
+    v2 = make_version()
+
+    result = ensemble_merge([v0, v1, v2])
+
+    assert result["feature_name"] == "行车娱乐限制"
+    assert result["ensemble_versions"] == 3
+
+    units = result["function_units"]
+    concepts = result["concepts"]
+
+    # Concepts: 国内 + 系统限制
+    assert len(concepts) == 2
+
+    # Units: 打断 (3 versions -> high), 后台禁止 (1 version -> low), SDK (1 version -> low)
+    assert len(units) == 3
+
+    by_confidence = {"high": [], "low": []}
+    for unit in units:
+        if unit["confidence"] in by_confidence:
+            by_confidence[unit["confidence"]].append(unit)
+    assert len(by_confidence["high"]) == 1
+    assert len(by_confidence["low"]) == 2
+
+    # Every merged unit must carry the ensemble bookkeeping fields.
+    for unit in units:
+        for field in ("confidence", "ensemble_support", "source_versions"):
+            assert field in unit
+
+    # The summary must agree with the per-unit confidence levels.
+    summary = result["confidence_summary"]
+    assert summary["total_units"] == 3
+    assert summary["high"] == 1
+    assert summary["low"] == 2
+
+
+# =============================================================================
+# Runner
+# =============================================================================
+
+def run_all_tests():
+    """Run every test of this module and print one result line per test.
+
+    A test counts as failed when it raises ``AssertionError`` or any other
+    ``Exception``. Returns True when no test failed.
+    """
+    rule = "=" * 60
+    print(rule)
+    print("Ensemble Merge 测试 (纯 Python, 无 LLM)")
+    print(rule)
+
+    tests = [
+        ("concept_name_similarity exact", test_concept_name_similarity_exact),
+        ("concept_name_similarity substring", test_concept_name_similarity_substring),
+        ("concept_name_similarity different", test_concept_name_similarity_different),
+        ("concept_name_similarity seq_matcher", test_concept_name_similarity_seq_matcher),
+        ("collect_logic_tree_nodes", test_collect_logic_tree_nodes),
+        ("collect_logic_tree_nodes empty", test_collect_logic_tree_nodes_empty),
+        ("unit_node_jaccard identical", test_unit_node_jaccard_identical),
+        ("unit_node_jaccard partial", test_unit_node_jaccard_partial),
+        ("unit_node_jaccard disjoint", test_unit_node_jaccard_disjoint),
+        ("unit_node_jaccard both_empty", test_unit_node_jaccard_both_empty),
+        ("path_similarity identical", test_path_similarity_identical),
+        ("path_similarity partial", test_path_similarity_partial),
+        ("path_similarity different", test_path_similarity_different),
+        ("unit_similarity identical", test_unit_similarity_identical),
+        ("unit_similarity different", test_unit_similarity_different),
+        ("cluster_concepts identical", test_cluster_concepts_identical),
+        ("cluster_concepts name variation", test_cluster_concepts_name_variation),
+        ("merge_concept_cluster", test_merge_concept_cluster),
+        ("cluster_function_units all_agree", test_cluster_function_units_all_agree),
+        ("cluster_function_units partial_agree", test_cluster_function_units_partial_agree),
+        ("cluster_function_units all_disagree", test_cluster_function_units_all_disagree),
+        ("pick_best_representative", test_pick_best_representative_prefers_rich),
+        ("confidence high unanimous", test_confidence_high_unanimous),
+        ("confidence high 2/3 with t0", test_confidence_high_two_of_three_with_t0),
+        ("confidence medium 2/3 no t0", test_confidence_medium_two_of_three_without_t0),
+        ("confidence low 1/3", test_confidence_low_one_of_three),
+        ("confidence high 2/2", test_confidence_high_all_two_versions),
+        ("ensemble_merge_concepts", test_ensemble_merge_concepts),
+        ("ensemble_merge_function_units", test_ensemble_merge_function_units),
+        ("ensemble_merge full", test_ensemble_merge_full),
+    ]
+
+    # One boolean per executed test: True = passed, False = failed.
+    outcomes = []
+    for name, test_fn in tests:
+        try:
+            test_fn()
+            print(f"  {PASS} {name}")
+            outcomes.append(True)
+        except AssertionError as e:
+            print(f"  {FAIL} {name}: {e}")
+            outcomes.append(False)
+        except Exception as e:
+            print(f"  {FAIL} {name}: unexpected {type(e).__name__}: {e}")
+            outcomes.append(False)
+
+    failed = outcomes.count(False)
+    passed = len(outcomes) - failed
+
+    print(f"\n{rule}")
+    if failed:
+        print(f"{FAIL} {failed}/{passed + failed} 个测试失败")
+    else:
+        print(f"{PASS} 所有 {passed} 个测试通过!")
+    print(rule)
+
+    return failed == 0
+
+
+if __name__ == "__main__":
+    success = run_all_tests()
+    sys.exit(0 if success else 1)
@@ -0,0 +1,370 @@
+"""
+Tests for Stage 1 (Semantic Index).
+
+Validates that the generated semantic_index.json meets all completeness
+and structural requirements, including the new iterative features:
+- function_units have path fields
+- concepts have parent references
+- logic tree node coverage meets thresholds
+"""
+
+import json
+import sys
+from pathlib import Path
+
+sys.path.insert(0, str(Path(__file__).parent.parent))
+import config
+
+
+PASS = "[PASS]"
+FAIL = "[FAIL]"
+WARN = "[WARN]"
+
+
+def load_inputs():
+    """Return ``(semantic_index, document)``.
+
+    If the semantic index file is missing, print a hint to run step 1 first
+    and exit the process with status 1.
+    """
+    try:
+        semantic_index = config.load_json(config.SEMANTIC_INDEX_JSON)
+    except FileNotFoundError:
+        print(f"{FAIL} semantic_index.json 未找到: {config.SEMANTIC_INDEX_JSON}")
+        print("  请先运行 step1_semantic_index.py")
+        sys.exit(1)
+    return semantic_index, config.load_input_document()
+
+
+def build_image_index(doc: dict) -> dict[str, dict]:
+    """Map each non-empty image ``rid`` to its ``image_analysis`` entry.
+
+    Entries without a ``rid`` are skipped; when a ``rid`` occurs more than
+    once, the last entry wins.
+    """
+    return {
+        img["rid"]: img
+        for img in doc.get("image_analysis", [])
+        if img.get("rid", "")
+    }
+
+
+def build_logic_tree_node_index(doc: dict) -> dict[str, set[str]]:
+    """Map each image ``rid`` to the set of node ids of its ``logic_tree``.
+
+    Images without a ``rid`` or with a missing/empty ``logic_tree`` are left
+    out of the index.
+    """
+    index = {}
+    for img in doc.get("image_analysis", []):
+        rid = img.get("rid", "")
+        tree = img.get("logic_tree")
+        if not (tree and rid):
+            continue
+        index[rid] = {node["id"] for node in tree.get("nodes", [])}
+    return index
+
+
+def check_unit_ids(units: list[dict]) -> list[str]:
+    """Report function_units whose unit_id is empty or duplicated, or whose name is empty.
+
+    Returns a list of error messages; an empty list means all units passed.
+    """
+    problems = []
+    known_ids = set()
+    for index, unit in enumerate(units):
+        uid = unit.get("unit_id", "")
+        if not uid:
+            problems.append(f"function_unit[{index}]: unit_id 为空")
+        elif uid in known_ids:
+            problems.append(f"function_unit[{index}]: unit_id '{uid}' 重复")
+        known_ids.add(uid)
+
+        if not unit.get("name", ""):
+            problems.append(f"function_unit[{index}] ({uid}): name 为空")
+    return problems
+
+
+def check_unit_paths(units: list[dict]) -> list[str]:
+    """Report function_units whose ``path`` is missing, empty, or not a list.
+
+    Returns a list of error messages; an empty list means all units passed.
+    """
+    problems = []
+    for unit in units:
+        uid = unit.get("unit_id", "?")
+        path = unit.get("path", [])
+        if not path:
+            problems.append(f"{uid}: path 字段为空或缺失")
+            continue
+        if not isinstance(path, list):
+            problems.append(f"{uid}: path 必须是数组")
+    return problems
+
+
+def check_concept_parents(concepts: list[dict]) -> list[str]:
+    """Validate the ``parent`` reference of every concept.
+
+    The scope concepts 国内 and 海外 must not have a parent. Every other
+    concept must have a parent, and that parent must be the name of a
+    concept in ``concepts``. Returns a list of error messages.
+    """
+    problems = []
+    known_names = {concept.get("name", "") for concept in concepts}
+    scope_names = {"国内", "海外"}
+
+    for concept in concepts:
+        name = concept.get("name", "?")
+        parent = concept.get("parent", "")
+
+        if name in scope_names:
+            # Scope concepts are the roots: a parent is an error.
+            if parent:
+                problems.append(f"scope 概念 '{name}' 不应有 parent (当前: '{parent}')")
+            continue
+
+        # Everything below the scopes needs a resolvable parent.
+        if not parent:
+            problems.append(f"概念 '{name}' 缺少 parent 字段")
+        elif parent not in known_names:
+            problems.append(f"概念 '{name}' 的 parent '{parent}' 不存在于 concepts 中")
+
+    return problems
+
+
+def check_sources_exist(
+    units: list[dict], image_index: dict[str, dict], node_index: dict[str, set[str]]
+) -> list[str]:
+    """Check that every source reference of every function_unit is resolvable.
+
+    Rules applied per unit:
+    - ``sources`` must be non-empty and contain at least one source of type
+      ``table``/``para`` or ``logic_tree``.
+    - ``table``/``para`` sources must name a ``section``.
+    - ``logic_tree`` sources must name an ``image_id`` present in
+      ``image_index`` and list at least one node in ``logic_tree_nodes``;
+      every listed node must exist in that image's node set.
+    - If the image is known but has no entry in ``node_index`` (it has no
+      logic tree), the listed nodes cannot be verified and an error is
+      reported instead of silently accepting them.
+
+    Args:
+        units: function_unit dicts from the semantic index.
+        image_index: image rId -> image_analysis entry.
+        node_index: image rId -> set of node ids of its logic tree.
+
+    Returns:
+        A list of error messages; empty when every reference is valid.
+    """
+    errors = []
+    for fu in units:
+        uid = fu.get("unit_id", "?")
+        sources = fu.get("sources", [])
+        if not sources:
+            errors.append(f"{uid}: sources 为空，必须至少引用一张图片或一段文字")
+            continue
+
+        has_known_source = False
+
+        for j, src in enumerate(sources):
+            src_type = src.get("type", "")
+            if src_type in ("table", "para"):
+                has_known_source = True
+                if not src.get("section", ""):
+                    errors.append(f"{uid}.sources[{j}]: 缺少 section")
+                continue
+            if src_type != "logic_tree":
+                # Unknown source types are ignored; they do not count as a reference.
+                continue
+
+            has_known_source = True
+            image_id = src.get("image_id", "")
+            if not image_id:
+                errors.append(f"{uid}.sources[{j}]: logic_tree 缺少 image_id")
+                continue
+            if image_id not in image_index:
+                errors.append(
+                    f"{uid}.sources[{j}]: image_id '{image_id}' "
+                    f"在 image_analysis 中不存在"
+                )
+                continue
+
+            node_ids = src.get("logic_tree_nodes", [])
+            if not node_ids:
+                errors.append(
+                    f"{uid}.sources[{j}]: logic_tree 类型但未提供 logic_tree_nodes"
+                )
+            elif image_id not in node_index:
+                # The image exists but carries no logic tree, so none of the
+                # referenced nodes can exist.
+                errors.append(
+                    f"{uid}.sources[{j}]: image_id '{image_id}' "
+                    f"没有逻辑树，无法校验 logic_tree_nodes"
+                )
+            else:
+                valid_nodes = node_index[image_id]
+                for nid in node_ids:
+                    if nid not in valid_nodes:
+                        errors.append(
+                            f"{uid}.sources[{j}]: 节点 '{nid}' 在 "
+                            f"{image_id} 的逻辑树中不存在"
+                        )
+
+        if not has_known_source:
+            errors.append(f"{uid}: 必须至少引用一个文本或图片来源")
+
+    return errors
+
+
+def _node_types_by_image(doc: dict) -> dict:
+    """Map each image rId to a ``{node_id: node_type}`` dict of its logic tree."""
+    types_by_image = {}
+    for img in doc.get("image_analysis", []):
+        rid = img.get("rid")
+        if rid in types_by_image:
+            # Keep the first entry for a rId, as the per-image lookup did before.
+            continue
+        lt = img.get("logic_tree") or {}
+        types_by_image[rid] = {n["id"]: n.get("type", "?") for n in lt.get("nodes", [])}
+    return types_by_image
+
+
+def check_logic_tree_coverage(
+    units: list[dict], node_index: dict[str, set[str]], doc: "dict | None" = None
+) -> list[str]:
+    """Warn about decision/action nodes that no function_unit references.
+
+    For every image in ``node_index``, the nodes referenced by the units'
+    sources are subtracted from the image's full node set. Uncovered nodes
+    of type ``decision`` or ``action`` produce one warning per image, with
+    the node ids listed in sorted order so the message is reproducible.
+
+    Args:
+        units: function_unit dicts from the semantic index.
+        node_index: image rId -> set of node ids of its logic tree.
+        doc: parsed input document used to look up node types. When omitted
+            it is loaded via ``config.load_input_document()``, at most once
+            and only if some image actually has uncovered nodes.
+
+    Returns:
+        A list of warning messages; empty when coverage is complete.
+    """
+    warnings = []
+    node_types_by_image = None  # built lazily on the first uncovered image
+
+    for image_id, all_nodes in node_index.items():
+        referenced = set()
+        for fu in units:
+            for src in fu.get("sources", []):
+                if src.get("image_id") == image_id:
+                    referenced.update(src.get("logic_tree_nodes", []))
+
+        uncovered = all_nodes - referenced
+        if not uncovered:
+            continue
+
+        if node_types_by_image is None:
+            if doc is None:
+                doc = config.load_input_document()
+            node_types_by_image = _node_types_by_image(doc)
+        node_types = node_types_by_image.get(image_id, {})
+
+        decision_action_uncovered = sorted(
+            (n for n in uncovered if node_types.get(n) in ("decision", "action")),
+            key=str,
+        )
+        if decision_action_uncovered:
+            warnings.append(
+                f"{image_id}: {len(decision_action_uncovered)} 个 "
+                f"decision/action 节点未被引用: {decision_action_uncovered}"
+            )
+
+    return warnings
+
+
+def check_ensemble_confidence(units: list[dict]) -> list[str]:
+    """Report function_units with missing or invalid ensemble fields.
+
+    ``confidence`` must be one of high/medium/low, ``ensemble_support`` must
+    be non-empty, and the ``source_versions`` key must be present. Returns a
+    list of error messages.
+    """
+    allowed_levels = {"high", "medium", "low"}
+    problems = []
+    for unit in units:
+        uid = unit.get("unit_id", "?")
+
+        level = unit.get("confidence", "")
+        if not level:
+            problems.append(f"{uid}: 缺少 confidence 字段")
+        elif level not in allowed_levels:
+            problems.append(f"{uid}: confidence='{level}' 无效 (期望 high/medium/low)")
+
+        if not unit.get("ensemble_support", ""):
+            problems.append(f"{uid}: 缺少 ensemble_support 字段")
+
+        if "source_versions" not in unit:
+            problems.append(f"{uid}: 缺少 source_versions 字段")
+    return problems
+
+
+def check_confidence_summary(si: dict) -> list[str]:
+    """Compare ``confidence_summary`` with the actual unit and concept confidence counts.
+
+    Returns a single error when the summary is missing or empty; otherwise
+    one error per summary field whose value (a missing field counts as 0)
+    differs from the recomputed count.
+    """
+    summary = si.get("confidence_summary", {})
+    if not summary:
+        return ["缺少 confidence_summary 字段"]
+
+    units = si.get("function_units", [])
+    concepts = si.get("concepts", [])
+
+    def count_level(items, level):
+        return sum(1 for item in items if item.get("confidence") == level)
+
+    # (summary key, recomputed value), in the order the errors are reported.
+    expectations = [
+        ("total_units", len(units)),
+        ("high", count_level(units, "high")),
+        ("medium", count_level(units, "medium")),
+        ("low", count_level(units, "low")),
+        ("total_concepts", len(concepts)),
+        ("concept_high", count_level(concepts, "high")),
+        ("concept_medium", count_level(concepts, "medium")),
+        ("concept_low", count_level(concepts, "low")),
+    ]
+
+    return [
+        f"confidence_summary.{key}={summary.get(key)} != 实际 {actual}"
+        for key, actual in expectations
+        if summary.get(key, 0) != actual
+    ]
+
+
+def run_all_tests():
+    """Run the seven Step 1 checks against semantic_index.json and print a report.
+
+    Checks 1-4, 6 and 7 produce errors; check 5 (logic tree coverage) only
+    produces warnings. Returns True when no check reported an error.
+    """
+    print("=" * 60)
+    print("Step 1 自检测试")
+    print("=" * 60)
+
+    si, doc = load_inputs()
+    units = si.get("function_units", [])
+    concepts = si.get("concepts", [])
+    image_index = build_image_index(doc)
+    node_index = build_logic_tree_node_index(doc)
+
+    all_errors = []
+    all_warnings = []
+
+    def report(label, findings, ok_text, marker, noun, bucket):
+        """Print the outcome of one check and collect its findings in ``bucket``."""
+        if not findings:
+            print(f"\n{PASS} {label}: {ok_text}")
+            return
+        print(f"\n{marker} {label}: {len(findings)} 个{noun}")
+        for finding in findings:
+            print(f"  - {finding}")
+        bucket.extend(findings)
+
+    def report_errors(label, findings, ok_text="全部通过"):
+        report(label, findings, ok_text, FAIL, "错误", all_errors)
+
+    # Test 1: unit_id and name validity
+    report_errors(
+        "unit_id/name 检查",
+        check_unit_ids(units),
+        f"全部通过 ({len(units)} 个功能单元)",
+    )
+
+    # Test 2: path fields
+    report_errors("path 字段检查", check_unit_paths(units))
+
+    # Test 3: concept parent references
+    report_errors(
+        "concept parent 检查",
+        check_concept_parents(concepts),
+        f"全部通过 ({len(concepts)} 个概念)",
+    )
+
+    # Test 4: source references exist
+    report_errors("来源引用检查", check_sources_exist(units, image_index, node_index))
+
+    # Test 5: logic tree coverage (warnings only, never fails the run)
+    report(
+        "逻辑树节点覆盖率",
+        check_logic_tree_coverage(units, node_index),
+        "全部通过",
+        WARN,
+        "警告",
+        all_warnings,
+    )
+
+    # Test 6: ensemble confidence fields on function_units
+    report_errors("集成置信度字段", check_ensemble_confidence(units))
+
+    # Test 7: confidence summary consistency
+    cs = si.get("confidence_summary", {})
+    report_errors(
+        "confidence_summary 一致性",
+        check_confidence_summary(si),
+        f"high={cs.get('high',0)}, medium={cs.get('medium',0)}, "
+        f"low={cs.get('low',0)}",
+    )
+
+    # Summary
+    print(f"\n{'='*60}")
+    total_failures = len(all_errors)
+    total_warnings = len(all_warnings)
+
+    if total_failures:
+        print(f"{FAIL} 测试失败: {total_failures} 个错误, {total_warnings} 个警告")
+        print("\n请检查 LLM 输出质量，可能需要调整 Prompt 并重新运行 step1_semantic_index.py")
+    elif total_warnings:
+        print(f"{WARN} 全部通过但有 {total_warnings} 个警告")
+    else:
+        print(f"{PASS} 所有测试通过!")
+
+    print("\n统计:")
+    print(f"  功能单元数: {len(units)}")
+    print(f"  概念数: {len(concepts)}")
+    print(f"  逻辑树图片数: {len(node_index)}")
+
+    return total_failures == 0
+
+
+if __name__ == "__main__":
+    success = run_all_tests()
+    sys.exit(0 if success else 1)
@@ -0,0 +1,322 @@
+"""
+Tests for Stage 2 (IR Extraction).
+
+Validates that ir_fragments.json meets quality and structural requirements:
+- All fragments have non-empty rules
+- All rules have path arrays
+- All rules have precondition.geographic_scope and precondition.screen_type
+- All trigger conditions have signal/operator/value
+- user_interaction content is non-empty and not a placeholder
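+- No duplicate rule_ids (across all fragments)
+- All rules cite at least one source (logic tree nodes or table/paragraph text)
+- All actions have a valid type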
+"""
+
+import json
+import sys
+from pathlib import Path
+from collections import Counter
+
+sys.path.insert(0, str(Path(__file__).parent.parent))
+import config
+
+
+# Status tags prefixed to the report lines printed by this script.
+PASS, FAIL, WARN = "[PASS]", "[FAIL]", "[WARN]"
+
+# Substrings that mark a user_interaction `content` value as an unfilled
+# placeholder rather than real copy.
+FORBIDDEN_PLACEHOLDERS = [
+    "文案由业务定义",
+    "待定",
+    "自定义",
+    "TBD",
+    "todo",
+    "TODO",
+]
+
+
+def load_fragments():
+    """Load ir_fragments.json through ``config.load_json``.
+
+    If the file does not exist, print a hint to run the Step 2 script first
+    and terminate the process with exit status 1.
+    """
+    try:
+        fragments = config.load_json(config.IR_FRAGMENTS_JSON)
+    except FileNotFoundError:
+        print(f"{FAIL} ir_fragments.json 未找到: {config.IR_FRAGMENTS_JSON}")
+        print("  请先运行 step2_ir_extraction.py")
+        sys.exit(1)
+    else:
+        return fragments
+
+
+def check_non_empty_rules(fragments: list[dict]) -> list[str]:
+    """Report every fragment that carries no rules.
+
+    A rule-less fragment with a truthy ``error`` field is reported as a failed
+    extraction (quoting that error); any other rule-less fragment is reported
+    as having empty rules. Fragments without ``unit_id`` are labelled ``?``.
+    """
+    problems = []
+    for fragment in fragments:
+        if fragment.get("rules", []):
+            continue
+        unit_id = fragment.get("unit_id", "?")
+        if fragment.get("error"):
+            problems.append(f"{unit_id}: 提取失败 — {fragment['error']}")
+        else:
+            problems.append(f"{unit_id}: rules 为空")
+    return problems
+
+
+def check_rule_paths(fragments: list[dict]) -> list[str]:
+    """Check that every rule carries a non-empty ``path`` array.
+
+    Args:
+        fragments: Fragment dicts, each optionally holding a ``rules`` list.
+
+    Returns:
+        One message per rule whose ``path`` is missing, empty, or not a list.
+        A rule without ``rule_id`` is labelled ``<unit_id>.rule[<index>]``.
+    """
+    errors = []
+    for f in fragments:
+        uid = f.get("unit_id", "?")
+        for j, rule in enumerate(f.get("rules", [])):
+            # Qualify the positional fallback with the unit id: a bare
+            # "rule[j]" is ambiguous once several fragments lack rule_ids.
+            rid = rule.get("rule_id", f"{uid}.rule[{j}]")
+            path = rule.get("path", [])
+            if not path:
+                errors.append(f"{rid}: path 字段为空或缺失")
+            elif not isinstance(path, list):
+                errors.append(f"{rid}: path 必须是数组")
+    return errors
+
+
+def check_precondition_fields(fragments: list[dict]) -> list[str]:
+    """Check each rule's ``precondition`` for the two required fields.
+
+    ``geographic_scope`` must be present and truthy; ``screen_type`` only has
+    to be present as a key (any value, including ``None``, is accepted).
+
+    Args:
+        fragments: Fragment dicts, each optionally holding a ``rules`` list.
+
+    Returns:
+        One message per missing item. A rule without ``rule_id`` is labelled
+        ``<unit_id>.rule[<index>]``.
+    """
+    errors = []
+    for f in fragments:
+        uid = f.get("unit_id", "?")
+        for j, rule in enumerate(f.get("rules", [])):
+            # Qualify the positional fallback with the unit id so the message
+            # stays unambiguous across fragments.
+            rid = rule.get("rule_id", f"{uid}.rule[{j}]")
+            precond = rule.get("precondition", {})
+            if not precond:
+                # Nothing further to inspect on a missing/empty precondition.
+                errors.append(f"{rid}: precondition 缺失")
+                continue
+            if not precond.get("geographic_scope"):
+                errors.append(f"{rid}: precondition.geographic_scope 缺失")
+            if "screen_type" not in precond:
+                errors.append(f"{rid}: precondition.screen_type 缺失")
+    return errors
+
+
+def check_user_interaction_content(fragments: list[dict]) -> list[str]:
+    """Check that ``user_interaction`` actions carry real, non-placeholder content.
+
+    Only actions whose ``type`` is ``"user_interaction"`` are inspected. Their
+    ``content`` must be truthy and must not contain any substring listed in
+    ``FORBIDDEN_PLACEHOLDERS``.
+
+    Args:
+        fragments: Fragment dicts, each optionally holding a ``rules`` list.
+
+    Returns:
+        One message per offending action. A rule without ``rule_id`` is
+        labelled ``<unit_id>.rule[<index>]``.
+    """
+    errors = []
+    for f in fragments:
+        uid = f.get("unit_id", "?")
+        for j, rule in enumerate(f.get("rules", [])):
+            # Qualify the positional fallback with the unit id so the message
+            # stays unambiguous across fragments.
+            rid = rule.get("rule_id", f"{uid}.rule[{j}]")
+            for k, action in enumerate(rule.get("actions", [])):
+                if action.get("type") != "user_interaction":
+                    continue
+                content = action.get("content", "")
+                if not content:
+                    errors.append(
+                        f"{rid}.actions[{k}]: user_interaction 的 content 为空"
+                    )
+                elif any(ph in content for ph in FORBIDDEN_PLACEHOLDERS):
+                    errors.append(
+                        f"{rid}.actions[{k}]: content 包含占位符: '{content}'"
+                    )
+    return errors
+
+
+def check_sources_have_logic_tree_nodes(fragments: list[dict]) -> list[str]:
+    """Check that every rule cites at least one usable source.
+
+    A rule passes when its ``sources`` contain either a ``logic_tree`` entry
+    with non-empty ``node_ids`` or a text entry of type ``table`` / ``para``.
+    Despite the function name, a logic tree reference is therefore not
+    mandatory: a text reference alone is accepted.
+
+    Args:
+        fragments: Fragment dicts, each optionally holding a ``rules`` list.
+
+    Returns:
+        One message per rule with neither kind of reference. A rule without
+        ``rule_id`` is labelled ``<unit_id>.rule[<index>]``.
+    """
+    errors = []
+    for f in fragments:
+        uid = f.get("unit_id", "?")
+        for j, rule in enumerate(f.get("rules", [])):
+            # Qualify the positional fallback with the unit id so the message
+            # stays unambiguous across fragments.
+            rid = rule.get("rule_id", f"{uid}.rule[{j}]")
+            sources = rule.get("sources", [])
+            has_logic_tree = any(
+                src.get("type") == "logic_tree" and src.get("node_ids")
+                for src in sources
+            )
+            has_text = any(
+                src.get("type") in ("table", "para") for src in sources
+            )
+            if not (has_logic_tree or has_text):
+                errors.append(f"{rid}: sources 中既无逻辑树引用也无文字引用")
+    return errors
+
+
+def check_trigger_conditions(fragments: list[dict]) -> list[str]:
+    """Check that every trigger condition has ``signal``, ``operator`` and ``value``.
+
+    Rules whose trigger defines a non-``None`` ``event`` are skipped entirely.
+    For the remaining rules, ``signal`` and ``operator`` must be truthy, while
+    ``value`` only has to be present as a key (falsy values are accepted).
+
+    Args:
+        fragments: Fragment dicts, each optionally holding a ``rules`` list.
+
+    Returns:
+        One message per missing item. A rule without ``rule_id`` is labelled
+        ``<unit_id>.rule[<index>]``.
+    """
+    errors = []
+    for f in fragments:
+        uid = f.get("unit_id", "?")
+        for j, rule in enumerate(f.get("rules", [])):
+            # Qualify the positional fallback with the unit id so the message
+            # stays unambiguous across fragments.
+            rid = rule.get("rule_id", f"{uid}.rule[{j}]")
+            trigger = rule.get("trigger", {})
+
+            # Event-driven triggers are exempt from the condition checks.
+            if trigger.get("event") is not None:
+                continue
+
+            for k, cond in enumerate(trigger.get("conditions", [])):
+                if not cond.get("signal", ""):
+                    errors.append(f"{rid}.condition[{k}]: 缺少 signal")
+                if not cond.get("operator", ""):
+                    errors.append(f"{rid}.condition[{k}]: 缺少 operator")
+                # Presence test only: 0, False or None are legitimate values.
+                if "value" not in cond:
+                    errors.append(f"{rid}.condition[{k}]: 缺少 value")
+
+    return errors
+
+
+def check_duplicate_rule_ids(fragments: list[dict]) -> list[str]:
+    """Report rule_ids that occur more than once across all fragments.
+
+    Missing or falsy rule_ids are ignored. The result is either empty or a
+    single message listing the repeated ids in order of first appearance.
+    """
+    rule_ids = [
+        rule.get("rule_id", "")
+        for fragment in fragments
+        for rule in fragment.get("rules", [])
+    ]
+    tally = Counter(filter(None, rule_ids))
+    repeated = [rid for rid, seen in tally.items() if seen > 1]
+    return [f"重复 rule_id: {repeated}"] if repeated else []
+
+
+def check_action_types(fragments: list[dict]) -> list[str]:
+    """Check that every action has a recognised ``type``.
+
+    The accepted types are ``system`` and ``user_interaction``; the latter
+    must additionally carry a ``content`` key.
+
+    Args:
+        fragments: Fragment dicts, each optionally holding a ``rules`` list.
+
+    Returns:
+        One message per problem. A rule without ``rule_id`` is labelled
+        ``<unit_id>.rule[<index>]``.
+    """
+    # A tuple keeps the order fixed: interpolating a set of strings into the
+    # message made its text vary between runs (string hash randomisation).
+    valid_types = ("system", "user_interaction")
+    expected = "{" + ", ".join(repr(t) for t in valid_types) + "}"
+    errors = []
+    for f in fragments:
+        uid = f.get("unit_id", "?")
+        for j, rule in enumerate(f.get("rules", [])):
+            # Qualify the positional fallback with the unit id so the message
+            # stays unambiguous across fragments.
+            rid = rule.get("rule_id", f"{uid}.rule[{j}]")
+            for k, action in enumerate(rule.get("actions", [])):
+                atype = action.get("type", "")
+                if atype not in valid_types:
+                    errors.append(
+                        f"{rid}.action[{k}]: type='{atype}' 无效, "
+                        f"应为 {expected}"
+                    )
+                if atype == "user_interaction" and "content" not in action:
+                    errors.append(
+                        f"{rid}.action[{k}]: user_interaction 类型缺少 content 字段"
+                    )
+    return errors
+
+
+def _print_errors(errors: list[str], limit=None) -> None:
+    """Print each error as a bullet, eliding everything past ``limit`` with a count."""
+    shown = errors if limit is None else errors[:limit]
+    for message in shown:
+        print(f"  - {message}")
+    hidden = len(errors) - len(shown)
+    if hidden > 0:
+        print(f"  ... 还有 {hidden} 个")
+
+
+def run_all_tests():
+    """Run every Stage 2 check against ir_fragments.json and print a report.
+
+    Returns:
+        True when no check reported an error, False otherwise.
+    """
+    print("=" * 60)
+    print("Step 2 自检测试")
+    print("=" * 60)
+
+    fragments = load_fragments()
+    all_errors = []
+    total_units = len(fragments)
+    total_rules = sum(len(f.get("rules", [])) for f in fragments)
+
+    # One row per check, executed in order:
+    # (function, title, failure summary, success text, bullet limit).
+    # "{n}" in the failure summary is replaced by the number of errors;
+    # a limit of None prints every error.
+    checks = [
+        (check_non_empty_rules, "非空规则检查", "{n} 个错误",
+         f"全部通过 ({total_units} 个片段)", None),
+        (check_rule_paths, "规则 path 字段", "{n} 个错误",
+         "全部通过", 10),
+        (check_precondition_fields, "precondition 字段", "{n} 个错误",
+         "全部通过", 10),
+        (check_user_interaction_content, "user_interaction content", "{n} 个错误",
+         "全部通过", 10),
+        (check_sources_have_logic_tree_nodes, "来源节点引用", "{n} 个规则缺少来源引用",
+         "全部通过", 10),
+        (check_trigger_conditions, "触发条件完整性", "{n} 个条件不完整",
+         "全部通过", 10),
+        (check_duplicate_rule_ids, "rule_id 唯一性", "发现重复",
+         "全部通过", None),
+        (check_action_types, "动作类型检查", "{n} 个问题",
+         "全部通过", 10),
+    ]
+    for check, title, failure, success, limit in checks:
+        errors = check(fragments)
+        if errors:
+            print(f"\n{FAIL} {title}: {failure.format(n=len(errors))}")
+            _print_errors(errors, limit)
+            all_errors.extend(errors)
+        else:
+            print(f"\n{PASS} {title}: {success}")
+
+    # Summary
+    print(f"\n{'='*60}")
+    total_failures = len(all_errors)
+
+    if total_failures == 0:
+        print(f"{PASS} 所有测试通过!")
+    else:
+        print(f"{FAIL} 测试失败: {total_failures} 个错误")
+        print("\n建议:")
+        print("  1. 检查 ir_fragments.json 中出错的规则")
+        print("  2. 如果某些功能单元的规则为空，检查上下文包是否丢失了关键信息")
+        print("  3. 调整 Prompt (prompts/step2_ir_extraction.txt) 后重新运行")
+
+    print(f"\n统计:")
+    print(f"  功能单元数: {total_units}")
+    print(f"  规则总数: {total_rules}")
+    error_units = sum(1 for f in fragments if f.get("error"))
+    if error_units:
+        print(f"  提取失败的单元: {error_units}")
+
+    return total_failures == 0
+
+
+if __name__ == "__main__":
+    # Propagate the overall result as the process exit status (0 = success).
+    exit_code = 0 if run_all_tests() else 1
+    sys.exit(exit_code)
@@ -0,0 +1,152 @@
+"""
+Tests for Stage 2.5 (Branch Coverage Auto-Completion).
+
+Validates:
+- Path enumeration exists and is non-empty
+- Auto-complete fragments have valid structure
+- No duplicate unit_ids in autocomplete fragments
+- (Not verified by this script yet) Path coverage improved after autocomplete
+"""
+
+import sys
+from pathlib import Path
+
+sys.path.insert(0, str(Path(__file__).parent.parent))
+import config
+
+
+# Status tags prefixed to the report lines printed by this script.
+PASS, FAIL, WARN = "[PASS]", "[FAIL]", "[WARN]"
+
+
+def load_path_enumeration():
+    """Load path_enumeration.json through ``config.load_json``.
+
+    If the file does not exist, print a hint to run the Step 2.5 script first
+    and raise ``SystemExit`` with status 1.
+    """
+    try:
+        enumeration = config.load_json(config.PATH_ENUM_JSON)
+    except FileNotFoundError:
+        print(f"{FAIL} path_enumeration.json 未找到: {config.PATH_ENUM_JSON}")
+        print("  请先运行 step2_5_branch_coverage.py")
+        sys.exit(1)
+    else:
+        return enumeration
+
+
+def load_autocomplete_fragments() -> list[dict] | None:
+    """Load ir_autocomplete_fragments.json if it has been generated.
+
+    Returns:
+        The content loaded by ``config.load_json``, or ``None`` (not an empty
+        list) when the file does not exist, so callers can tell "not
+        generated" apart from "generated but empty".
+    """
+    path = config.IR_AUTOCOMPLETE_FRAGMENTS_JSON
+    if not Path(path).exists():
+        return None
+    return config.load_json(path)
+
+
+def check_path_enumeration(data: dict) -> list[str]:
+    """Validate the structure of the path enumeration document.
+
+    ``logic_tree_paths`` must be non-empty, ``total_paths`` must be positive,
+    every image must list at least one path, and every path entry needs a
+    truthy ``path_id``, ``image_id`` and ``node_ids``.
+    """
+    problems = []
+
+    tree_paths = data.get("logic_tree_paths", {})
+    if not tree_paths:
+        problems.append("logic_tree_paths 为空")
+
+    path_count = data.get("total_paths", 0)
+    if path_count <= 0:
+        problems.append(f"total_paths = {path_count}, 期望 > 0")
+
+    required_keys = ("path_id", "image_id", "node_ids")
+    for image_id, entries in tree_paths.items():
+        if not entries:
+            problems.append(f"{image_id}: 路径列表为空")
+            continue
+        for idx, entry in enumerate(entries):
+            problems.extend(
+                f"{image_id}[{idx}]: 缺少 {key}"
+                for key in required_keys
+                if not entry.get(key)
+            )
+
+    return problems
+
+
+def check_autocomplete_fragments(fragments: list[dict] | None) -> list[str]:
+    """Validate the structure of the auto-completed fragments.
+
+    ``None`` (file not generated) yields a single explanatory message.
+    Otherwise each fragment needs a unique, non-empty ``unit_id`` and a truthy
+    ``auto_generated`` flag, and each of its rules needs a truthy ``path`` and
+    ``precondition.geographic_scope``. Fragments without ``unit_id`` are
+    reported and not inspected further.
+    """
+    if fragments is None:
+        return ["ir_autocomplete_fragments.json 未生成 (可能无需补全)"]
+
+    problems = []
+    known_ids = set()
+
+    for fragment in fragments:
+        unit_id = fragment.get("unit_id", "")
+        if not unit_id:
+            problems.append("fragment 缺少 unit_id")
+            continue
+
+        if unit_id in known_ids:
+            problems.append(f"unit_id '{unit_id}' 重复")
+        else:
+            known_ids.add(unit_id)
+
+        if not fragment.get("auto_generated"):
+            problems.append(f"{unit_id}: auto_generated 应为 true")
+
+        for idx, rule in enumerate(fragment.get("rules", [])):
+            label = rule.get("rule_id", f"rule[{idx}]")
+            if not rule.get("path"):
+                problems.append(f"{label}: path 字段缺失")
+            if not rule.get("precondition", {}).get("geographic_scope"):
+                problems.append(f"{label}: precondition.geographic_scope 缺失")
+
+    return problems
+
+
+def run_all_tests():
+    """Run the Stage 2.5 checks and print a report.
+
+    A missing path_enumeration.json makes the run fail immediately. A missing
+    ir_autocomplete_fragments.json is only reported as a warning and is not
+    counted as a failure.
+
+    Returns:
+        True when no check reported an error, False otherwise.
+    """
+    print("=" * 60)
+    print("Step 2.5 自检测试")
+    print("=" * 60)
+
+    all_errors = []
+
+    # Test 1: Path enumeration exists
+    try:
+        path_data = load_path_enumeration()
+    except SystemExit:
+        # The loader already printed the reason; report failure to the caller
+        # instead of terminating the process from here.
+        return False
+
+    errors = check_path_enumeration(path_data)
+    if errors:
+        print(f"\n{FAIL} 路径枚举检查: {len(errors)} 个错误")
+        for e in errors:
+            print(f"  - {e}")
+        all_errors.extend(errors)
+    else:
+        total = path_data.get("total_paths", 0)
+        n_images = len(path_data.get("logic_tree_paths", {}))
+        print(f"\n{PASS} 路径枚举检查: {total} 条路径, {n_images} 个逻辑树")
+
+    # Test 2: Auto-complete fragments
+    fragments = load_autocomplete_fragments()
+    if fragments is None:
+        print(f"\n{WARN} 自动补全片段: 未生成 (可能所有路径已覆盖)")
+    else:
+        errors = check_autocomplete_fragments(fragments)
+        if errors:
+            print(f"\n{FAIL} 自动补全片段检查: {len(errors)} 个错误")
+            for e in errors[:10]:
+                print(f"  - {e}")
+            # Say how many errors were elided rather than truncating silently.
+            if len(errors) > 10:
+                print(f"  ... 还有 {len(errors) - 10} 个")
+            all_errors.extend(errors)
+        else:
+            auto_rules = sum(len(f.get("rules", [])) for f in fragments)
+            print(f"\n{PASS} 自动补全片段检查: "
+                  f"{len(fragments)} 个片段, {auto_rules} 条规则")
+
+    # Summary
+    print(f"\n{'='*60}")
+    total_failures = len(all_errors)
+
+    if total_failures == 0:
+        print(f"{PASS} 所有测试通过!")
+    else:
+        print(f"{FAIL} 测试失败: {total_failures} 个错误")
+
+    return total_failures == 0
+
+
+if __name__ == "__main__":
+    # Propagate the overall result as the process exit status (0 = success).
+    exit_code = 0 if run_all_tests() else 1
+    sys.exit(exit_code)
@@ -0,0 +1,232 @@
+"""
+Tests for Stage 3 (Merge & Audit).
+
+Validates:
+- ir_final.json exists and is well-formed
+- No duplicate rule_ids
+- All rule_ids follow new hierarchical naming convention
+- All rules have path arrays
+- ir_audit_report.md exists and contains all required sections
+"""
+
+import re
+import sys
+from pathlib import Path
+from collections import Counter
+
+sys.path.insert(0, str(Path(__file__).parent.parent))
+import config
+
+
+# Status tags prefixed to the report lines printed by this script.
+PASS, FAIL, WARN = "[PASS]", "[FAIL]", "[WARN]"
+
+
+def load_ir_final():
+    """Load ir_final.json through ``config.load_json``.
+
+    If the file does not exist, print a hint to run the Step 3 script first
+    and terminate the process with exit status 1.
+    """
+    try:
+        final_ir = config.load_json(config.IR_FINAL_JSON)
+    except FileNotFoundError:
+        print(f"{FAIL} ir_final.json 未找到: {config.IR_FINAL_JSON}")
+        print("  请先运行 step3_merge_and_audit.py")
+        sys.exit(1)
+    else:
+        return final_ir
+
+
+def load_audit_report():
+    """Return the text of ir_audit_report.md, decoded as UTF-8.
+
+    If the file does not exist, print a hint to run the Step 3 script first
+    and terminate the process with exit status 1.
+    """
+    try:
+        return Path(config.IR_AUDIT_REPORT_MD).read_text(encoding="utf-8")
+    except FileNotFoundError:
+        print(f"{FAIL} ir_audit_report.md 未找到: {config.IR_AUDIT_REPORT_MD}")
+        print("  请先运行 step3_merge_and_audit.py")
+        sys.exit(1)
+
+
+def check_rule_ids(ir: dict) -> list[str]:
+    """Check rule_ids for duplicates and for the hierarchical naming convention.
+
+    Expected format, e.g. ``DRL-001-DOMESTIC-SYS-FG-INTERRUPT-01``:
+    an upper-case feature code, a 3-digit number, a scope
+    (DOMESTIC/OVERSEAS), a method (SYS/SDK/OTHER), a behaviour
+    (FG-INTERRUPT/BG-BLOCK/BG-PAUSE/NO-RESTRICT/SWITCH-OFF) and a 2-digit
+    sequence number.
+
+    Args:
+        ir: The final IR document; rules are read from its ``rules`` list.
+
+    Returns:
+        At most one message listing duplicated ids, followed by one message
+        per non-empty id that does not follow the convention. Rules without a
+        ``rule_id`` are counted as the empty id for the duplicate check and
+        skipped by the naming check.
+    """
+    errors = []
+    rules = ir.get("rules", [])
+    rule_ids = [r.get("rule_id", "") for r in rules]
+
+    # No duplicates
+    duplicates = [rid for rid, count in Counter(rule_ids).items() if count > 1]
+    if duplicates:
+        errors.append(f"重复 rule_id: {duplicates}")
+
+    # New hierarchical naming convention
+    pattern = re.compile(
+        r"[A-Z]+-\d{3}-(DOMESTIC|OVERSEAS)-"
+        r"(SYS|SDK|OTHER)-"
+        r"(FG-INTERRUPT|BG-BLOCK|BG-PAUSE|NO-RESTRICT|SWITCH-OFF)-\d{2}"
+    )
+    for rid in rule_ids:
+        # fullmatch, not match with "$": "$" also matches just before a
+        # trailing newline, which would let an id like "...-01\n" slip through.
+        if rid and not pattern.fullmatch(rid):
+            errors.append(
+                f"rule_id 命名不规范: '{rid}' "
+                f"(期望: FEATURE-SCOPE-METHOD-BEHAVIOR-NN)"
+            )
+
+    return errors
+
+
+def check_top_level_structure(ir: dict) -> list[str]:
+    """Verify the top-level shape of ir_final.
+
+    ``feature``, ``feature_id`` and ``rules`` must all be present, and
+    ``rules`` must be a non-empty list.
+    """
+    problems = [
+        f"ir_final 缺少顶层字段: {name}"
+        for name in ("feature", "feature_id", "rules")
+        if name not in ir
+    ]
+
+    rules = ir.get("rules")
+    if not isinstance(rules, list):
+        problems.append("ir_final.rules 必须是数组")
+    elif not rules:
+        problems.append("ir_final.rules 为空")
+
+    return problems
+
+
+def check_rule_paths(rules: list[dict]) -> list[str]:
+    """Report every rule whose ``path`` is missing or empty.
+
+    Rules without ``rule_id`` are labelled ``?`` in the message.
+    """
+    return [
+        f"{rule.get('rule_id', '?')}: path 字段为空或缺失"
+        for rule in rules
+        if not rule.get("path", [])
+    ]
+
+
+def check_rule_completeness(rules: list[dict]) -> list[str]:
+    """Verify that each rule carries every required field.
+
+    Besides key presence, ``sources`` and ``actions`` must be non-empty,
+    ``precondition.geographic_scope`` must be truthy and
+    ``precondition.screen_type`` must be present as a key. Rules without
+    ``rule_id`` are labelled ``rule[<index>]``.
+    """
+    required = (
+        "rule_id", "description", "priority", "sources",
+        "precondition", "trigger", "actions",
+    )
+    problems = []
+    for idx, rule in enumerate(rules):
+        label = rule.get("rule_id", f"rule[{idx}]")
+
+        problems.extend(
+            f"{label}: 缺少字段 '{name}'" for name in required if name not in rule
+        )
+
+        # Present-but-empty collections are reported separately.
+        for name in ("sources", "actions"):
+            if not rule.get(name):
+                problems.append(f"{label}: {name} 为空")
+
+        precondition = rule.get("precondition", {})
+        if not precondition.get("geographic_scope"):
+            problems.append(f"{label}: precondition.geographic_scope 缺失")
+        if "screen_type" not in precondition:
+            problems.append(f"{label}: precondition.screen_type 缺失")
+    return problems
+
+
+def check_audit_report(report: str) -> list[str]:
+    """Verify that the audit report mentions every required section.
+
+    Each section title is looked up as a plain substring of the report text;
+    the human-review notice is checked the same way.
+    """
+    required_sections = (
+        "逻辑树路径覆盖率",
+        "表格枚举覆盖",
+        "开关状态",
+        "一致性扫描报告",
+        "自动补全摘要",
+        "规则清单",
+    )
+    problems = [
+        f"审计报告缺少章节: {title}"
+        for title in required_sections
+        if title not in report
+    ]
+
+    # The report must also carry the human review notice.
+    if "人工审查" not in report:
+        problems.append("审计报告缺少人工审查提示")
+
+    return problems
+
+
+def run_all_tests():
+    """Run every Stage 3 check against ir_final.json and the audit report.
+
+    Both inputs are loaded up front; the loaders terminate the process with
+    exit status 1 when a file is missing.
+
+    Returns:
+        True when no check reported an error, False otherwise.
+    """
+    print("=" * 60)
+    print("Step 3 自检测试")
+    print("=" * 60)
+
+    ir = load_ir_final()
+    report = load_audit_report()
+    rules = ir.get("rules", [])
+    all_errors = []
+
+    # Test 1: Top-level structure
+    errors = check_top_level_structure(ir)
+    if errors:
+        print(f"\n{FAIL} 顶层结构检查: {len(errors)} 个错误")
+        for e in errors:
+            print(f"  - {e}")
+        all_errors.extend(errors)
+    else:
+        print(f"\n{PASS} 顶层结构检查: 通过 "
+              f"(feature={ir.get('feature')}, feature_id={ir.get('feature_id')})")
+
+    # Test 2: rule_id uniqueness and naming
+    errors = check_rule_ids(ir)
+    if errors:
+        print(f"\n{FAIL} rule_id 检查: {len(errors)} 个错误")
+        for e in errors:
+            print(f"  - {e}")
+        all_errors.extend(errors)
+    else:
+        print(f"\n{PASS} rule_id 检查: 全部通过 ({len(rules)} 个唯一 ID, 层次化格式)")
+
+    # Test 3: Rule path fields
+    errors = check_rule_paths(rules)
+    if errors:
+        print(f"\n{FAIL} 规则 path 字段: {len(errors)} 个错误")
+        for e in errors[:10]:
+            print(f"  - {e}")
+        # Say how many errors were elided, as Test 4 does, rather than
+        # truncating silently.
+        if len(errors) > 10:
+            print(f"  ... 还有 {len(errors) - 10} 个")
+        all_errors.extend(errors)
+    else:
+        print(f"\n{PASS} 规则 path 字段: 全部通过")
+
+    # Test 4: Rule field completeness
+    errors = check_rule_completeness(rules)
+    if errors:
+        print(f"\n{FAIL} 规则字段完整性: {len(errors)} 个错误")
+        for e in errors[:10]:
+            print(f"  - {e}")
+        if len(errors) > 10:
+            print(f"  ... 还有 {len(errors) - 10} 个")
+        all_errors.extend(errors)
+    else:
+        print(f"\n{PASS} 规则字段完整性: 全部通过")
+
+    # Test 5: Audit report content
+    errors = check_audit_report(report)
+    if errors:
+        print(f"\n{FAIL} 审计报告检查: {len(errors)} 个错误")
+        for e in errors:
+            print(f"  - {e}")
+        all_errors.extend(errors)
+    else:
+        print(f"\n{PASS} 审计报告检查: 全部通过 (6 个章节)")
+
+    # Summary
+    print(f"\n{'='*60}")
+    total_failures = len(all_errors)
+
+    if total_failures == 0:
+        print(f"{PASS} 所有测试通过!")
+        print(f"\n最终交付物:")
+        print(f"  - {config.IR_FINAL_JSON} ({len(rules)} 条规则)")
+        print(f"  - {config.IR_AUDIT_REPORT_MD}")
+    else:
+        print(f"{FAIL} 测试失败: {total_failures} 个错误")
+        print("\n建议: 检查 ir_fragments.json 和合并逻辑，修复问题后重新运行 step3_merge_and_audit.py")
+
+    return total_failures == 0
+
+
+if __name__ == "__main__":
+    # Propagate the overall result as the process exit status (0 = success).
+    exit_code = 0 if run_all_tests() else 1
+    sys.exit(exit_code)