From 1477dbdd18b7de2bd5a085b87f22258d36607df5 Mon Sep 17 00:00:00 2001
From: Peter Zhang <18501667167@qq.com>
Date: Tue, 2 Jun 2026 15:46:59 +0800
Subject: [PATCH] =?UTF-8?q?fix:=20step3=20=5Fnormalize=5Frule=20=E4=B8=BA?=
 =?UTF-8?q?=E7=BC=BA=E5=A4=B1=20section=20=E7=9A=84=20table/text=20source?=
 =?UTF-8?q?=20=E8=A1=A5=E9=BD=90=E5=AD=97=E6=AE=B5=20-=20Closes=20#53?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

LLM 生成的 source 有时缺少 section 字段，导致 Layer A schema 验证失败。
在 _normalize_rule 中添加防御性处理：从兄弟 source 或 rule path 推断 section。

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
---
 .../step3_merge_and_audit.py                  | 21 +++++++++
 .../ir_generation_skill/tests/test_step3.py   | 46 +++++++++++++++++++
 2 files changed, 67 insertions(+)

diff --git a/skills/ir_generation_skill/step3_merge_and_audit.py b/skills/ir_generation_skill/step3_merge_and_audit.py
index 966ee4a..df8a87d 100644
--- a/skills/ir_generation_skill/step3_merge_and_audit.py
+++ b/skills/ir_generation_skill/step3_merge_and_audit.py
@@ -169,6 +169,27 @@ def _normalize_rule(rule: dict) -> dict:
             "value": "active"
         }]
 
+    # Ensure table/text sources have a section field (defensive against LLM omission)
+    sources = rule.get("sources", [])
+    if sources:
+        # try to infer a default section from sibling sources or the rule path
+        default_section = ""
+        for s in sources:
+            sec = s.get("section", "")
+            if sec and sec.strip():
+                default_section = sec.strip()
+                break
+        if not default_section:
+            path = rule.get("path", "")
+            if path:
+                default_section = path.split(" > ")[0] if " > " in path else path
+
+        for src in sources:
+            stype = src.get("type", "")
+            if stype in ("table", "text"):
+                if not src.get("section"):
+                    src["section"] = default_section
+
     return rule
 
 
diff --git a/skills/ir_generation_skill/tests/test_step3.py b/skills/ir_generation_skill/tests/test_step3.py
index c8b8fd5..00ae150 100644
--- a/skills/ir_generation_skill/tests/test_step3.py
+++ b/skills/ir_generation_skill/tests/test_step3.py
@@ -465,3 +465,49 @@ class TestNormalizeRule:
         normalized = _normalize_rule(rule)
         assert normalized["trigger"]["operator"] == "AND"
         assert normalized["trigger"]["conditions"][0]["operator"] == ">="
+
+    def test_normalize_source_missing_section_from_sibling(self):
+        """A text source lacking ``section`` inherits it from a sibling source."""
+        # The table source carries the section; the text source omits it.
+        with_section = {"type": "table", "section": "3.1.1 系统限制", "row": 1}
+        without_section = {"type": "text", "text_snippet": "missing section"}
+        trigger = {"conditions": [{"signal": "x", "operator": "==", "value": "1"}]}
+        payload = {"trigger": trigger, "sources": [with_section, without_section]}
+
+        result = _normalize_rule(payload)
+        filled = result["sources"][1]
+        assert filled["section"] == "3.1.1 系统限制"
+
+    def test_normalize_source_missing_section_from_path(self):
+        """With no sibling section, the first segment of the rule path is used."""
+        lone_source = {"type": "table", "row": 3, "text_snippet": "no section anywhere"}
+        trigger = {"conditions": [{"signal": "x", "operator": "==", "value": "1"}]}
+        payload = {
+            "trigger": trigger,
+            "path": "4.2 关闭流程 > decision_speed > action_disable",
+            "sources": [lone_source],
+        }
+        result = _normalize_rule(payload)
+        assert result["sources"][0]["section"] == "4.2 关闭流程"
+
+    def test_normalize_source_keeps_existing_section(self):
+        """A source whose ``section`` is already set keeps that value."""
+        # Single table source with an explicit section and no rule path.
+        trigger = {"conditions": [{"signal": "x", "operator": "==", "value": "1"}]}
+        sources = [{"type": "table", "section": "1.0 概述", "row": 1}]
+        payload = {"trigger": trigger, "sources": sources}
+
+        result = _normalize_rule(payload)
+        first = result["sources"][0]
+        assert first["section"] == "1.0 概述"
+
+    def test_normalize_source_skips_logic_tree(self):
+        """A ``logic_tree`` source is left without a ``section`` key."""
+        # Only table/text sources are back-filled, so this one stays untouched.
+        tree_source = {"type": "logic_tree", "image_id": "img1", "node_ids": ["n1"]}
+        trigger = {"conditions": [{"signal": "x", "operator": "==", "value": "1"}]}
+        payload = {"trigger": trigger, "sources": [tree_source]}
+
+        result = _normalize_rule(payload)
+        first = result["sources"][0]
+        assert "section" not in first