From a24408521cb896e05cebdd9fe03855c257f20b4d Mon Sep 17 00:00:00 2001
From: Peter Zhang <18501667167@qq.com>
Date: Tue, 2 Jun 2026 17:25:12 +0800
Subject: [PATCH] =?UTF-8?q?fix:=20step3=20=5Fnormalize=5Frule=20=E4=B8=BA?=
 =?UTF-8?q?=E7=A9=BA=20sources=20=E7=9A=84=20rule=20=E6=B7=BB=E5=8A=A0?=
 =?UTF-8?q?=E6=9C=80=E5=B0=8F=20text=20source=20-=20Closes=20#64?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

防御性处理 LLM 输出中 sources 为空数组的情况，避免 Layer A schema 失败。

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
---
 .../step3_merge_and_audit.py                  | 36 +++++++++++--------
 .../ir_generation_skill/tests/test_step3.py   | 12 +++++++
 2 files changed, 33 insertions(+), 15 deletions(-)

diff --git a/skills/ir_generation_skill/step3_merge_and_audit.py b/skills/ir_generation_skill/step3_merge_and_audit.py
index 5ce94d6..c3737f9 100644
--- a/skills/ir_generation_skill/step3_merge_and_audit.py
+++ b/skills/ir_generation_skill/step3_merge_and_audit.py
@@ -172,30 +172,36 @@ def _normalize_rule(rule: dict) -> dict:
     # Ensure table/text sources have a section field (defensive against LLM omission)
     # Also normalize invalid source types (LLM hallucinations like function_unit_description)
     sources = rule.get("sources", [])
+    valid_types = {"table", "text", "logic_tree"}
+
+    # try to infer a default section from sibling sources, falling back to the rule path
+    default_section = ""
+    for s in sources:
+        sec = s.get("section", "")
+        if sec and sec.strip():
+            default_section = sec.strip()
+            break
+    if not default_section:
+        path = rule.get("path", "")
+        if path:
+            default_section = path.split(" > ")[0] if " > " in path else path
+
     if sources:
-        valid_types = {"table", "text", "logic_tree"}
-
-        # try to infer a default section from sibling sources or the rule path
-        default_section = ""
-        for s in sources:
-            sec = s.get("section", "")
-            if sec and sec.strip():
-                default_section = sec.strip()
-                break
-        if not default_section:
-            path = rule.get("path", "")
-            if path:
-                default_section = path.split(" > ")[0] if " > " in path else path
-
         for src in sources:
             stype = src.get("type", "")
-            # Normalize invalid source types to "text"
             if stype and stype not in valid_types:
                 src["type"] = "text"
                 stype = "text"
             if stype in ("table", "text"):
                 if not src.get("section"):
                     src["section"] = default_section
+    else:
+        # Empty sources list — add a minimal text source (defensive against schema failure)
+        src = {"type": "text", "text_snippet": "inferred from rule context"}
+        if default_section:
+            src["section"] = default_section
+        sources.append(src)
+        rule["sources"] = sources
 
     return rule
 
diff --git a/skills/ir_generation_skill/tests/test_step3.py b/skills/ir_generation_skill/tests/test_step3.py
index fa6e9e8..f54bae1 100644
--- a/skills/ir_generation_skill/tests/test_step3.py
+++ b/skills/ir_generation_skill/tests/test_step3.py
@@ -526,3 +526,15 @@ class TestNormalizeRule:
         assert normalized["sources"][0]["type"] == "text"
         assert normalized["sources"][1]["type"] == "text"
         assert normalized["sources"][0]["section"] == "3.1 功能"
+
+    def test_normalize_empty_sources(self):
+        """Rules with empty sources get a minimal text source (defensive)."""
+        rule = {
+            "trigger": {"conditions": [{"signal": "x", "operator": "==", "value": "1"}]},
+            "path": "3.1 策略 > decision_speed",
+            "sources": [],
+        }
+        normalized = _normalize_rule(rule)
+        assert len(normalized["sources"]) == 1
+        assert normalized["sources"][0]["type"] == "text"
+        assert normalized["sources"][0]["section"] == "3.1 策略"
-- 
2.52.0