From efb5ed481ea82a237cf4e9752865ccc474a94385 Mon Sep 17 00:00:00 2001
From: Peter Zhang <18501667167@qq.com>
Date: Tue, 2 Jun 2026 17:44:56 +0800
Subject: [PATCH] =?UTF-8?q?fix:=20step3=20=5Fnormalize=5Frule=20=E5=A4=84?=
 =?UTF-8?q?=E7=90=86=20section=20=E4=B8=BA=20list=20=E7=9A=84=20LLM=20?=
 =?UTF-8?q?=E6=A0=BC=E5=BC=8F=E9=97=AE=E9=A2=98=20-=20Closes=20#69?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

LLM 输出 section 字段有时为 list 而非 string，导致 .strip() 崩溃。
添加 _clean_section() 将 list→首元素 string，空 list 回退到 rule path。

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
---
 .../step3_merge_and_audit.py                  | 16 +++++++++++-
 .../ir_generation_skill/tests/test_step3.py   | 25 +++++++++++++++++++
 2 files changed, 40 insertions(+), 1 deletion(-)

diff --git a/skills/ir_generation_skill/step3_merge_and_audit.py b/skills/ir_generation_skill/step3_merge_and_audit.py
index c3737f9..3b2fb87 100644
--- a/skills/ir_generation_skill/step3_merge_and_audit.py
+++ b/skills/ir_generation_skill/step3_merge_and_audit.py
@@ -174,11 +174,25 @@ def _normalize_rule(rule: dict) -> dict:
     sources = rule.get("sources", [])
     valid_types = {"table", "text", "logic_tree"}
 
+    def _clean_section(val):
+        """Return section as a stripped string; a list yields its first element."""
+        if isinstance(val, list):
+            # unwrap first so [None] falls through to "" instead of becoming "None"
+            val = val[0] if val else ""
+        # str() is a no-op on strings; every falsy value (None, "", 0) becomes ""
+        return str(val).strip() if val else ""
+
+    # Normalize section fields that might be lists (LLM format instability)
+    for s in sources:
+        sec = s.get("section")
+        if sec is not None:
+            s["section"] = _clean_section(sec)
+
     # try to infer a default section from the rule path
     default_section = ""
     for s in sources:
         sec = s.get("section", "")
-        if sec and sec.strip():
+        if sec and isinstance(sec, str) and sec.strip():
             default_section = sec.strip()
             break
     if not default_section:
diff --git a/skills/ir_generation_skill/tests/test_step3.py b/skills/ir_generation_skill/tests/test_step3.py
index f54bae1..8bdc4e5 100644
--- a/skills/ir_generation_skill/tests/test_step3.py
+++ b/skills/ir_generation_skill/tests/test_step3.py
@@ -538,3 +538,28 @@ class TestNormalizeRule:
         assert len(normalized["sources"]) == 1
         assert normalized["sources"][0]["type"] == "text"
         assert normalized["sources"][0]["section"] == "3.1 策略"
+
+    def test_normalize_section_is_list(self):
+        """A list-valued section (LLM format drift) collapses to its first element."""
+        condition = {"signal": "x", "operator": "==", "value": "1"}
+        list_sources = [
+            {"type": "table", "section": ["状态", "系统设置"], "row": 1},
+            {"type": "text", "section": ["后台限制"], "text_snippet": "x"},
+        ]
+        payload = {"trigger": {"conditions": [condition]}, "sources": list_sources}
+        result = _normalize_rule(payload)
+        out_sources = result["sources"]
+        assert out_sources[0]["section"] == "状态"
+        assert out_sources[1]["section"] == "后台限制"
+
+    def test_normalize_section_is_empty_list(self):
+        """An empty-list section falls back to the section taken from the rule path."""
+        condition = {"signal": "x", "operator": "==", "value": "1"}
+        empty_section_source = {"type": "table", "section": [], "row": 1}
+        payload = {
+            "trigger": {"conditions": [condition]},
+            "path": "4.2 关闭流程 > decision",
+            "sources": [empty_section_source],
+        }
+        first_source = _normalize_rule(payload)["sources"][0]
+        assert first_source["section"] == "4.2 关闭流程"
-- 
2.52.0