fix: step3 _normalize_rule 为空 sources 的 rule 添加最小 text source - Closes #64

防御性处理 LLM 输出中 sources 为空数组的情况，避免 Layer A schema 失败。

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-06-02 17:25:12 +08:00
parent c091b6c256
commit a24408521c
2 changed files with 33 additions and 15 deletions
@@ -172,30 +172,36 @@ def _normalize_rule(rule: dict) -> dict:
    # Ensure table/text sources have a section field (defensive against LLM omission)
    # Also normalize invalid source types (LLM hallucinations like function_unit_description)
    sources = rule.get("sources", [])
+    valid_types = {"table", "text", "logic_tree"}
+
+    # try to infer a default section from sibling sources, falling back to the rule path
+    default_section = ""
+    for s in sources:
+        sec = s.get("section", "")
+        if sec and sec.strip():
+            default_section = sec.strip()
+            break
+    if not default_section:
+        path = rule.get("path", "")
+        if path:
+            default_section = path.split(" > ")[0] if " > " in path else path
+
    if sources:
-        valid_types = {"table", "text", "logic_tree"}
-
-        # try to infer a default section from sibling sources or the rule path
-        default_section = ""
-        for s in sources:
-            sec = s.get("section", "")
-            if sec and sec.strip():
-                default_section = sec.strip()
-                break
-        if not default_section:
-            path = rule.get("path", "")
-            if path:
-                default_section = path.split(" > ")[0] if " > " in path else path
-
        for src in sources:
            stype = src.get("type", "")
-            # Normalize invalid source types to "text"
            if stype and stype not in valid_types:
                src["type"] = "text"
                stype = "text"
            if stype in ("table", "text"):
                if not src.get("section"):
                    src["section"] = default_section
+    else:
+        # Empty sources list — add a minimal text source (defensive against schema failure)
+        src = {"type": "text", "text_snippet": "inferred from rule context"}
+        if default_section:
+            src["section"] = default_section
+        sources.append(src)
+        rule["sources"] = sources

    return rule