From a24408521cb896e05cebdd9fe03855c257f20b4d Mon Sep 17 00:00:00 2001 From: Peter Zhang <18501667167@qq.com> Date: Tue, 2 Jun 2026 17:25:12 +0800 Subject: [PATCH] =?UTF-8?q?fix:=20step3=20=5Fnormalize=5Frule=20=E4=B8=BA?= =?UTF-8?q?=E7=A9=BA=20sources=20=E7=9A=84=20rule=20=E6=B7=BB=E5=8A=A0?= =?UTF-8?q?=E6=9C=80=E5=B0=8F=20text=20source=20-=20Closes=20#64?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 防御性处理 LLM 输出中 sources 为空数组的情况,避免 Layer A schema 失败。 Co-Authored-By: Claude Opus 4.7 --- .../step3_merge_and_audit.py | 36 +++++++++++-------- .../ir_generation_skill/tests/test_step3.py | 12 +++++++ 2 files changed, 33 insertions(+), 15 deletions(-) diff --git a/skills/ir_generation_skill/step3_merge_and_audit.py b/skills/ir_generation_skill/step3_merge_and_audit.py index 5ce94d6..c3737f9 100644 --- a/skills/ir_generation_skill/step3_merge_and_audit.py +++ b/skills/ir_generation_skill/step3_merge_and_audit.py @@ -172,30 +172,36 @@ def _normalize_rule(rule: dict) -> dict: # Ensure table/text sources have a section field (defensive against LLM omission) # Also normalize invalid source types (LLM hallucinations like function_unit_description) sources = rule.get("sources", []) + valid_types = {"table", "text", "logic_tree"} + + # try to infer a default section from the rule path + default_section = "" + for s in sources: + sec = s.get("section", "") + if sec and sec.strip(): + default_section = sec.strip() + break + if not default_section: + path = rule.get("path", "") + if path: + default_section = path.split(" > ")[0] if " > " in path else path + if sources: - valid_types = {"table", "text", "logic_tree"} - - # try to infer a default section from sibling sources or the rule path - default_section = "" - for s in sources: - sec = s.get("section", "") - if sec and sec.strip(): - default_section = sec.strip() - break - if not default_section: - path = rule.get("path", "") - if path: - default_section = path.split(" > ")[0] if " > " in path else path - for src in sources: stype = src.get("type", "") - # Normalize invalid source types to "text" if stype and stype not in valid_types: src["type"] = "text" stype = "text" if stype in ("table", "text"): if not src.get("section"): src["section"] = default_section + else: + # Empty sources list — add a minimal text source (defensive against schema failure) + src = {"type": "text", "text_snippet": "inferred from rule context"} + if default_section: + src["section"] = default_section + sources.append(src) + rule["sources"] = sources return rule diff --git a/skills/ir_generation_skill/tests/test_step3.py b/skills/ir_generation_skill/tests/test_step3.py index fa6e9e8..f54bae1 100644 --- a/skills/ir_generation_skill/tests/test_step3.py +++ b/skills/ir_generation_skill/tests/test_step3.py @@ -526,3 +526,15 @@ class TestNormalizeRule: assert normalized["sources"][0]["type"] == "text" assert normalized["sources"][1]["type"] == "text" assert normalized["sources"][0]["section"] == "3.1 功能" + + def test_normalize_empty_sources(self): + """Rules with empty sources get a minimal text source (defensive).""" + rule = { + "trigger": {"conditions": [{"signal": "x", "operator": "==", "value": "1"}]}, + "path": "3.1 策略 > decision_speed", + "sources": [], + } + normalized = _normalize_rule(rule) + assert len(normalized["sources"]) == 1 + assert normalized["sources"][0]["type"] == "text" + assert normalized["sources"][0]["section"] == "3.1 策略" -- 2.52.0