From ebda8e37d1cd66c67b60e3153ff018c5c3aa733a Mon Sep 17 00:00:00 2001 From: Peter Zhang <18501667167@qq.com> Date: Mon, 1 Jun 2026 13:15:19 +0800 Subject: [PATCH] =?UTF-8?q?fix:=20step1=20=E7=A9=BA=E7=AB=A0=E8=8A=82?= =?UTF-8?q?=E8=BF=87=E6=BB=A4=20+=20step3=20rule=5Fsignature=20None-safe?= =?UTF-8?q?=20-=20Closes=20#21?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - step1 _quick_validate 添加 _has_section_content() 过滤空内容章节 (如仅含"无"字的图片章节),避免误报低覆盖率警告 - step3 rule_signature 使用 `or {}` 防御 trigger=None 场景 修复 QE 报告的 step3 AttributeError Co-Authored-By: Claude Opus 4.7 --- .../step1_semantic_index.py | 20 ++++++++++++++++++- .../step3_merge_and_audit.py | 4 ++-- 2 files changed, 21 insertions(+), 3 deletions(-) diff --git a/skills/ir_generation_skill/step1_semantic_index.py b/skills/ir_generation_skill/step1_semantic_index.py index fe10bb7..1c071ee 100644 --- a/skills/ir_generation_skill/step1_semantic_index.py +++ b/skills/ir_generation_skill/step1_semantic_index.py @@ -509,10 +509,28 @@ def _quick_validate( return True return True + def _has_section_content(sec: dict) -> bool: + """Check if a section has meaningful content (text >= 10 chars, table, or image). + + A section is considered "empty" if all its text blocks have fewer than + 10 characters and it contains no tables or images. These typically come + from image-only Word sections that doc_parser cannot extract text from. + """ + for block in sec.get("blocks", []): + blk_type = block.get("type", "") + if blk_type == "table": + return True + if blk_type in ("image", "figure", "picture"): + return True + text = block.get("text", "") + if isinstance(text, str) and len(text.strip()) >= 10: + return True + return False + func_sections = [ s for s in doc.get("sections", []) if _is_functional_section(s.get("source", "")) - and any(b.get("type") in ("para", "table") for b in s.get("blocks", [])) + and _has_section_content(s) ] covered_sections: set[str] = set() for fu in units: diff --git a/skills/ir_generation_skill/step3_merge_and_audit.py b/skills/ir_generation_skill/step3_merge_and_audit.py index 4401d16..9d166ae 100644 --- a/skills/ir_generation_skill/step3_merge_and_audit.py +++ b/skills/ir_generation_skill/step3_merge_and_audit.py @@ -111,8 +111,8 @@ def load_path_enumeration() -> dict: def rule_signature(rule: dict) -> str: """Generate a dedup signature from path + trigger + actions.""" path = rule.get("path", []) - trigger = rule.get("trigger", {}) - actions = rule.get("actions", []) + trigger = rule.get("trigger") or {} + actions = rule.get("actions") or [] conditions = sorted( trigger.get("conditions", []), key=lambda c: c.get("signal", "")