From cb15e7abd07d526cedf075e6a7fe9cfec0286a1a Mon Sep 17 00:00:00 2001 From: Peter Zhang <18501667167@qq.com> Date: Sun, 31 May 2026 19:57:08 +0800 Subject: [PATCH] =?UTF-8?q?fix:=20step1=20=5Fquick=5Fvalidate=20=E5=A2=9E?= =?UTF-8?q?=E5=8A=A0=20section/table=20=E8=A6=86=E7=9B=96=E7=8E=87?= =?UTF-8?q?=E6=A3=80=E6=9F=A5=20-=20Closes=20#21?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - 新增章节覆盖率检查(functional sections vs covered sections) - 新增表格行覆盖率检查 - 不达标时输出未覆盖章节列表 - passed 条件增加覆盖率阈值判断 Co-Authored-By: Claude Opus 4.7 --- .../step1_semantic_index.py | 43 +++++++++++++++++++ 1 file changed, 43 insertions(+) diff --git a/skills/ir_generation_skill/step1_semantic_index.py b/skills/ir_generation_skill/step1_semantic_index.py index 9562260..86d5657 100644 --- a/skills/ir_generation_skill/step1_semantic_index.py +++ b/skills/ir_generation_skill/step1_semantic_index.py @@ -484,10 +484,53 @@ def _quick_validate( ): gaps["missing_concepts"].append("缺少 scope 概念: 海外") + # --- Section and table coverage --- + # Count functional sections (those with numbered titles that contain text/tables) + func_sections = [ + s for s in doc.get("sections", []) + if s.get("source", "").strip() + and any(b.get("type") in ("para", "table") for b in s.get("blocks", [])) + ] + covered_sections: set[str] = set() + for fu in units: + for src in fu.get("sources", []): + sec = src.get("section", "") + if sec: + covered_sections.add(sec) + + section_cov = len(covered_sections) / max(len(func_sections), 1) + if section_cov < config.COVERAGE_TARGET: + uncovered = [s["source"] for s in func_sections + if s["source"] not in covered_sections] + gaps["missing_paths"].append( + f"章节覆盖率 {section_cov:.0%} < {config.COVERAGE_TARGET:.0%}, " + f"未覆盖: {uncovered[:5]}" + ) + + # Count table rows + total_rows = sum( + len(b.get("rows", [])) + for s in doc.get("sections", []) + for b in s.get("blocks", []) + if b.get("type") == "table" + ) + covered_rows = sum( + 1 for fu in units + for src in fu.get("sources", []) + if src.get("type") == "table" and src.get("row") + ) + row_cov = covered_rows / max(total_rows, 1) + if row_cov < config.COVERAGE_TARGET: + gaps["missing_paths"].append( + f"表格行覆盖率 {row_cov:.0%} < {config.COVERAGE_TARGET:.0%}, " + f"({covered_rows}/{total_rows} rows)" + ) + passed = ( not gaps["missing_paths"] and not gaps["format_issues"] and not gaps["parent_issues"] + and section_cov >= config.COVERAGE_TARGET ) return passed, gaps