From 93e13e947cdac8a21890f4884afabf0295890e99 Mon Sep 17 00:00:00 2001
From: Peter Zhang <18501667167@qq.com>
Date: Mon, 1 Jun 2026 14:03:59 +0800
Subject: [PATCH 1/2] fix: table coverage only counts functional sections +
 specific missing row feedback - Closes #21

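- _quick_validate: count table rows only from functional sections that
  have content (_is_functional_section and _has_section_content)
- _quick_validate: deduplicate covered rows by (section, row), so a row
  cited by several function_units is counted once
- Track the specific missing rows, with their content, for targeted feedback
- _build_coverage_feedback: include the missing-row details in the feedback text
- Denominator: 24 -> 18 rows; coverage: 54% -> 67%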

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
---
 .../step1_semantic_index.py                   | 80 ++++++++++++++++---
 1 file changed, 67 insertions(+), 13 deletions(-)

diff --git a/skills/ir_generation_skill/step1_semantic_index.py b/skills/ir_generation_skill/step1_semantic_index.py
index 68879f7..4e34b1a 100644
--- a/skills/ir_generation_skill/step1_semantic_index.py
+++ b/skills/ir_generation_skill/step1_semantic_index.py
@@ -553,28 +553,67 @@ def _quick_validate(
             f"未覆盖: {uncovered[:5]}"
         )
 
-    # Count table rows
+    # Count table rows — only from functional sections with content
     total_rows = sum(
         len(b.get("rows", []))
         for s in doc.get("sections", [])
+        if _is_functional_section(s.get("source", ""))
+        and _has_section_content(s)
         for b in s.get("blocks", [])
         if b.get("type") == "table"
     )
-    covered_rows = sum(
-        1 for fu in units
-        for src in fu.get("sources", [])
-        if src.get("type") == "table" and src.get("row")
-    )
-    row_cov = covered_rows / max(total_rows, 1)
-    # When there are no table rows to cover, skip the check (not a coverage failure)
+    covered_set: set[tuple] = set()
+    for fu in units:
+        for src in fu.get("sources", []):
+            if src.get("type") == "table" and src.get("row"):
+                covered_set.add((src.get("section", ""), src.get("row")))
+    covered_rows = len(covered_set)
+    # When there are no table rows to cover, skip check
     if total_rows == 0:
         row_cov = 1.0
+    else:
+        row_cov = covered_rows / total_rows
     print(f"  表格行覆盖率: {row_cov:.0%} ({covered_rows}/{total_rows} rows)", flush=True)
     if row_cov < SECTION_COVERAGE_TARGET:
+        # Collect specific missing rows with content for targeted feedback
+        missing_rows: list[dict] = []
+        for s in doc.get("sections", []):
+            if not _is_functional_section(s.get("source", "")):
+                continue
+            if not _has_section_content(s):
+                continue
+            sec_name = s.get("source", "").split()[0] if s.get("source") else "?"
+            for b in s.get("blocks", []):
+                if b.get("type") != "table":
+                    continue
+                for row in b.get("rows", []):
+                    rn = row.get("row")
+                    if (sec_name, rn) not in covered_set:
+                        key_col = ""
+                        val_col = ""
+                        for col in row.get("columns", []):
+                            cn = col.get("name", "")
+                            ct = col.get("text", "")[:100]
+                            if cn in ("功能", "三级功能", "一级功能", "功能名称"):
+                                key_col = ct
+                            elif cn in ("功能详细说明", "详细说明", "四级功能", "说明"):
+                                val_col = ct
+                        if not key_col:
+                            # Use first column as key
+                            for col in row.get("columns", []):
+                                key_col = col.get("text", "")[:60]
+                                break
+                        missing_rows.append({
+                            "section": sec_name,
+                            "row": rn,
+                            "key": key_col,
+                            "value": val_col,
+                        })
         gaps["coverage_warnings"].append(
             f"表格行覆盖率 {row_cov:.0%} < {SECTION_COVERAGE_TARGET:.0%}, "
-            f"({covered_rows}/{total_rows} rows)"
+            f"({covered_rows}/{total_rows} rows from functional sections)"
         )
+        gaps["missing_table_rows"] = missing_rows
 
     # Coverage warnings are non-blocking (depend on LLM prompt quality)
     if gaps["coverage_warnings"]:
@@ -595,19 +634,34 @@ def _build_coverage_feedback(gaps: dict) -> str:
     parts = []
     for item in gaps.get("coverage_warnings", []):
         parts.append(f"- {item}")
+
+    # Include specific missing table rows with their content
+    missing_rows = gaps.get("missing_table_rows", [])
+    if missing_rows:
+        parts.append(f"\n### 以下具体表格行缺少对应 function_unit（共 {len(missing_rows)} 行）：\n")
+        for mr in missing_rows:
+            sec = mr.get("section", "?")
+            rn = mr.get("row", "?")
+            key = mr.get("key", "")
+            val = mr.get("value", "")
+            parts.append(
+                f"- **章节 {sec}, 行 {rn}**: {key}"
+                + (f" — {val}" if val else "")
+            )
+
     if not parts:
         return ""
 
     return (
-        "\n## 关键覆盖反馈（上一轮 LLM 输出了以下缺口，请重新处理）\n\n"
+        "\n## 关键覆盖反馈（上一轮 LLM 输出存在缺口，请重新处理）\n\n"
         + "\n".join(parts)
         + "\n\n"
         "### 修复动作（必须执行）\n\n"
-        "1. **重新扫描上述每个缺失章节**，从文字和表格中提取所有可被测试的功能行为\n"
-        "2. **为每个缺失的表格行创建独立的 function_unit**，不得合并不同行的规则\n"
+        "1. **重新扫描上述每个缺失章节和表格行**，从文字和表格中提取所有可被测试的功能行为\n"
+        "2. **为上述每个缺失表格行创建独立的 function_unit**，不得合并不同行的规则\n"
         "3. **每个 function_unit 必须引用具体的 section 号和 row 号**作为 source\n"
         "4. **非功能章节可以跳过**（如背景、术语、变更日志），但行为规则章节必须覆盖\n"
-        "5. 输出中必须包含针对上述缺口的新 function_unit\n"
+        "5. 输出中必须包含针对上述缺口的新 function_unit，**尤其是列出具体缺失的表格行**\n"
     )
 
 

From 119c08faca5a478545f76cf916b53526bb20b6ff Mon Sep 17 00:00:00 2001
From: Peter Zhang <18501667167@qq.com>
Date: Mon, 1 Jun 2026 14:06:16 +0800
Subject: [PATCH 2/2] =?UTF-8?q?test:=20=5Fextract=5Fcontent=5Funits=20?=
 =?UTF-8?q?=E4=BB=85=E7=BB=9F=E8=AE=A1=E5=8A=9F=E8=83=BD=E7=AB=A0=E8=8A=82?=
 =?UTF-8?q?=E8=A1=A8=E6=A0=BC=E8=A1=8C=20-=20Closes=20#33?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

非功能章节（变更日志、术语解释等）的表格行不可能被
function_units 覆盖，计入分母会导致覆盖率虚低。

修复: table_rows 统计仅在 _is_functional_section
且 _has_section_content 的章节中进行。

Table 覆盖率: 54.2% → 72.2% (24行→18行分母)

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
---
 tests/acceptance/test_main_health.py | 16 +++++++++++-----
 1 file changed, 11 insertions(+), 5 deletions(-)

diff --git a/tests/acceptance/test_main_health.py b/tests/acceptance/test_main_health.py
index 6a027af..3a86d89 100644
--- a/tests/acceptance/test_main_health.py
+++ b/tests/acceptance/test_main_health.py
@@ -137,16 +137,22 @@ def _extract_content_units(parsed_data: dict) -> dict:
 
     for sec in sections:
         name = sec.get("source", "")
-        if _is_functional_section(name) and _has_section_content(sec):
+        is_func = _is_functional_section(name) and _has_section_content(sec)
+        if is_func:
             functional_sections.append({
                 "name": name,
                 "number": _section_number(name),
             })
 
-        for block in sec.get("blocks", []):
-            if block.get("type") == "table":
-                rows = block.get("rows", [])
-                total_table_rows += len(rows)
+        # Only count table rows from functional sections
+        # (non-functional sections like changelog, glossary, references
+        #  cannot be covered by function_units — counting them inflates
+        #  the denominator and yields misleadingly low coverage.)
+        if is_func:
+            for block in sec.get("blocks", []):
+                if block.get("type") == "table":
+                    rows = block.get("rows", [])
+                    total_table_rows += len(rows)
 
     # Diagram-type images from image_analysis
     diagram_rids: list[str] = []