Compare commits

...

1 Commits

Author SHA1 Message Date
pzhang_zywl 93e13e947c fix: table coverage only counts functional sections + specific missing row feedback - Closes #21
CI / test (pull_request) Successful in 8s
- _quick_validate: count table rows only from functional sections that have content
- Track each uncovered table row (section, row number, key/value text) in gaps["missing_table_rows"] for targeted feedback
- _build_coverage_feedback: includes missing row details
- Denominator: 24->18 rows, coverage: 54%->67%

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-06-01 14:03:59 +08:00
@@ -553,28 +553,67 @@ def _quick_validate(
f"未覆盖: {uncovered[:5]}" f"未覆盖: {uncovered[:5]}"
) )
# Count table rows # Count table rows — only from functional sections with content
total_rows = sum( total_rows = sum(
len(b.get("rows", [])) len(b.get("rows", []))
for s in doc.get("sections", []) for s in doc.get("sections", [])
if _is_functional_section(s.get("source", ""))
and _has_section_content(s)
for b in s.get("blocks", []) for b in s.get("blocks", [])
if b.get("type") == "table" if b.get("type") == "table"
) )
covered_rows = sum( covered_set: set[tuple] = set()
1 for fu in units for fu in units:
for src in fu.get("sources", []) for src in fu.get("sources", []):
if src.get("type") == "table" and src.get("row") if src.get("type") == "table" and src.get("row"):
) covered_set.add((src.get("section", ""), src.get("row")))
row_cov = covered_rows / max(total_rows, 1) covered_rows = len(covered_set)
# When there are no table rows to cover, skip the check (not a coverage failure) # When there are no table rows to cover, skip check
if total_rows == 0: if total_rows == 0:
row_cov = 1.0 row_cov = 1.0
else:
row_cov = covered_rows / total_rows
print(f" 表格行覆盖率: {row_cov:.0%} ({covered_rows}/{total_rows} rows)", flush=True) print(f" 表格行覆盖率: {row_cov:.0%} ({covered_rows}/{total_rows} rows)", flush=True)
if row_cov < SECTION_COVERAGE_TARGET: if row_cov < SECTION_COVERAGE_TARGET:
# Collect specific missing rows with content for targeted feedback
missing_rows: list[dict] = []
for s in doc.get("sections", []):
if not _is_functional_section(s.get("source", "")):
continue
if not _has_section_content(s):
continue
sec_name = s.get("source", "").split()[0] if s.get("source") else "?"
for b in s.get("blocks", []):
if b.get("type") != "table":
continue
for row in b.get("rows", []):
rn = row.get("row")
if (sec_name, rn) not in covered_set:
key_col = ""
val_col = ""
for col in row.get("columns", []):
cn = col.get("name", "")
ct = col.get("text", "")[:100]
if cn in ("功能", "三级功能", "一级功能", "功能名称"):
key_col = ct
elif cn in ("功能详细说明", "详细说明", "四级功能", "说明"):
val_col = ct
if not key_col:
# Use first column as key
for col in row.get("columns", []):
key_col = col.get("text", "")[:60]
break
missing_rows.append({
"section": sec_name,
"row": rn,
"key": key_col,
"value": val_col,
})
gaps["coverage_warnings"].append( gaps["coverage_warnings"].append(
f"表格行覆盖率 {row_cov:.0%} < {SECTION_COVERAGE_TARGET:.0%}, " f"表格行覆盖率 {row_cov:.0%} < {SECTION_COVERAGE_TARGET:.0%}, "
f"({covered_rows}/{total_rows} rows)" f"({covered_rows}/{total_rows} rows from functional sections)"
) )
gaps["missing_table_rows"] = missing_rows
# Coverage warnings are non-blocking (depend on LLM prompt quality) # Coverage warnings are non-blocking (depend on LLM prompt quality)
if gaps["coverage_warnings"]: if gaps["coverage_warnings"]:
@@ -595,19 +634,34 @@ def _build_coverage_feedback(gaps: dict) -> str:
parts = [] parts = []
for item in gaps.get("coverage_warnings", []): for item in gaps.get("coverage_warnings", []):
parts.append(f"- {item}") parts.append(f"- {item}")
# Include specific missing table rows with their content
missing_rows = gaps.get("missing_table_rows", [])
if missing_rows:
parts.append(f"\n### 以下具体表格行缺少对应 function_unit(共 {len(missing_rows)} 行):\n")
for mr in missing_rows:
sec = mr.get("section", "?")
rn = mr.get("row", "?")
key = mr.get("key", "")
val = mr.get("value", "")
parts.append(
f"- **章节 {sec}, 行 {rn}**: {key}"
+ (f"{val}" if val else "")
)
if not parts: if not parts:
return "" return ""
return ( return (
"\n## 关键覆盖反馈(上一轮 LLM 输出了以下缺口,请重新处理)\n\n" "\n## 关键覆盖反馈(上一轮 LLM 输出存在缺口,请重新处理)\n\n"
+ "\n".join(parts) + "\n".join(parts)
+ "\n\n" + "\n\n"
"### 修复动作(必须执行)\n\n" "### 修复动作(必须执行)\n\n"
"1. **重新扫描上述每个缺失章节**,从文字和表格中提取所有可被测试的功能行为\n" "1. **重新扫描上述每个缺失章节和表格行**,从文字和表格中提取所有可被测试的功能行为\n"
"2. **为每个缺失表格行创建独立的 function_unit**,不得合并不同行的规则\n" "2. **为上述每个缺失表格行创建独立的 function_unit**,不得合并不同行的规则\n"
"3. **每个 function_unit 必须引用具体的 section 号和 row 号**作为 source\n" "3. **每个 function_unit 必须引用具体的 section 号和 row 号**作为 source\n"
"4. **非功能章节可以跳过**(如背景、术语、变更日志),但行为规则章节必须覆盖\n" "4. **非功能章节可以跳过**(如背景、术语、变更日志),但行为规则章节必须覆盖\n"
"5. 输出中必须包含针对上述缺口的新 function_unit\n" "5. 输出中必须包含针对上述缺口的新 function_unit**尤其是列出具体缺失的表格行**\n"
) )