fix: [test] _extract_content_units 表格行计数包含非功能章节 - Closes #33, closes #35
@@ -553,28 +553,67 @@ def _quick_validate(
|
|||||||
f"未覆盖: {uncovered[:5]}"
|
f"未覆盖: {uncovered[:5]}"
|
||||||
)
|
)
|
||||||
|
|
||||||
# Count table rows
|
# Count table rows — only from functional sections with content
|
||||||
total_rows = sum(
|
total_rows = sum(
|
||||||
len(b.get("rows", []))
|
len(b.get("rows", []))
|
||||||
for s in doc.get("sections", [])
|
for s in doc.get("sections", [])
|
||||||
|
if _is_functional_section(s.get("source", ""))
|
||||||
|
and _has_section_content(s)
|
||||||
for b in s.get("blocks", [])
|
for b in s.get("blocks", [])
|
||||||
if b.get("type") == "table"
|
if b.get("type") == "table"
|
||||||
)
|
)
|
||||||
covered_rows = sum(
|
covered_set: set[tuple] = set()
|
||||||
1 for fu in units
|
for fu in units:
|
||||||
for src in fu.get("sources", [])
|
for src in fu.get("sources", []):
|
||||||
if src.get("type") == "table" and src.get("row")
|
if src.get("type") == "table" and src.get("row"):
|
||||||
)
|
covered_set.add((src.get("section", ""), src.get("row")))
|
||||||
row_cov = covered_rows / max(total_rows, 1)
|
covered_rows = len(covered_set)
|
||||||
# When there are no table rows to cover, skip the check (not a coverage failure)
|
# When there are no table rows to cover, skip check
|
||||||
if total_rows == 0:
|
if total_rows == 0:
|
||||||
row_cov = 1.0
|
row_cov = 1.0
|
||||||
|
else:
|
||||||
|
row_cov = covered_rows / total_rows
|
||||||
print(f" 表格行覆盖率: {row_cov:.0%} ({covered_rows}/{total_rows} rows)", flush=True)
|
print(f" 表格行覆盖率: {row_cov:.0%} ({covered_rows}/{total_rows} rows)", flush=True)
|
||||||
if row_cov < SECTION_COVERAGE_TARGET:
|
if row_cov < SECTION_COVERAGE_TARGET:
|
||||||
|
# Collect specific missing rows with content for targeted feedback
|
||||||
|
missing_rows: list[dict] = []
|
||||||
|
for s in doc.get("sections", []):
|
||||||
|
if not _is_functional_section(s.get("source", "")):
|
||||||
|
continue
|
||||||
|
if not _has_section_content(s):
|
||||||
|
continue
|
||||||
|
sec_name = s.get("source", "").split()[0] if s.get("source") else "?"
|
||||||
|
for b in s.get("blocks", []):
|
||||||
|
if b.get("type") != "table":
|
||||||
|
continue
|
||||||
|
for row in b.get("rows", []):
|
||||||
|
rn = row.get("row")
|
||||||
|
if (sec_name, rn) not in covered_set:
|
||||||
|
key_col = ""
|
||||||
|
val_col = ""
|
||||||
|
for col in row.get("columns", []):
|
||||||
|
cn = col.get("name", "")
|
||||||
|
ct = col.get("text", "")[:100]
|
||||||
|
if cn in ("功能", "三级功能", "一级功能", "功能名称"):
|
||||||
|
key_col = ct
|
||||||
|
elif cn in ("功能详细说明", "详细说明", "四级功能", "说明"):
|
||||||
|
val_col = ct
|
||||||
|
if not key_col:
|
||||||
|
# Use first column as key
|
||||||
|
for col in row.get("columns", []):
|
||||||
|
key_col = col.get("text", "")[:60]
|
||||||
|
break
|
||||||
|
missing_rows.append({
|
||||||
|
"section": sec_name,
|
||||||
|
"row": rn,
|
||||||
|
"key": key_col,
|
||||||
|
"value": val_col,
|
||||||
|
})
|
||||||
gaps["coverage_warnings"].append(
|
gaps["coverage_warnings"].append(
|
||||||
f"表格行覆盖率 {row_cov:.0%} < {SECTION_COVERAGE_TARGET:.0%}, "
|
f"表格行覆盖率 {row_cov:.0%} < {SECTION_COVERAGE_TARGET:.0%}, "
|
||||||
f"({covered_rows}/{total_rows} rows)"
|
f"({covered_rows}/{total_rows} rows from functional sections)"
|
||||||
)
|
)
|
||||||
|
gaps["missing_table_rows"] = missing_rows
|
||||||
|
|
||||||
# Coverage warnings are non-blocking (depend on LLM prompt quality)
|
# Coverage warnings are non-blocking (depend on LLM prompt quality)
|
||||||
if gaps["coverage_warnings"]:
|
if gaps["coverage_warnings"]:
|
||||||
@@ -595,19 +634,34 @@ def _build_coverage_feedback(gaps: dict) -> str:
|
|||||||
parts = []
|
parts = []
|
||||||
for item in gaps.get("coverage_warnings", []):
|
for item in gaps.get("coverage_warnings", []):
|
||||||
parts.append(f"- {item}")
|
parts.append(f"- {item}")
|
||||||
|
|
||||||
|
# Include specific missing table rows with their content
|
||||||
|
missing_rows = gaps.get("missing_table_rows", [])
|
||||||
|
if missing_rows:
|
||||||
|
parts.append(f"\n### 以下具体表格行缺少对应 function_unit(共 {len(missing_rows)} 行):\n")
|
||||||
|
for mr in missing_rows:
|
||||||
|
sec = mr.get("section", "?")
|
||||||
|
rn = mr.get("row", "?")
|
||||||
|
key = mr.get("key", "")
|
||||||
|
val = mr.get("value", "")
|
||||||
|
parts.append(
|
||||||
|
f"- **章节 {sec}, 行 {rn}**: {key}"
|
||||||
|
+ (f" — {val}" if val else "")
|
||||||
|
)
|
||||||
|
|
||||||
if not parts:
|
if not parts:
|
||||||
return ""
|
return ""
|
||||||
|
|
||||||
return (
|
return (
|
||||||
"\n## 关键覆盖反馈(上一轮 LLM 输出了以下缺口,请重新处理)\n\n"
|
"\n## 关键覆盖反馈(上一轮 LLM 输出存在缺口,请重新处理)\n\n"
|
||||||
+ "\n".join(parts)
|
+ "\n".join(parts)
|
||||||
+ "\n\n"
|
+ "\n\n"
|
||||||
"### 修复动作(必须执行)\n\n"
|
"### 修复动作(必须执行)\n\n"
|
||||||
"1. **重新扫描上述每个缺失章节**,从文字和表格中提取所有可被测试的功能行为\n"
|
"1. **重新扫描上述每个缺失章节和表格行**,从文字和表格中提取所有可被测试的功能行为\n"
|
||||||
"2. **为每个缺失的表格行创建独立的 function_unit**,不得合并不同行的规则\n"
|
"2. **为上述每个缺失表格行创建独立的 function_unit**,不得合并不同行的规则\n"
|
||||||
"3. **每个 function_unit 必须引用具体的 section 号和 row 号**作为 source\n"
|
"3. **每个 function_unit 必须引用具体的 section 号和 row 号**作为 source\n"
|
||||||
"4. **非功能章节可以跳过**(如背景、术语、变更日志),但行为规则章节必须覆盖\n"
|
"4. **非功能章节可以跳过**(如背景、术语、变更日志),但行为规则章节必须覆盖\n"
|
||||||
"5. 输出中必须包含针对上述缺口的新 function_unit\n"
|
"5. 输出中必须包含针对上述缺口的新 function_unit,**尤其是列出具体缺失的表格行**\n"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -137,12 +137,18 @@ def _extract_content_units(parsed_data: dict) -> dict:
|
|||||||
|
|
||||||
for sec in sections:
|
for sec in sections:
|
||||||
name = sec.get("source", "")
|
name = sec.get("source", "")
|
||||||
if _is_functional_section(name) and _has_section_content(sec):
|
is_func = _is_functional_section(name) and _has_section_content(sec)
|
||||||
|
if is_func:
|
||||||
functional_sections.append({
|
functional_sections.append({
|
||||||
"name": name,
|
"name": name,
|
||||||
"number": _section_number(name),
|
"number": _section_number(name),
|
||||||
})
|
})
|
||||||
|
|
||||||
|
# Only count table rows from functional sections
|
||||||
|
# (non-functional sections like changelog, glossary, references
|
||||||
|
# cannot be covered by function_units — counting them inflates
|
||||||
|
# the denominator and yields misleadingly low coverage.)
|
||||||
|
if is_func:
|
||||||
for block in sec.get("blocks", []):
|
for block in sec.get("blocks", []):
|
||||||
if block.get("type") == "table":
|
if block.get("type") == "table":
|
||||||
rows = block.get("rows", [])
|
rows = block.get("rows", [])
|
||||||
|
|||||||
Reference in New Issue
Block a user