Compare commits

...

4 Commits

Author SHA1 Message Date
pzhang_zywl ebda8e37d1 fix: step1 空章节过滤 + step3 rule_signature None-safe - Closes #21
CI / test (pull_request) Successful in 9s
- step1 _quick_validate 添加 _has_section_content() 过滤空内容章节
  (如仅含"无"字的图片章节),避免误报低覆盖率警告
- step3 rule_signature 使用 `or {}` 防御 trigger=None 场景
  修复 QE 报告的 step3 AttributeError

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-06-01 13:15:19 +08:00
pzhang_zywl d1e36b20ee Merge pull request 'fix: [test-dev] _extract_content_units 空章节误计为功能章节 - Closes #29' (#30) from test/issue-29 into main
CI / test (push) Successful in 14s
2026-06-01 11:24:04 +08:00
pzhang_zywl 01c93e52d3 test: _has_section_content() 过滤空章节,修复章节覆盖率误报 - Closes #29
CI / test (pull_request) Successful in 9s
Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-06-01 10:16:23 +08:00
pzhang_zywl 7bcd414692 Merge pull request 'fix: 修复章节覆盖率误报 + pipeline 验证非阻塞 - Closes #21' (#27) from dev/issue-22-fix-trigger-null into main
CI / test (push) Successful in 7s
CI / test (pull_request) Successful in 8s
2026-05-31 22:46:30 +08:00
3 changed files with 40 additions and 4 deletions
@@ -509,10 +509,28 @@ def _quick_validate(
return True return True
return True return True
def _has_section_content(sec: dict) -> bool:
"""Check if a section has meaningful content (text >= 10 chars, table, or image).
A section is considered "empty" if all its text blocks have fewer than
10 characters and it contains no tables or images. These typically come
from image-only Word sections that doc_parser cannot extract text from.
"""
for block in sec.get("blocks", []):
blk_type = block.get("type", "")
if blk_type == "table":
return True
if blk_type in ("image", "figure", "picture"):
return True
text = block.get("text", "")
if isinstance(text, str) and len(text.strip()) >= 10:
return True
return False
func_sections = [ func_sections = [
s for s in doc.get("sections", []) s for s in doc.get("sections", [])
if _is_functional_section(s.get("source", "")) if _is_functional_section(s.get("source", ""))
and any(b.get("type") in ("para", "table") for b in s.get("blocks", [])) and _has_section_content(s)
] ]
covered_sections: set[str] = set() covered_sections: set[str] = set()
for fu in units: for fu in units:
@@ -111,8 +111,8 @@ def load_path_enumeration() -> dict:
def rule_signature(rule: dict) -> str: def rule_signature(rule: dict) -> str:
"""Generate a dedup signature from path + trigger + actions.""" """Generate a dedup signature from path + trigger + actions."""
path = rule.get("path", []) path = rule.get("path", [])
trigger = rule.get("trigger", {}) trigger = rule.get("trigger") or {}
actions = rule.get("actions", []) actions = rule.get("actions") or []
conditions = sorted( conditions = sorted(
trigger.get("conditions", []), key=lambda c: c.get("signal", "") trigger.get("conditions", []), key=lambda c: c.get("signal", "")
+19 -1
View File
@@ -105,6 +105,24 @@ def _is_functional_section(section_name: str) -> bool:
return True return True
def _has_section_content(sec: dict) -> bool:
"""Check if a section has meaningful content (text, table, or image).
A section is considered "empty" (no real content) if all its text blocks
have fewer than 10 characters and it contains no tables or images.
"""
for block in sec.get("blocks", []):
blk_type = block.get("type", "")
if blk_type == "table":
return True
if blk_type in ("image", "figure", "picture"):
return True
text = block.get("text", "")
if isinstance(text, str) and len(text.strip()) >= 10:
return True
return False
def _extract_content_units(parsed_data: dict) -> dict: def _extract_content_units(parsed_data: dict) -> dict:
"""Extract countable content units from parsed JSON. """Extract countable content units from parsed JSON.
@@ -119,7 +137,7 @@ def _extract_content_units(parsed_data: dict) -> dict:
for sec in sections: for sec in sections:
name = sec.get("source", "") name = sec.get("source", "")
if _is_functional_section(name): if _is_functional_section(name) and _has_section_content(sec):
functional_sections.append({ functional_sections.append({
"name": name, "name": name,
"number": _section_number(name), "number": _section_number(name),