Compare commits

...

4 Commits

Author SHA1 Message Date
pzhang_zywl eccaa28b1d test: blocked-check 用 _req_safe 替代 _req 避免 API 错误误判 - Closes #58
CI / test (pull_request) Successful in 12s
- 新增 _req_safe():API 错误返回 None 而非 sys.exit(1)
- blocked_check / _unblock_issues_blocked_by / _get_blocking_refs 改用 _req_safe
- API 失败时保守处理:保持 blocked 状态

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-06-02 16:20:12 +08:00
pzhang_zywl d73da7cda9 test: blocked-check 用 _req_safe 替代 _req 避免 API 错误误判 - Closes #58
- 新增 _req_safe():API 错误返回 None 而非 sys.exit(1)
- blocked_check / _unblock_issues_blocked_by / _get_blocking_refs 改用 _req_safe
- API 失败时保守处理:保持 blocked 状态(不误解除)
- 验证:#18 正确识别被 #57 阻塞

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-06-02 16:17:39 +08:00
pzhang_zywl 268520d453 fix: step3 过滤非法 source type + step1 重试质量门控 - Closes #57
CI / test (pull_request) Successful in 11s
- step3 _normalize_rule: 将 function_unit_description 等非法 source type 标准化为 text
- step1 覆盖反馈重试: 仅纳入实际提升覆盖率的 retry 结果,避免低质量输出稀释 ensemble
- 新增 UT: test_normalize_source_invalid_type

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-06-02 16:16:47 +08:00
pzhang_zywl 1b8baed542 Merge pull request 'fix: [bug] QE Audit inadequate_ratio 80% 功能覆盖不足 - 来自 #18 e2e - Closes #54' (#56) from dev/issue-54-coverage-feedback-retry-loop into main
CI / test (push) Successful in 7s
2026-06-02 15:50:15 +08:00
4 changed files with 91 additions and 39 deletions
+36 -23
View File
@@ -56,6 +56,27 @@ def _req(method, path, data=None):
sys.exit(1) sys.exit(1)
def _req_safe(method, path, data=None):
    """Send a request like ``_req`` but report failure by returning ``None``.

    Intended for probing calls (issue/PR lookups) where the caller can
    handle a missing answer and must not have the process terminated.

    Args:
        method: HTTP method name, e.g. ``"GET"``.
        path: Path appended to ``BASE`` to form the request URL.
        data: Optional JSON-serializable payload; a falsy value sends no body.

    Returns:
        The decoded JSON response, ``{}`` when the response body is empty,
        or ``None`` when the request failed (HTTP error status, transport
        failure, or a body that is not valid JSON).
    """
    url = f"{BASE}{path}"
    payload = json.dumps(data).encode("utf-8") if data else None
    req = urllib.request.Request(url, data=payload, method=method)
    req.add_header("Authorization", f"token {GITEA_TOKEN}")
    req.add_header("Content-Type", "application/json")
    try:
        with urllib.request.urlopen(req) as resp:
            raw = resp.read()
    except urllib.error.HTTPError as e:
        # errors="replace" so a non-UTF-8 error page cannot raise from
        # inside the handler and defeat the "never crash" contract.
        body = e.read().decode("utf-8", errors="replace")
        print(f"API Error {e.code}: {body}", file=sys.stderr)
        return None
    except OSError as e:
        # URLError (connection refused, DNS failure) and socket timeouts are
        # OSError subclasses; treat them like any other API failure.
        print(f"API Error (transport): {e}", file=sys.stderr)
        return None
    if not raw:
        return {}
    try:
        return json.loads(raw)
    except ValueError as e:
        # JSONDecodeError and UnicodeDecodeError both derive from ValueError;
        # an undecodable body is reported as a failure, not raised.
        print(f"API Error (invalid JSON): {e}", file=sys.stderr)
        return None
# ── Issue operations ───────────────────────────────────────────────────────── # ── Issue operations ─────────────────────────────────────────────────────────
def list_issues(labels: list[str] | None = None): def list_issues(labels: list[str] | None = None):
@@ -82,17 +103,17 @@ def _get_blocking_refs(issue_num: int) -> set[int]:
""" """
refs: set[int] = set() refs: set[int] = set()
# Body # Body
issue = _req("GET", f"/issues/{issue_num}") issue = _req_safe("GET", f"/issues/{issue_num}")
if issue is None:
return refs # API error → return empty set, keep blocked
body = issue.get("body", "") or "" body = issue.get("body", "") or ""
refs.update(int(m.group(1)) for m in re.finditer(r'#(\d+)', body)) refs.update(int(m.group(1)) for m in re.finditer(r'#(\d+)', body))
# Comments # Comments
try: comments = _req_safe("GET", f"/issues/{issue_num}/comments")
comments = _req("GET", f"/issues/{issue_num}/comments") if comments:
for c in comments: for c in comments:
cbody = c.get("body", "") or "" cbody = c.get("body", "") or ""
refs.update(int(m.group(1)) for m in re.finditer(r'#(\d+)', cbody)) refs.update(int(m.group(1)) for m in re.finditer(r'#(\d+)', cbody))
except SystemExit:
pass
return refs return refs
@@ -103,12 +124,7 @@ def blocked_check():
If no references found or all referenced issues are closed, If no references found or all referenced issues are closed,
removes the 'blocked' label. removes the 'blocked' label.
""" """
try: all_blocked = _req_safe("GET", "/issues?state=open&labels=blocked")
all_blocked = _req("GET", "/issues?state=open&labels=blocked")
except SystemExit:
print("No blocked issues found.")
return
if not all_blocked: if not all_blocked:
print("No blocked issues found.") print("No blocked issues found.")
return return
@@ -119,13 +135,13 @@ def blocked_check():
all_resolved = True all_resolved = True
for blk in blocking_nums: for blk in blocking_nums:
try: blk_issue = _req_safe("GET", f"/issues/{blk}")
blk_issue = _req("GET", f"/issues/{blk}") if blk_issue is None:
all_resolved = False # API error → keep blocked
break
if blk_issue.get("state") != "closed": if blk_issue.get("state") != "closed":
all_resolved = False all_resolved = False
break break
except SystemExit:
pass
if all_resolved: if all_resolved:
current_label_names = [l["name"] for l in issue.get("labels", [])] current_label_names = [l["name"] for l in issue.get("labels", [])]
@@ -179,10 +195,7 @@ def _unblock_issues_blocked_by(closed_num):
in any blocked issue and all referenced issues are now closed, in any blocked issue and all referenced issues are now closed,
removes the 'blocked' label and comments on the unblocked issue. removes the 'blocked' label and comments on the unblocked issue.
""" """
try: all_blocked = _req_safe("GET", "/issues?state=open&labels=blocked")
all_blocked = _req("GET", "/issues?state=open&labels=blocked")
except SystemExit:
return
if not all_blocked: if not all_blocked:
return return
@@ -196,13 +209,13 @@ def _unblock_issues_blocked_by(closed_num):
for blk in blocking_nums: for blk in blocking_nums:
if blk == closed_num: if blk == closed_num:
continue continue
try: blk_issue = _req_safe("GET", f"/issues/{blk}")
blk_issue = _req("GET", f"/issues/{blk}") if blk_issue is None:
all_resolved = False # API error → keep blocked
break
if blk_issue.get("state") != "closed": if blk_issue.get("state") != "closed":
all_resolved = False all_resolved = False
break break
except SystemExit:
pass # Inaccessible → treat as resolved
if all_resolved: if all_resolved:
current_label_names = [l["name"] for l in issue.get("labels", [])] current_label_names = [l["name"] for l in issue.get("labels", [])]
@@ -880,7 +880,7 @@ def run_ensemble_semantic_index(doc: dict) -> dict:
if v: if v:
print(f" {k}: {len(v)} 个问题") print(f" {k}: {len(v)} 个问题")
# Feedback retry: re-run with coverage feedback (up to 2 retries) # Feedback retry: re-run with coverage feedback (up to 2 retries, quality-gated)
retry_count = 0 retry_count = 0
while retry_count < 2: while retry_count < 2:
feedback = _build_coverage_feedback(gaps) feedback = _build_coverage_feedback(gaps)
@@ -889,6 +889,10 @@ def run_ensemble_semantic_index(doc: dict) -> dict:
retry_count += 1 retry_count += 1
print(f"\n 覆盖反馈重试 #{retry_count} (feedback长度={len(feedback)}字符)...", flush=True) print(f"\n 覆盖反馈重试 #{retry_count} (feedback长度={len(feedback)}字符)...", flush=True)
try: try:
# record pre-retry coverage to gate quality
pre_warnings = len(gaps.get("coverage_warnings", []))
pre_missing_rows = len(gaps.get("missing_table_rows", []))
retry_prompt = build_prompt(doc, feedback, all_paths) retry_prompt = build_prompt(doc, feedback, all_paths)
print(f" 重试 prompt 长度: {len(retry_prompt)} 字符", flush=True) print(f" 重试 prompt 长度: {len(retry_prompt)} 字符", flush=True)
retry_result = call_llm(retry_prompt, max_retries=1, temperature=0.3) retry_result = call_llm(retry_prompt, max_retries=1, temperature=0.3)
@@ -902,15 +906,28 @@ def run_ensemble_semantic_index(doc: dict) -> dict:
if src.get("section"): if src.get("section"):
retry_sections.add(src["section"]) retry_sections.add(src["section"])
print(f" 重试新增 sections: {sorted(retry_sections)}", flush=True) print(f" 重试新增 sections: {sorted(retry_sections)}", flush=True)
# Quality gate: only include retry if it improves coverage
trial_indices = semantic_indices + [retry_result]
trial_merged = ensemble_merge(trial_indices)
trial_passed, trial_gaps = _quick_validate(trial_merged, doc, all_paths)
trial_warnings = len(trial_gaps.get("coverage_warnings", []))
trial_missing = len(trial_gaps.get("missing_table_rows", []))
if trial_warnings < pre_warnings or trial_missing < pre_missing_rows:
semantic_indices.append(retry_result) semantic_indices.append(retry_result)
merged = ensemble_merge(semantic_indices) merged = trial_merged
passed, gaps = trial_passed, trial_gaps
merged["ensemble_temperatures"] = list(temperatures) + [f"feedback_retry_{retry_count}"] merged["ensemble_temperatures"] = list(temperatures) + [f"feedback_retry_{retry_count}"]
passed, gaps = _quick_validate(merged, doc, all_paths)
merged["validation_passed"] = passed merged["validation_passed"] = passed
merged["validation_gaps"] = { merged["validation_gaps"] = {
k: v for k, v in gaps.items() if v k: v for k, v in gaps.items() if v
} }
print(f" 重试后验证: {'PASS' if passed else 'GAPS FOUND'}", flush=True) print(f" 重试后验证 (已采纳): {'PASS' if passed else 'GAPS FOUND'} "
f"(warnings {pre_warnings}{trial_warnings}, "
f"missing_rows {pre_missing_rows}{trial_missing})", flush=True)
else:
print(f" 重试结果未提升覆盖率,丢弃 "
f"(warnings {pre_warnings}{trial_warnings}, "
f"missing_rows {pre_missing_rows}{trial_missing})", flush=True)
except Exception as e: except Exception as e:
print(f" 覆盖反馈重试失败: {e}", flush=True) print(f" 覆盖反馈重试失败: {e}", flush=True)
import traceback import traceback
@@ -170,8 +170,11 @@ def _normalize_rule(rule: dict) -> dict:
}] }]
# Ensure table/text sources have a section field (defensive against LLM omission) # Ensure table/text sources have a section field (defensive against LLM omission)
# Also normalize invalid source types (LLM hallucinations like function_unit_description)
sources = rule.get("sources", []) sources = rule.get("sources", [])
if sources: if sources:
valid_types = {"table", "text", "logic_tree"}
# try to infer a default section from sibling sources or the rule path # try to infer a default section from sibling sources or the rule path
default_section = "" default_section = ""
for s in sources: for s in sources:
@@ -186,6 +189,10 @@ def _normalize_rule(rule: dict) -> dict:
for src in sources: for src in sources:
stype = src.get("type", "") stype = src.get("type", "")
# Normalize invalid source types to "text"
if stype and stype not in valid_types:
src["type"] = "text"
stype = "text"
if stype in ("table", "text"): if stype in ("table", "text"):
if not src.get("section"): if not src.get("section"):
src["section"] = default_section src["section"] = default_section
@@ -511,3 +511,18 @@ class TestNormalizeRule:
} }
normalized = _normalize_rule(rule) normalized = _normalize_rule(rule)
assert "section" not in normalized["sources"][0] assert "section" not in normalized["sources"][0]
def test_normalize_source_invalid_type(self):
    """Unrecognized source types (LLM hallucinations) come back as ``text``."""
    hallucinated_source = {
        "type": "function_unit_description",
        "text_snippet": "desc",
        "section": "3.1 功能",
    }
    unknown_source = {"type": "unknown_type", "text_snippet": "also invalid"}
    condition = {"signal": "x", "operator": "==", "value": "1"}
    rule = {
        "trigger": {"conditions": [condition]},
        "sources": [hallucinated_source, unknown_source],
    }

    result_sources = _normalize_rule(rule)["sources"]

    # Both invalid types are rewritten to "text".
    assert result_sources[0]["type"] == "text"
    assert result_sources[1]["type"] == "text"
    # A section that was already present is left as supplied.
    assert result_sources[0]["section"] == "3.1 功能"