Files
document_analyzer/tests/acceptance/report.py
T
pzhang_zywl fec4c09ee0
CI / test (push) Successful in 8s
sync: update all skills from latest workspace code
doc_parser_skill:
- New: verify_flowchart.py (flowchart validation)
- Updated: LLM.py (multi-provider: DeepSeek + DashScope)
- Updated: image_parser.py (logic tree support, external prompts)
- Updated: SKILL.md, prompts/image_prompt.md

conflict_detection_skill:
- Updated: LLM.py (multi-provider sync)
- Updated: detect_conflicts.py (logic tree text conversion)

ir_generation_skill:
- Replaced old scripts/LLM.py + ir_generator.py with standalone project
- New: main.py, config.py, step1-3_*.py, ensemble_merge.py
- New: prompts/, tests/ subdirectories

tests:
- New: acceptance/ test suite with schema validation
- Fixed: conftest no longer globally skips non-acceptance tests
- Updated: test_sample.py for new ir_generation structure

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-05-30 22:45:08 +08:00

179 lines
5.8 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""Structured JSON report generation for QE acceptance test results.

Produces a unified report with a three-layer verdict:

    Layer A: Schema compliance
    Layer B: Structural coverage + stability
    Layer C: LLM QE expert audit

Final verdict: PASS (releasable) or FAIL (blocked).
"""
from __future__ import annotations
import json
import time
from pathlib import Path
from typing import Any
def generate_report(
    schema_result: dict,
    coverage_result: dict,
    audit_result: dict | None,
    *,
    commit: str = "",
    branch: str = "main",
    output_path: str | None = None,
) -> dict:
    """Combine the per-layer results into one report and optionally persist it.

    Args:
        schema_result: Layer A result, e.g.
            ``{"verdict": "PASS"|"FAIL", "total_checks": N, "passed": N, "failed": N}``.
        coverage_result: Layer B result, e.g.
            ``{"verdict": "PASS"|"FAIL", "coverage_rate": float,
            "stability": {"runs": N, "values": [...], "std": float}}``.
        audit_result: Layer C result, e.g.
            ``{"verdict": "ACCEPT"|"REJECT", "inadequate_ratio": float, ...}``,
            or ``None`` when no audit was run. A missing audit does not block
            the release; a present one must carry the verdict ``"ACCEPT"``.
        commit: git commit SHA recorded in the report.
        branch: branch name recorded in the report.
        output_path: when truthy, the report is written to this path as
            UTF-8 JSON (parent directories are created as needed).

    Returns:
        The report dict with the keys ``timestamp``, ``commit``, ``branch``,
        ``layers``, ``final_verdict``, ``releasable`` and ``failure_details``.
    """
    audit_present = audit_result is not None

    layers: dict[str, Any] = {"A_schema": schema_result, "B_coverage": coverage_result}
    if audit_present:
        layers["C_qe_audit"] = audit_result

    # Every layer that actually ran must be green for the build to be releasable.
    schema_ok = schema_result.get("verdict") == "PASS"
    coverage_ok = coverage_result.get("verdict") == "PASS"
    audit_ok = (not audit_present) or audit_result.get("verdict") == "ACCEPT"
    releasable = schema_ok and coverage_ok and audit_ok

    verdict_label = "PASS" if releasable else "FAIL"
    report = {
        # time.strftime without a time tuple formats the current local time.
        "timestamp": time.strftime("%Y-%m-%dT%H:%M:%S"),
        "commit": commit,
        "branch": branch,
        "layers": layers,
        "final_verdict": verdict_label,
        "releasable": releasable,
        "failure_details": _failure_details(layers),
    }

    if not output_path:
        return report

    target = Path(output_path)
    target.parent.mkdir(parents=True, exist_ok=True)
    serialized = json.dumps(report, ensure_ascii=False, indent=2)
    target.write_text(serialized, encoding="utf-8")
    return report
def _failure_details(layers: dict) -> list[str]:
    """Summarise which layers failed and why.

    Args:
        layers: mapping with the optional keys ``"A_schema"``, ``"B_coverage"``
            and ``"C_qe_audit"``, each holding that layer's result dict.

    Returns:
        One human-readable line per failure cause, in layer order. The list is
        empty when Layers A and B have verdict ``"PASS"`` and Layer C is either
        absent or has verdict ``"ACCEPT"``.
    """
    details: list[str] = []

    schema = layers.get("A_schema") or {}
    if schema.get("verdict") != "PASS":
        details.append(
            f"Layer A (Schema): {schema.get('failed', '?')}/{schema.get('total_checks', '?')} checks failed"
        )

    coverage = layers.get("B_coverage") or {}
    if coverage.get("verdict") != "PASS":
        stability = coverage.get("stability")
        if not isinstance(stability, dict):
            stability = {}
        stability_failed = stability.get("pass") is False
        coverage_known_ok = coverage.get("coverage_pass") is True

        # Skip the coverage line only when the result explicitly says coverage
        # passed AND the stability failure is reported instead; this guarantees
        # at least one Layer B line for every Layer B failure.
        if not (coverage_known_ok and stability_failed):
            rate = coverage.get("coverage_rate", "?")
            # Report the threshold the layer was actually judged against,
            # falling back to the historical default of 0.70.
            threshold = coverage.get("coverage_threshold", 0.70)
            if isinstance(threshold, (int, float)):
                threshold_text = f"{threshold:.2f}"
            else:
                threshold_text = str(threshold)
            details.append(
                f"Layer B (Coverage): rate={rate} (threshold: {threshold_text})"
            )
        if stability_failed:
            details.append(
                f"Layer B (Stability): std={stability.get('std', '?')} "
                f"(threshold: {stability.get('threshold', '?')})"
            )

    # Layer C is optional. When present, anything other than ACCEPT blocks the
    # release (mirroring the final-verdict rule), so it must be explained here.
    audit = layers.get("C_qe_audit")
    if audit is not None and audit.get("verdict") != "ACCEPT":
        details.append(
            f"Layer C (QE Audit): {audit.get('verdict', '?')} — inadequate_ratio={audit.get('inadequate_ratio', '?')}"
        )

    return details
# ── Layer-specific result builders ──────────────────────────────────────────
def schema_verdict(errors: list[str], stats: dict) -> dict:
    """Build the Layer A result from schema validation errors and stats.

    Args:
        errors: validation error messages; each one counts as a failed check.
        stats: counters read via ``"total_rules"`` and ``"valid_rules"``
            (both default to 0 when missing).

    Returns:
        A dict with ``verdict`` (``"PASS"`` only when nothing failed),
        ``total_checks``, ``passed``, ``failed``, ``rule_pass_rate`` (rounded
        to 2 places, 0 when there are no rules) and ``sample_errors`` (at most
        the first 10 errors).
    """
    total = stats.get("total_rules", 0)
    valid = stats.get("valid_rules", 0)

    # Clamp at zero: inconsistent stats (valid > total) must never produce a
    # negative count that cancels out real validation errors.
    invalid_rules = max(total - valid, 0)
    failed_checks = len(errors) + invalid_rules

    return {
        "verdict": "PASS" if failed_checks == 0 else "FAIL",
        "total_checks": max(total, 1),  # at minimum, we checked the root
        "passed": valid,
        "failed": failed_checks,
        "rule_pass_rate": round(valid / total, 2) if total > 0 else 0,
        "sample_errors": errors[:10],  # first 10 for the report
    }
def coverage_verdict(
    coverage_rate: float,
    stability_std: float,
    stability_values: list[float],
    *,
    coverage_threshold: float = 0.70,
    stability_threshold: float = 0.05,
    section_coverage: dict | None = None,
    table_coverage: dict | None = None,
    diagram_coverage: dict | None = None,
) -> dict:
    """Build the Layer B result from coverage and stability metrics.

    Layer B passes only when the coverage rate reaches ``coverage_threshold``
    and the run-to-run standard deviation stays within ``stability_threshold``.

    Args:
        coverage_rate: measured coverage rate.
        stability_std: standard deviation across the stability runs.
        stability_values: per-run values; their count is reported as ``runs``.
        coverage_threshold: minimum acceptable coverage rate (inclusive).
        stability_threshold: maximum acceptable standard deviation (inclusive).
        section_coverage: optional breakdown, attached only when non-empty.
        table_coverage: optional breakdown, attached only when non-empty.
        diagram_coverage: optional breakdown, attached only when non-empty.

    Returns:
        The Layer B result dict, with rounded metrics and per-check pass flags.
    """
    meets_coverage = coverage_rate >= coverage_threshold
    is_stable = stability_std <= stability_threshold

    run_count = len(stability_values)
    rounded_runs = [round(value, 3) for value in stability_values]
    stability_summary = {
        "runs": run_count,
        "values": rounded_runs,
        "std": round(stability_std, 4),
        "threshold": stability_threshold,
        "pass": is_stable,
    }

    result: dict[str, Any] = {
        "verdict": "PASS" if (meets_coverage and is_stable) else "FAIL",
        "coverage_rate": round(coverage_rate, 3),
        "coverage_threshold": coverage_threshold,
        "coverage_pass": meets_coverage,
        "stability": stability_summary,
    }

    # Optional breakdowns are only attached when they carry data.
    optional_breakdowns = (
        ("section_coverage", section_coverage),
        ("table_coverage", table_coverage),
        ("diagram_coverage", diagram_coverage),
    )
    for key, breakdown in optional_breakdowns:
        if breakdown:
            result[key] = breakdown

    return result
def audit_verdict(audit_data: dict, *, inadequate_threshold: float = 0.30) -> dict:
    """Build the Layer C result from the LLM QE audit output.

    Args:
        audit_data: audit output. The keys read here are ``inadequate_ratio``,
            ``rationale``, ``total_functional_sections``, ``adequate``,
            ``inadequate`` and ``not_applicable``; any other keys (such as
            ``section_assessments``) are not copied into the result.
        inadequate_threshold: maximum ratio (inclusive) that is still accepted.

    Returns:
        A dict with ``verdict`` (``"ACCEPT"`` or ``"REJECT"``), the rounded
        ``inadequate_ratio``, the ``threshold`` used, ``rationale`` and the
        section counters.
    """
    import math  # function-scope import keeps this block self-contained

    # The ratio comes from LLM output, so it may be missing, null, a numeric
    # string, or non-finite. Fail closed: anything unusable counts as 1.0.
    raw_ratio = audit_data.get("inadequate_ratio", 1.0)
    try:
        ratio = float(raw_ratio)
    except (TypeError, ValueError):
        ratio = 1.0
    if not math.isfinite(ratio):
        ratio = 1.0

    passed = ratio <= inadequate_threshold
    return {
        "verdict": "ACCEPT" if passed else "REJECT",
        "inadequate_ratio": round(ratio, 3),
        "threshold": inadequate_threshold,
        "rationale": audit_data.get("rationale", ""),
        "total_sections": audit_data.get("total_functional_sections", 0),
        "adequate": audit_data.get("adequate", 0),
        "inadequate": audit_data.get("inadequate", 0),
        "not_applicable": audit_data.get("not_applicable", 0),
    }