sync: update all skills from latest workspace code
CI / test (push) Successful in 8s

doc_parser_skill:
- New: verify_flowchart.py (flowchart validation)
- Updated: LLM.py (multi-provider: DeepSeek + DashScope)
- Updated: image_parser.py (logic tree support, external prompts)
- Updated: SKILL.md, prompts/image_prompt.md

conflict_detection_skill:
- Updated: LLM.py (multi-provider sync)
- Updated: detect_conflicts.py (logic tree text conversion)

ir_generation_skill:
- Replaced old scripts/LLM.py + ir_generator.py with standalone project
- New: main.py, config.py, step1-3_*.py, ensemble_merge.py
- New: prompts/, tests/ subdirectories

tests:
- New: acceptance/ test suite with schema validation
- Fixed: conftest no longer globally skips non-acceptance tests
- Updated: test_sample.py for new ir_generation structure

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
2026-05-30 22:45:08 +08:00
parent db64df2da1
commit fec4c09ee0
35 changed files with 8021 additions and 530 deletions
+178
View File
@@ -0,0 +1,178 @@
"""Structured JSON report generation for QE acceptance test results.
Produces a unified report with three-layer verdict:
Layer A Schema compliance
Layer B Structural coverage + stability
Layer C LLM QE expert audit
Final verdict: PASS (releasable) or FAIL (blocked).
"""
from __future__ import annotations
import json
import time
from pathlib import Path
from typing import Any
def generate_report(
    schema_result: dict,
    coverage_result: dict,
    audit_result: dict | None,
    *,
    commit: str = "",
    branch: str = "main",
    output_path: str | None = None,
) -> dict:
    """Combine the per-layer results into one report and optionally persist it.

    The final verdict is ``"PASS"`` only when Layer A's ``"verdict"`` is
    ``"PASS"``, Layer B's ``"verdict"`` is ``"PASS"``, and Layer C is either
    not supplied (``audit_result is None``) or has ``"verdict"`` equal to
    ``"ACCEPT"``. Any other combination yields ``"FAIL"``.

    Args:
        schema_result: Layer A result dict; stored under ``layers["A_schema"]``.
        coverage_result: Layer B result dict; stored under
            ``layers["B_coverage"]``.
        audit_result: Layer C result dict, or ``None``. When ``None`` the
            ``"C_qe_audit"`` layer is left out of the report and does not
            affect the verdict.
        commit: Value copied verbatim into the report's ``"commit"`` field.
        branch: Value copied verbatim into the report's ``"branch"`` field.
        output_path: When truthy, the report is serialised as indented UTF-8
            JSON to this path; missing parent directories are created.

    Returns:
        The report dict with keys ``timestamp``, ``commit``, ``branch``,
        ``layers``, ``final_verdict``, ``releasable`` and ``failure_details``.
    """
    audit_supplied = audit_result is not None

    layer_results: dict[str, Any] = {
        "A_schema": schema_result,
        "B_coverage": coverage_result,
    }
    if audit_supplied:
        layer_results["C_qe_audit"] = audit_result

    # Each layer is evaluated independently; a skipped audit counts as a pass.
    schema_ok = schema_result.get("verdict") == "PASS"
    coverage_ok = coverage_result.get("verdict") == "PASS"
    if audit_supplied:
        audit_ok = audit_result.get("verdict") == "ACCEPT"
    else:
        audit_ok = True
    releasable = schema_ok and coverage_ok and audit_ok

    if releasable:
        final_verdict = "PASS"
    else:
        final_verdict = "FAIL"

    report = {
        # time.strftime without a time tuple formats the current local time;
        # the string carries no UTC offset.
        "timestamp": time.strftime("%Y-%m-%dT%H:%M:%S"),
        "commit": commit,
        "branch": branch,
        "layers": layer_results,
        "final_verdict": final_verdict,
        "releasable": releasable,
        "failure_details": _failure_details(layer_results),
    }

    if output_path:
        destination = Path(output_path)
        destination.parent.mkdir(parents=True, exist_ok=True)
        serialized = json.dumps(report, ensure_ascii=False, indent=2)
        destination.write_text(serialized, encoding="utf-8")

    return report
def _failure_details(layers: dict) -> list[str]:
    """Summarise which layers failed and why.

    A layer is reported whenever it would make the final verdict fail:

    * Layer A (``"A_schema"``) when its ``"verdict"`` is not ``"PASS"``.
    * Layer B (``"B_coverage"``) when its ``"verdict"`` is not ``"PASS"``.
      The message quotes the layer's own ``"coverage_threshold"`` (falling
      back to 0.70 when absent) and, when the ``"stability"`` sub-dict has
      ``"pass"`` set to ``False``, the stability figures as well.
    * Layer C (``"C_qe_audit"``) when the layer is present (not ``None``)
      and its ``"verdict"`` is anything other than ``"ACCEPT"``. A missing
      layer means the audit was not run and is not reported.

    Args:
        layers: Mapping of layer key to that layer's result dict.

    Returns:
        One human-readable line per failing layer, in A, B, C order; an
        empty list when nothing failed.
    """
    details: list[str] = []

    schema = layers.get("A_schema", {})
    if schema.get("verdict") != "PASS":
        details.append(
            f"Layer A (Schema): {schema.get('failed', '?')}"
            f"/{schema.get('total_checks', '?')} checks failed"
        )

    coverage = layers.get("B_coverage", {})
    if coverage.get("verdict") != "PASS":
        rate = coverage.get("coverage_rate", "?")
        # Report the threshold the layer was actually judged against rather
        # than assuming the default.
        threshold = coverage.get("coverage_threshold", 0.70)
        if isinstance(threshold, (int, float)):
            threshold_text = f"{threshold:.2f}"
        else:
            threshold_text = str(threshold)
        message = f"Layer B (Coverage): rate={rate} (threshold: {threshold_text})"

        # Layer B can fail on stability alone; say so instead of leaving a
        # message that only shows a passing coverage rate.
        stability = coverage.get("stability")
        if isinstance(stability, dict) and stability.get("pass") is False:
            message += (
                f"; stability std={stability.get('std', '?')}"
                f" (threshold: {stability.get('threshold', '?')})"
            )
        details.append(message)

    audit = layers.get("C_qe_audit")
    if audit is not None:
        verdict = audit.get("verdict")
        # Anything other than ACCEPT blocks the release, so it must be
        # explained here too (not only an explicit REJECT).
        if verdict != "ACCEPT":
            label = verdict if verdict is not None else "no verdict"
            details.append(
                f"Layer C (QE Audit): {label} — "
                f"inadequate_ratio={audit.get('inadequate_ratio', '?')}"
            )

    return details
# ── Layer-specific result builders ──────────────────────────────────────────
def schema_verdict(errors: list[str], stats: dict) -> dict:
    """Build the Layer A result from schema validation errors and stats.

    Args:
        errors: Validation error messages; every entry counts as one failed
            check.
        stats: Validation statistics. ``"total_rules"`` and ``"valid_rules"``
            are read, each defaulting to 0 when absent.

    Returns:
        A dict with:

        * ``verdict`` – ``"PASS"`` when there are no errors and no invalid
          rules, otherwise ``"FAIL"``.
        * ``total_checks`` – ``total_rules``, but never less than 1.
        * ``passed`` – ``valid_rules``.
        * ``failed`` – number of errors plus number of invalid rules.
        * ``rule_pass_rate`` – ``valid_rules / total_rules`` rounded to two
          decimals, or ``0`` when there are no rules.
        * ``sample_errors`` – the first 10 entries of *errors*.
    """
    total = stats.get("total_rules", 0)
    valid = stats.get("valid_rules", 0)

    # Clamp at zero: inconsistent stats (valid > total) must not produce a
    # negative count that cancels out genuine errors and yields a false PASS.
    invalid_rules = max(total - valid, 0)
    failed_checks = len(errors) + invalid_rules

    return {
        "verdict": "PASS" if failed_checks == 0 else "FAIL",
        "total_checks": max(total, 1),  # at minimum, we checked the root
        "passed": valid,
        "failed": failed_checks,
        "rule_pass_rate": round(valid / total, 2) if total > 0 else 0,
        "sample_errors": errors[:10],  # first 10 for the report
    }
def coverage_verdict(
    coverage_rate: float,
    stability_std: float,
    stability_values: list[float],
    *,
    coverage_threshold: float = 0.70,
    stability_threshold: float = 0.05,
    section_coverage: dict | None = None,
    table_coverage: dict | None = None,
    diagram_coverage: dict | None = None,
) -> dict:
    """Build the Layer B result from coverage and stability metrics.

    Layer B passes only when ``coverage_rate >= coverage_threshold`` and
    ``stability_std <= stability_threshold``.

    Args:
        coverage_rate: Measured coverage rate; reported rounded to 3 decimals.
        stability_std: Standard deviation across runs; reported rounded to
            4 decimals.
        stability_values: Per-run values; reported rounded to 3 decimals, and
            their count is reported as ``runs``.
        coverage_threshold: Minimum acceptable coverage rate.
        stability_threshold: Maximum acceptable standard deviation.
        section_coverage: Optional breakdown, included only when truthy.
        table_coverage: Optional breakdown, included only when truthy.
        diagram_coverage: Optional breakdown, included only when truthy.

    Returns:
        The Layer B result dict, including a nested ``"stability"`` dict.
    """
    rate_ok = coverage_rate >= coverage_threshold
    stable_ok = stability_std <= stability_threshold

    stability_block = {
        "runs": len(stability_values),
        "values": [round(value, 3) for value in stability_values],
        "std": round(stability_std, 4),
        "threshold": stability_threshold,
        "pass": stable_ok,
    }

    result: dict[str, Any] = {
        "verdict": "PASS" if (rate_ok and stable_ok) else "FAIL",
        "coverage_rate": round(coverage_rate, 3),
        "coverage_threshold": coverage_threshold,
        "coverage_pass": rate_ok,
        "stability": stability_block,
    }

    # Optional breakdowns are attached in a fixed order, skipping empty ones.
    optional_breakdowns = (
        ("section_coverage", section_coverage),
        ("table_coverage", table_coverage),
        ("diagram_coverage", diagram_coverage),
    )
    for key, breakdown in optional_breakdowns:
        if breakdown:
            result[key] = breakdown

    return result
def audit_verdict(audit_data: dict, *, inadequate_threshold: float = 0.30) -> dict:
    """Build the Layer C result from the LLM QE audit.

    The audit is accepted when ``inadequate_ratio <= inadequate_threshold``.
    The ratio fails closed: when it is missing, ``None``, not convertible to
    a float, or NaN, it is treated as ``1.0`` so the audit is rejected
    instead of raising or leaking an unusable value into the report.

    Args:
        audit_data: Audit output. The keys read are ``inadequate_ratio``,
            ``rationale``, ``total_functional_sections``, ``adequate``,
            ``inadequate`` and ``not_applicable``; all are optional. Other
            keys (for example ``section_assessments``) are not copied into
            the result.
        inadequate_threshold: Largest inadequate ratio that still counts as
            acceptable.

    Returns:
        A dict with ``verdict`` (``"ACCEPT"`` or ``"REJECT"``),
        ``inadequate_ratio`` (rounded to 3 decimals), ``threshold``,
        ``rationale``, ``total_sections``, ``adequate``, ``inadequate`` and
        ``not_applicable``.
    """
    ratio = audit_data.get("inadequate_ratio", 1.0)

    # The audit payload comes from an LLM, so the ratio may arrive as null
    # or as a numeric string; coerce what we can and fail closed otherwise.
    if not isinstance(ratio, (int, float)):
        try:
            ratio = float(ratio)
        except (TypeError, ValueError):
            ratio = 1.0
    if ratio != ratio:  # NaN is the only value that is unequal to itself
        ratio = 1.0

    passed = ratio <= inadequate_threshold
    return {
        "verdict": "ACCEPT" if passed else "REJECT",
        "inadequate_ratio": round(ratio, 3),
        "threshold": inadequate_threshold,
        "rationale": audit_data.get("rationale", ""),
        "total_sections": audit_data.get("total_functional_sections", 0),
        "adequate": audit_data.get("adequate", 0),
        "inadequate": audit_data.get("inadequate", 0),
        "not_applicable": audit_data.get("not_applicable", 0),
    }