sync: update all skills from latest workspace code

doc_parser_skill:
- New: verify_flowchart.py (flowchart validation)
- Updated: LLM.py (multi-provider: DeepSeek + DashScope)
- Updated: image_parser.py (logic tree support, external prompts)
- Updated: SKILL.md, prompts/image_prompt.md

conflict_detection_skill:
- Updated: LLM.py (multi-provider sync)
- Updated: detect_conflicts.py (logic tree text conversion)

ir_generation_skill:
- Replaced old scripts/LLM.py + ir_generator.py with standalone project
- New: main.py, config.py, step1-3_*.py, ensemble_merge.py
- New: prompts/, tests/ subdirectories

tests:
- New: acceptance/ test suite with schema validation
- Fixed: conftest no longer globally skips non-acceptance tests
- Updated: test_sample.py for new ir_generation structure

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-05-30 22:45:08 +08:00
parent db64df2da1
commit fec4c09ee0
35 changed files with 8021 additions and 530 deletions
@@ -0,0 +1,178 @@
+"""Structured JSON report generation for QE acceptance test results.
+
+Produces a unified report with three-layer verdict:
+  Layer A – Schema compliance
+  Layer B – Structural coverage + stability
+  Layer C – LLM QE expert audit
+
+Final verdict: PASS (releasable) or FAIL (blocked).
+"""
+
+from __future__ import annotations
+
+import json
+import time
+from pathlib import Path
+from typing import Any
+
+
+def generate_report(
+    schema_result: dict,
+    coverage_result: dict,
+    audit_result: dict | None,
+    *,
+    commit: str = "",
+    branch: str = "main",
+    output_path: str | None = None,
+) -> dict:
+    """Combine the per-layer results into one report and optionally save it.
+
+    The report is releasable only when Layer A and Layer B both carry the
+    verdict ``"PASS"`` and Layer C is either absent (``audit_result`` is
+    ``None``) or carries the verdict ``"ACCEPT"``.
+
+    Args:
+        schema_result: Layer A result, e.g.
+            ``{"verdict": "PASS"|"FAIL", "total_checks": N, "passed": N, "failed": N}``
+        coverage_result: Layer B result, e.g.
+            ``{"verdict": "PASS"|"FAIL", "coverage_rate": float,
+            "stability": {"runs": N, "values": [...], "std": float}}``
+        audit_result: Layer C result, e.g.
+            ``{"verdict": "ACCEPT"|"REJECT", "inadequate_ratio": float,
+            "rationale": str, "section_assessments": [...]}``, or ``None``
+            to leave the audit layer out of the report.
+        commit: git commit SHA recorded in the report.
+        branch: branch name recorded in the report.
+        output_path: when truthy, the report is also written to this path as
+            UTF-8 JSON; missing parent directories are created.
+
+    Returns:
+        The assembled report dict.
+    """
+    layers: dict[str, Any] = {"A_schema": schema_result, "B_coverage": coverage_result}
+    audit_present = audit_result is not None
+    if audit_present:
+        layers["C_qe_audit"] = audit_result
+
+    # ── final verdict ──
+    # Every layer is evaluated up front; a skipped audit counts as passing.
+    layer_outcomes = [
+        schema_result.get("verdict") == "PASS",
+        coverage_result.get("verdict") == "PASS",
+        (not audit_present) or audit_result.get("verdict") == "ACCEPT",
+    ]
+    releasable = all(layer_outcomes)
+    final_verdict = "PASS" if releasable else "FAIL"
+
+    report = {
+        # time.strftime without a time tuple formats the current local time.
+        "timestamp": time.strftime("%Y-%m-%dT%H:%M:%S"),
+        "commit": commit,
+        "branch": branch,
+        "layers": layers,
+        "final_verdict": final_verdict,
+        "releasable": releasable,
+        "failure_details": _failure_details(layers),
+    }
+
+    if output_path:
+        target = Path(output_path)
+        target.parent.mkdir(parents=True, exist_ok=True)
+        serialized = json.dumps(report, ensure_ascii=False, indent=2)
+        target.write_text(serialized, encoding="utf-8")
+
+    return report
+
+
+def _failure_details(layers: dict) -> list[str]:
+    """Summarise which layers failed and why.
+
+    Args:
+        layers: mapping that may hold ``"A_schema"``, ``"B_coverage"`` and
+            ``"C_qe_audit"`` result dicts. A missing A or B entry is reported
+            as failed; a missing C entry means the audit was not run and is
+            not reported.
+
+    Returns:
+        One human-readable line per failing layer, in A, B, C order. The list
+        is empty when no layer failed.
+    """
+    details: list[str] = []
+
+    schema = layers.get("A_schema", {})
+    if schema.get("verdict") != "PASS":
+        details.append(
+            f"Layer A (Schema): {schema.get('failed', '?')}/{schema.get('total_checks', '?')} checks failed"
+        )
+
+    coverage = layers.get("B_coverage", {})
+    if coverage.get("verdict") != "PASS":
+        stability = coverage.get("stability") or {}
+        # Report the threshold the result was actually judged against; fall
+        # back to the historical "0.70" text when the result carries none.
+        rate_reason = (
+            f"rate={coverage.get('coverage_rate', '?')} "
+            f"(threshold: {coverage.get('coverage_threshold', '0.70')})"
+        )
+        reasons: list[str] = []
+        if coverage.get("coverage_pass") is False:
+            reasons.append(rate_reason)
+        if stability.get("pass") is False:
+            reasons.append(
+                f"stability std={stability.get('std', '?')} "
+                f"(threshold: {stability.get('threshold', '?')})"
+            )
+        if not reasons:
+            # No per-check flags available: keep the original rate-only message.
+            reasons.append(rate_reason)
+        details.append("Layer B (Coverage): " + "; ".join(reasons))
+
+    audit = layers.get("C_qe_audit")
+    # Any verdict other than ACCEPT blocks the release, so report all of them
+    # (not only REJECT) whenever the audit layer is present.
+    if audit is not None and audit.get("verdict") != "ACCEPT":
+        details.append(
+            f"Layer C (QE Audit): {audit.get('verdict', '?')} — inadequate_ratio={audit.get('inadequate_ratio', '?')}"
+        )
+
+    return details
+
+
+# ── Layer-specific result builders ──────────────────────────────────────────
+
+def schema_verdict(errors: list[str], stats: dict) -> dict:
+    """Build Layer A result from schema validation errors & stats.
+
+    Args:
+        errors: schema validation error messages; each one counts as a
+            failed check.
+        stats: counters read via ``"total_rules"`` and ``"valid_rules"``
+            (both default to 0 when absent).
+
+    Returns:
+        A dict with ``verdict`` (``"PASS"`` only when there are no errors and
+        no invalid rules), ``total_checks``, ``passed``, ``failed``,
+        ``rule_pass_rate`` and ``sample_errors`` (at most the first 10 errors).
+    """
+    total = stats.get("total_rules", 0)
+    valid = stats.get("valid_rules", 0)
+    # Clamp at zero: inconsistent stats (valid > total) must never offset
+    # real validation errors and turn a failing run into a PASS.
+    invalid_rules = max(total - valid, 0)
+    failed_checks = len(errors) + invalid_rules
+
+    return {
+        "verdict": "PASS" if failed_checks == 0 else "FAIL",
+        "total_checks": max(total, 1),  # at minimum, we checked the root
+        "passed": valid,
+        "failed": failed_checks,
+        "rule_pass_rate": round(valid / total, 2) if total > 0 else 0,
+        "sample_errors": errors[:10],  # first 10 for the report
+    }
+
+
+def coverage_verdict(
+    coverage_rate: float,
+    stability_std: float,
+    stability_values: list[float],
+    *,
+    coverage_threshold: float = 0.70,
+    stability_threshold: float = 0.05,
+    section_coverage: dict | None = None,
+    table_coverage: dict | None = None,
+    diagram_coverage: dict | None = None,
+) -> dict:
+    """Build Layer B result from coverage metrics.
+
+    Layer B passes only when the coverage rate reaches ``coverage_threshold``
+    and the stability standard deviation does not exceed
+    ``stability_threshold``. Both checks use the unrounded inputs; the values
+    stored in the result are rounded for display.
+
+    Args:
+        coverage_rate: measured coverage rate.
+        stability_std: standard deviation across the stability runs.
+        stability_values: the per-run values; their count is reported as ``runs``.
+        coverage_threshold: minimum acceptable coverage rate.
+        stability_threshold: maximum acceptable standard deviation.
+        section_coverage: optional breakdown, copied into the result when non-empty.
+        table_coverage: optional breakdown, copied into the result when non-empty.
+        diagram_coverage: optional breakdown, copied into the result when non-empty.
+
+    Returns:
+        The Layer B result dict.
+    """
+    rate_ok = coverage_rate >= coverage_threshold
+    stable = stability_std <= stability_threshold
+
+    shown_rate = round(coverage_rate, 3)
+    stability_block = {
+        "runs": len(stability_values),
+        "values": [round(sample, 3) for sample in stability_values],
+        "std": round(stability_std, 4),
+        "threshold": stability_threshold,
+        "pass": stable,
+    }
+
+    result: dict[str, Any] = {
+        "verdict": "PASS" if (rate_ok and stable) else "FAIL",
+        "coverage_rate": shown_rate,
+        "coverage_threshold": coverage_threshold,
+        "coverage_pass": rate_ok,
+        "stability": stability_block,
+    }
+
+    # Optional breakdowns are attached only when they carry data.
+    optional_breakdowns = (
+        ("section_coverage", section_coverage),
+        ("table_coverage", table_coverage),
+        ("diagram_coverage", diagram_coverage),
+    )
+    for key, breakdown in optional_breakdowns:
+        if breakdown:
+            result[key] = breakdown
+
+    return result
+
+
+def audit_verdict(audit_data: dict, *, inadequate_threshold: float = 0.30) -> dict:
+    """Build Layer C result from LLM QE audit.
+
+    The audit is accepted when ``inadequate_ratio`` does not exceed
+    ``inadequate_threshold``. A missing, ``None`` or non-numeric ratio is
+    treated as 1.0, so an unusable audit payload is rejected rather than
+    raising.
+
+    Args:
+        audit_data: audit payload. Keys read here, all optional:
+            ``inadequate_ratio`` (number or numeric string), ``rationale``,
+            ``total_functional_sections``, ``adequate``, ``inadequate`` and
+            ``not_applicable``.
+        inadequate_threshold: largest ratio that is still accepted.
+
+    Returns:
+        A dict with ``verdict`` (``"ACCEPT"`` or ``"REJECT"``), the rounded
+        ``inadequate_ratio``, ``threshold``, ``rationale``, ``total_sections``,
+        ``adequate``, ``inadequate`` and ``not_applicable``.
+    """
+    ratio = audit_data.get("inadequate_ratio", 1.0)
+    if not isinstance(ratio, (int, float)):
+        # LLM-produced JSON may hold null or a quoted number here; fail closed
+        # when the value cannot be interpreted as a number.
+        try:
+            ratio = float(ratio)
+        except (TypeError, ValueError):
+            ratio = 1.0
+    passed = ratio <= inadequate_threshold
+
+    return {
+        "verdict": "ACCEPT" if passed else "REJECT",
+        "inadequate_ratio": round(ratio, 3),
+        "threshold": inadequate_threshold,
+        "rationale": audit_data.get("rationale", ""),
+        "total_sections": audit_data.get("total_functional_sections", 0),
+        "adequate": audit_data.get("adequate", 0),
+        "inadequate": audit_data.get("inadequate", 0),
+        "not_applicable": audit_data.get("not_applicable", 0),
+    }