# document_analyzer/tests/acceptance/report.py

"""Structured JSON report generation for QE acceptance test results.

Produces a unified report with three-layer verdict:
  Layer A – Schema compliance
  Layer B – Structural coverage + stability
  Layer C – LLM QE expert audit

Final verdict: PASS (releasable) or FAIL (blocked).
"""

from __future__ import annotations

import json
import time
from pathlib import Path
from typing import Any


def generate_report(
    schema_result: dict,
    coverage_result: dict,
    audit_result: dict | None,
    *,
    commit: str = "",
    branch: str = "main",
    output_path: str | None = None,
) -> dict:
    """Combine the per-layer results into one report and return it.

    The report is releasable only when Layer A and Layer B both carry the
    verdict ``"PASS"`` and Layer C, if supplied, carries ``"ACCEPT"``.
    Passing ``None`` for *audit_result* leaves Layer C out of the report
    and out of the verdict.

    Args:
        schema_result: Layer A result; its ``"verdict"`` key is read.
        coverage_result: Layer B result; its ``"verdict"`` key is read.
        audit_result: Layer C result (``"verdict"`` key is read), or None
            when no audit was performed.
        commit: git commit SHA recorded in the report.
        branch: branch name recorded in the report.
        output_path: when non-empty, the report is also written to this
            path as indented UTF-8 JSON; missing parent directories are
            created.

    Returns:
        The report dict with keys ``timestamp``, ``commit``, ``branch``,
        ``layers``, ``final_verdict``, ``releasable`` and
        ``failure_details``.
    """
    layers: dict[str, Any] = {"A_schema": schema_result, "B_coverage": coverage_result}
    if audit_result is not None:
        layers["C_qe_audit"] = audit_result

    # Every layer is evaluated up front; an absent audit counts as a pass.
    layer_outcomes = [
        schema_result.get("verdict") == "PASS",
        coverage_result.get("verdict") == "PASS",
        True if audit_result is None else audit_result.get("verdict") == "ACCEPT",
    ]
    releasable = all(layer_outcomes)

    if releasable:
        final_verdict = "PASS"
    else:
        final_verdict = "FAIL"

    report = {
        # Local wall-clock time; the string carries no UTC offset.
        "timestamp": time.strftime("%Y-%m-%dT%H:%M:%S"),
        "commit": commit,
        "branch": branch,
        "layers": layers,
        "final_verdict": final_verdict,
        "releasable": releasable,
        "failure_details": _failure_details(layers),
    }

    if not output_path:
        return report

    destination = Path(output_path)
    destination.parent.mkdir(parents=True, exist_ok=True)
    serialized = json.dumps(report, ensure_ascii=False, indent=2)
    destination.write_text(serialized, encoding="utf-8")
    return report


def _failure_details(layers: dict) -> list[str]:
    """Summarise which layers failed and why."""
    details: list[str] = []

    schema = layers.get("A_schema", {})
    if schema.get("verdict") != "PASS":
        details.append(
            f"Layer A (Schema): {schema.get('failed', '?')}/{schema.get('total_checks', '?')} checks failed"
        )

    coverage = layers.get("B_coverage", {})
    if coverage.get("verdict") != "PASS":
        cv = coverage.get("coverage_rate", "?")
        details.append(f"Layer B (Coverage): rate={cv} (threshold: 0.70)")

    audit = layers.get("C_qe_audit", {})
    if audit.get("verdict") == "REJECT":
        details.append(
            f"Layer C (QE Audit): REJECT — inadequate_ratio={audit.get('inadequate_ratio', '?')}"
        )

    return details


# ── Layer-specific result builders ──────────────────────────────────────────

def schema_verdict(errors: list[str], stats: dict) -> dict:
    """Turn schema-validation output into the Layer A result dict.

    Args:
        errors: validation error messages; at most the first ten are
            copied into the result.
        stats: counters; ``"total_rules"`` and ``"valid_rules"`` are read,
            each defaulting to 0 when absent.

    Returns:
        Dict with ``verdict`` (``"PASS"`` only when there are no errors and
        every rule is valid), ``total_checks``, ``passed``, ``failed``
        (error count plus the number of non-valid rules),
        ``rule_pass_rate`` and ``sample_errors``.
    """
    rule_count = stats.get("total_rules", 0)
    valid_count = stats.get("valid_rules", 0)

    invalid_count = rule_count - valid_count
    failure_count = len(errors) + invalid_count

    # Never report fewer than one check: the root is always inspected.
    check_count = max(rule_count, 1)

    if rule_count > 0:
        pass_rate = round(valid_count / check_count, 2)
    else:
        pass_rate = 0

    verdict = "FAIL" if failure_count else "PASS"

    return {
        "verdict": verdict,
        "total_checks": check_count,
        "passed": valid_count,
        "failed": failure_count,
        "rule_pass_rate": pass_rate,
        "sample_errors": errors[:10],  # keep the report small
    }


def coverage_verdict(
    coverage_rate: float,
    stability_std: float,
    stability_values: list[float],
    *,
    coverage_threshold: float = 0.70,
    stability_threshold: float = 0.05,
    section_coverage: dict | None = None,
    table_coverage: dict | None = None,
    diagram_coverage: dict | None = None,
) -> dict:
    """Turn coverage and stability metrics into the Layer B result dict.

    The layer passes when *coverage_rate* is at least *coverage_threshold*
    and *stability_std* is at most *stability_threshold*. Both comparisons
    use the unrounded inputs; rounding is applied only to the reported
    values.

    Args:
        coverage_rate: measured coverage, reported rounded to 3 decimals.
        stability_std: spread across runs, reported rounded to 4 decimals.
        stability_values: per-run values, each reported rounded to
            3 decimals; their count is reported as ``runs``.
        coverage_threshold: minimum acceptable coverage rate.
        stability_threshold: maximum acceptable standard deviation.
        section_coverage: optional breakdown, attached only when non-empty.
        table_coverage: optional breakdown, attached only when non-empty.
        diagram_coverage: optional breakdown, attached only when non-empty.

    Returns:
        The Layer B result dict.
    """
    coverage_ok = coverage_rate >= coverage_threshold
    stability_ok = stability_std <= stability_threshold

    stability_block = {
        "runs": len(stability_values),
        "values": [round(value, 3) for value in stability_values],
        "std": round(stability_std, 4),
        "threshold": stability_threshold,
        "pass": stability_ok,
    }

    result: dict[str, Any] = {
        "verdict": "PASS" if (coverage_ok and stability_ok) else "FAIL",
        "coverage_rate": round(coverage_rate, 3),
        "coverage_threshold": coverage_threshold,
        "coverage_pass": coverage_ok,
        "stability": stability_block,
    }

    # Optional breakdowns are appended in a fixed order, skipping empty ones.
    optional_breakdowns = (
        ("section_coverage", section_coverage),
        ("table_coverage", table_coverage),
        ("diagram_coverage", diagram_coverage),
    )
    for key, breakdown in optional_breakdowns:
        if breakdown:
            result[key] = breakdown

    return result


def audit_verdict(audit_data: dict, *, inadequate_threshold: float = 0.30) -> dict:
    """Build Layer C result from LLM QE audit.

    The audit is accepted when its inadequate ratio does not exceed
    *inadequate_threshold*. A ratio that is missing, null, non-numeric or
    not finite is treated as 1.0, so a malformed audit is rejected instead
    of raising or leaking NaN/Infinity into the JSON report.

    *audit_data* keys that are read:
        inadequate_ratio: number (a numeric string is also accepted)
        rationale: str, defaults to ``""``
        total_functional_sections: reported as ``total_sections``, default 0
        adequate, inadequate, not_applicable: counts, default 0

    Args:
        audit_data: parsed audit output.
        inadequate_threshold: highest inadequate ratio still accepted.

    Returns:
        Dict with ``verdict`` (``"ACCEPT"`` or ``"REJECT"``),
        ``inadequate_ratio`` (rounded to 3 decimals), ``threshold``,
        ``rationale``, ``total_sections``, ``adequate``, ``inadequate`` and
        ``not_applicable``.
    """
    import math  # local import: this module does not import math at file level

    raw_ratio = audit_data.get("inadequate_ratio", 1.0)
    if isinstance(raw_ratio, (int, float)):
        ratio = raw_ratio
    else:
        # LLM output may carry null or a string here; fail closed on
        # anything that cannot be read as a number.
        try:
            ratio = float(raw_ratio)
        except (TypeError, ValueError):
            ratio = 1.0
    if isinstance(ratio, float) and not math.isfinite(ratio):
        ratio = 1.0

    passed = ratio <= inadequate_threshold

    return {
        "verdict": "ACCEPT" if passed else "REJECT",
        "inadequate_ratio": round(ratio, 3),
        "threshold": inadequate_threshold,
        "rationale": audit_data.get("rationale", ""),
        "total_sections": audit_data.get("total_functional_sections", 0),
        "adequate": audit_data.get("adequate", 0),
        "inadequate": audit_data.get("inadequate", 0),
        "not_applicable": audit_data.get("not_applicable", 0),
    }