Initial commit: document_analyzer with CI/CD pipeline

- 4 skill pipeline (doc_parser, conflict_detection, ir_generation, resolution_application) - CI workflow on push/PR (.gitea/workflows/ci.yml) - Auto-issue on CI failure (.gitea/workflows/auto-issue.yml) - Pytest smoke tests (tests/test_sample.py) Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-05-29 20:00:26 +08:00
commit 40567a4fb6
22 changed files with 2898 additions and 0 deletions
@@ -0,0 +1,37 @@
+---
+name: 冲突检测技能
+description: 分析解析后的文档，检测图表类图像与其相应文本描述之间的矛盾和条件不匹配。
+---
+
+# 冲突检测技能
+
+## 概述
+
+此技能识别解析文档中文本内容与视觉内容之间的潜在冲突。它特别针对图表类图像（流程图、架构图、状态图、序列图和活动图）并交叉检查其描述与同文档部分的文本内容。
+
+## 功能
+
+该技能：
+- 从解析的文档结构中识别图表类图像
+- 将图像描述与同一文档部分中的相应文本内容进行交叉引用
+- 检测视觉表示和文本表示之间的矛盾和条件不匹配
+- 生成已识别冲突的结构化列表，并标明每个冲突所在的位置
+- 专门针对流程图、架构图、状态图、序列图和活动图
+
+## 输入要求
+
+- 解析文档JSON文件的路径（由文档解析技能生成）
+- 可选输出目录规范
+- 可选试运行标志，在不调用API的情况下预览大语言模型提示
+
+## 输出
+
+该技能生成一个结构化JSON文件，文件名为输入文档的基本名称后跟'_conflicts.json'，包含：
+- 冲突对象列表，每个对象包含差异的详细说明
+- 标识每个冲突发生位置的节标识符
+- 冲突图像和文本内容的片段
+- 每个冲突的类型分类（例如，矛盾、条件不匹配、范围不一致）及严重程度（high / medium / low）
+
+## 集成点
+
+此技能以文档解析技能的输出作为输入，其结果供冲突解决应用技能（resolution_application）使用。冲突解决过程通常需要人工确认后才能进入下一阶段。
@@ -0,0 +1,105 @@
+import logging
+import os
+import time
+from typing import Optional
+
+from openai import OpenAI
+
+logger = logging.getLogger(__name__)
+
+
+class LLMClient:
+    """Low-level OpenAI-compatible LLM client with retry and token tracking.
+
+    Usage::
+
+        llm = LLMClient()
+        content = llm.chat("qwen3.5-flash", [{"role": "user", "content": "Hello"}])
+        print(llm.usage)
+    """
+
+    IMAGE_MODEL = "qwen3-vl-plus"
+    TEXT_MODEL = "qwen3.5-flash-2026-02-23"
+    TIMEOUT = 120
+    MAX_RETRIES = 3
+
+    def __init__(
+        self,
+        *,
+        base_url: str = "https://dashscope.aliyuncs.com/compatible-mode/v1",
+        timeout: int | None = None,
+    ):
+        key = os.environ.get("DASHSCOPE_API_KEY", "")
+        if not key:
+            raise ValueError("DASHSCOPE_API_KEY environment variable is not set.")
+        self._client = OpenAI(api_key=key, base_url=base_url)
+        self._timeout = timeout or self.TIMEOUT
+        self._prompt_tokens = 0
+        self._completion_tokens = 0
+
+    @property
+    def usage(self) -> dict:
+        """Return accumulated token counts as ``{prompt, completion, total}``."""
+        return {
+            "prompt_tokens": self._prompt_tokens,
+            "completion_tokens": self._completion_tokens,
+            "total_tokens": self._prompt_tokens + self._completion_tokens,
+        }
+
+    @staticmethod
+    def estimate_tokens(text: str) -> int:
+        """Quick token estimate.  CJK ≈1.7/token, others ≈3.0/token."""
+        cjk = sum(1 for c in text if '一' <= c <= '鿿' or '　' <= c <= '〿')
+        other = len(text) - cjk
+        return max(1, int(cjk / 1.7 + other / 3.0))
+
+    @staticmethod
+    def estimate_image_tokens() -> int:
+        """Fixed estimate for one vision-model image (~500 tokens)."""
+        return 500
+
+    def chat(
+        self, model: str, messages: list[dict], *, timeout: int | None = None,
+        response_format: dict | None = None,
+    ) -> str:
+        """Send a chat completion request and return the response content.
+
+        Automatically retries on failure and accumulates token usage.
+        """
+        label = f"chat({model})"
+
+        def _call():
+            t0 = time.time()
+            kwargs = dict(model=model, messages=messages, timeout=timeout or self._timeout)
+            if response_format is not None:
+                kwargs["response_format"] = response_format
+            kwargs["temperature"] = 0
+            resp = self._client.chat.completions.create(**kwargs)
+            content = resp.choices[0].message.content
+            usg = resp.usage
+            if usg:
+                self._prompt_tokens += usg.prompt_tokens
+                self._completion_tokens += usg.completion_tokens
+            elapsed = time.time() - t0
+            logger.info("%s: %d chars in %.1fs", label, len(content) if content else 0, elapsed)
+            if not content:
+                raise RuntimeError("Empty response from LLM")
+            return content
+
+        return self._retry(_call, label)
+
+    def _retry(self, fn, label: str) -> str:
+        """Call *fn()* with exponential-backoff retry."""
+        last_error: Optional[Exception] = None
+        for attempt in range(self.MAX_RETRIES):
+            try:
+                return fn()
+            except Exception as e:
+                last_error = e
+                logger.warning(
+                    "%s error (attempt %d/%d): %s",
+                    label, attempt + 1, self.MAX_RETRIES, e,
+                )
+                if attempt < self.MAX_RETRIES - 1:
+                    time.sleep(2 ** attempt)
+        raise RuntimeError(f"{label}: all retries exhausted") from last_error
@@ -0,0 +1,280 @@
+#!/usr/bin/env python3
+"""Detect logical conflicts between image analysis and text in ``_parsed.json``.
+
+Usage::
+
+    python scripts/detect_conflicts.py path/to/<basename>_parsed.json [--output-dir DIR] [--dry-run]
+
+For each diagram-type image (flowchart, architecture, state, sequence, activity),
+the script locates its section via *image_sources*, grabs the corresponding text
+blocks, and calls an LLM to find contradictions/condition-mismatches between the
+image description and the text.
+
+Output: ``<basename>_conflicts.json``
+"""
+
+import argparse
+import json
+import logging
+import os
+import re
+import sys
+import time
+
+sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
+
+from LLM import LLMClient
+
+# NOTE: this configures the root logger as an import-time side effect, so it
+# also runs when the module is imported rather than executed as a script.
+logging.basicConfig(
+    level=logging.INFO,
+    format="%(asctime)s [%(levelname)s] %(message)s",
+    datefmt="%Y-%m-%d %H:%M:%S",
+)
+
+logger = logging.getLogger(__name__)
+
+# Seconds to sleep between consecutive LLM conflict checks.
+RATE_LIMIT_DELAY = 0.5
+# Image "type" values that are checked; images of any other type are skipped.
+DIAGRAM_TYPES = {"flowchart", "architecture", "state", "sequence", "activity"}
+# Sections whose collected text is shorter than this (after strip) are skipped.
+MIN_TEXT_CHARS = 20
+
+# Prompt template for one image-vs-text comparison. It is filled in with
+# str.format() using three placeholders: {image_description},
+# {text_description} and {section_name}. The doubled braces in the JSON
+# example are format() escapes and come out as single literal braces.
+# The model is asked to answer with either the [[NO_CONFLICT]] marker or a
+# JSON array of conflict objects.
+PROMPT_DETECT_CONFLICT = """你是一个文档一致性检查专家。以下内容来自同一份需求文档的同一个章节，包含两部分：
+
+## 部分1：图片（流程图/架构图/状态图）的描述
+```
+{image_description}
+```
+
+## 部分2：同章节的文字描述
+```
+{text_description}
+```
+
+## 你的任务
+检查这两部分之间是否存在**逻辑矛盾或条件不一致**。
+
+你需要关注的冲突类型：
+
+1. **condition_mismatch**（条件不一致）：两者描述了同一规则，但触发条件、阈值、时序不同。
+   例如：图片说"车速≥15km/h且持续5秒"，文字说"车速≥10km/h且持续3秒"。
+   例如：图片说"非P档限制"，文字说"车速>0限制"。
+
+2. **contradiction**（直接矛盾）：两者对同一事物的描述完全相反。
+   例如：图片说"功能X被禁止"，文字说"功能X可用"。
+   例如：图片说"开关默认关闭"，文字说"开关默认开启"。
+
+3. **scope_mismatch**（范围不一致）：两者描述的场景/地域/设备范围不同。
+   例如：图片说"国内方案"，文字说"海外方案"。
+   例如：图片说"CSD中控屏"，文字描述包含"PSD副驾屏"。
+
+## 输出格式
+
+如果**没有冲突**，只输出：
+```
+[[NO_CONFLICT]]
+```
+
+如果**有冲突**，输出以下JSON数组（不要任何其他文字）：
+
+```json
+[
+  {{
+    "conflict_type": "condition_mismatch",
+    "severity": "high",
+    "section": "{section_name}",
+    "image_snippet": "图片中描述的关键内容（摘录）",
+    "text_snippet": "文字中描述的关键内容（摘录）",
+    "description": "用中文说明冲突的具体差异"
+  }}
+]
+```
+
+注意：
+- 每个冲突一个条目，不要合并
+- severity: high（功能正确性受影响）| medium（边界条件模糊）| low（表达方式差异）
+- 输出必须是**严格合法的JSON数组**，不要有尾随逗号
+- 如果没有严格冲突，输出 [[NO_CONFLICT]]
+"""
+
+
+def _build_text_for_section(sections: list[dict], section_name: str) -> str:
+    """Build a single text block for the given section name."""
+    texts: list[str] = []
+    for sec in sections:
+        if sec.get("source", "") == section_name:
+            for blk in sec.get("blocks", []):
+                if blk["type"] == "para":
+                    texts.append(blk["text"])
+                elif blk["type"] == "table":
+                    table_lines = [f"表格 {blk['table']}:"]
+                    for ri, row in enumerate(blk.get("rows", [])):
+                        cols = row.get("columns", [])
+                        parts = [f"{c['name']}: {c['text']}" for c in cols]
+                        table_lines.append(f"  行{ri + 1}: {' | '.join(parts)}")
+                    texts.append("\n".join(table_lines))
+    return "\n\n".join(texts)
+
+
+def _parse_conflict_json(content: str) -> list[dict]:
+    """Extract JSON array from LLM response, handling markdown fences."""
+    stripped = content.strip()
+
+    if "[[NO_CONFLICT]]" in stripped:
+        return []
+
+    # Remove markdown code fences
+    if "```json" in stripped:
+        stripped = stripped.split("```json", 1)[1]
+        if "```" in stripped:
+            stripped = stripped.split("```", 1)[0]
+    elif "```" in stripped:
+        stripped = stripped.split("```", 1)[1]
+        if "```" in stripped:
+            stripped = stripped.split("```", 1)[0]
+
+    stripped = stripped.strip()
+    if not stripped:
+        return []
+
+    # Try to find a JSON array
+    match = re.search(r"\[\s*\{.*\}\s*\]", stripped, re.DOTALL)
+    if match:
+        stripped = match.group()
+
+    try:
+        conflicts = json.loads(stripped)
+        if isinstance(conflicts, list):
+            return conflicts
+        return []
+    except json.JSONDecodeError as e:
+        logger.warning("Failed to parse conflict JSON: %s", e)
+        logger.debug("Raw content: %s", stripped)
+        return []
+
+
+def detect_conflicts(
+    parsed_path: str,
+    output_dir: str | None = None,
+    *,
+    dry_run: bool = False,
+) -> list[dict]:
+    """Load ``_parsed.json`` and detect image-vs-text conflicts.
+
+    Returns a flat list of conflict dicts and writes to ``<basename>_conflicts.json``.
+    """
+    with open(parsed_path, "r", encoding="utf-8") as f:
+        data = json.load(f)
+
+    basename = os.path.splitext(os.path.basename(parsed_path))[0]
+    if basename.endswith("_parsed"):
+        basename = basename[:-7]
+
+    if output_dir is None:
+        output_dir = os.path.dirname(os.path.abspath(parsed_path))
+    os.makedirs(output_dir, exist_ok=True)
+
+    sections = data.get("sections", [])
+    image_sources = data.get("image_sources", {})
+    image_analysis = data.get("image_analysis", [])
+
+    llm = LLMClient()
+    all_conflicts: list[dict] = []
+
+    # ---- For each diagram image, compare with its section text -------------
+    for img in image_analysis:
+        img_type = img.get("type", "other")
+        rid = img.get("rid", "")
+        description = img.get("description", "").strip()
+
+        if img_type not in DIAGRAM_TYPES or not description:
+            logger.info("Skip conflict check: rid=%s type=%s", rid, img_type)
+            continue
+
+        # Find source section
+        src = image_sources.get(rid, {})
+        section_name = src.get("section", "")
+
+        if not section_name:
+            logger.warning("No section found for rid=%s, skipping", rid)
+            continue
+
+        # Build text from the same section
+        text_content = _build_text_for_section(sections, section_name)
+        text_len = len(text_content.strip())
+        if text_len < MIN_TEXT_CHARS:
+            logger.info("Section text too short (%d chars) for rid=%s, skip", text_len, rid)
+            continue
+
+        logger.info("Checking conflicts: rid=%s section=%s (desc=%d chars, text=%d chars)",
+                     rid, section_name, len(description), text_len)
+
+        if dry_run:
+            logger.info("  [DRY RUN] would call LLM to detect conflicts")
+            continue
+
+        prompt = PROMPT_DETECT_CONFLICT.format(
+            image_description=description,
+            text_description=text_content,
+            section_name=section_name,
+        )
+
+        try:
+            raw = llm.chat(
+                model=LLMClient.TEXT_MODEL,
+                messages=[{"role": "user", "content": prompt}],
+            )
+            logger.info("Conflict check response: %d chars", len(raw))
+        except RuntimeError as e:
+            logger.error("Conflict check failed: %s", e)
+            continue
+
+        conflicts = _parse_conflict_json(raw)
+
+        # Enrich with location info
+        for c in conflicts:
+            c["rid"] = rid
+            c["image_path"] = img.get("path", "")
+            if "section" not in c:
+                c["section"] = section_name
+            if src.get("table"):
+                c.setdefault("source_location", {})["table"] = src["table"]
+            if src.get("row"):
+                c.setdefault("source_location", {})["image_row"] = src["row"]
+
+        all_conflicts.extend(conflicts)
+        logger.info("  Found %d conflicts for rid=%s", len(conflicts), rid)
+
+        if any(x.get("type") in DIAGRAM_TYPES
+               for x in image_analysis
+               if x.get("rid", "") != rid):
+            time.sleep(RATE_LIMIT_DELAY)
+
+    # ---- Save ---------------------------------------------------------------
+    conflicts_path = os.path.join(output_dir, f"{basename}_conflicts.json")
+    with open(conflicts_path, "w", encoding="utf-8") as f:
+        json.dump(all_conflicts, f, ensure_ascii=False, indent=2)
+    logger.info("Saved: %s (%d conflicts)", conflicts_path, len(all_conflicts))
+
+    # ---- Summary ------------------------------------------------------------
+    usg = llm.usage
+    logger.info("Tokens: %d prompt + %d completion = %d total",
+                usg["prompt_tokens"], usg["completion_tokens"], usg["total_tokens"])
+
+    return all_conflicts
+
+
+# ---------------------------------------------------------------------------
+# CLI
+# ---------------------------------------------------------------------------
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(
+        description="Detect image-vs-text conflicts in parsed document.",
+    )
+    parser.add_argument("input", metavar="parsed.json", help="Path to _parsed.json from doc_parser")
+    parser.add_argument("--output-dir", metavar="DIR", default=None,
+                        help="Output directory (default: same as input)")
+    parser.add_argument("--dry-run", action="store_true",
+                        help="Print LLM prompts without calling the API.")
+
+    args = parser.parse_args()
+    detect_conflicts(args.input, output_dir=args.output_dir, dry_run=args.dry_run)