Initial commit: document_analyzer with CI/CD pipeline

- 4-skill pipeline (doc_parser, conflict_detection, ir_generation, resolution_application)
- CI workflow on push/PR (.gitea/workflows/ci.yml)
- Auto-issue on CI failure (.gitea/workflows/auto-issue.yml)
- Pytest smoke tests (tests/test_sample.py)

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-05-29 20:00:26 +08:00
commit 40567a4fb6
22 changed files with 2898 additions and 0 deletions
@@ -0,0 +1,123 @@
+import base64
+import logging
+import os
+from typing import Optional
+
+from LLM import LLMClient
+
+# Module-level logger named after this module; ImageParser uses it for
+# progress ("Parsing image: ...") and error messages.
+logger = logging.getLogger(__name__)
+
+# ---------------------------------------------------------------------------
+# Prompts
+# ---------------------------------------------------------------------------
+
+# Instruction text sent to the vision model together with the image
+# (see ImageParser.parse_image). It asks the model to classify the picture,
+# emit a single `type: <tag>` line with one of
+# flowchart|architecture|state|sequence|activity|other, and then give a
+# free-text description: detailed for diagrams, brief for everything else.
+# It also tells the model not to emit YAML or any extra commentary.
+PROMPT_IMAGE = """请分析这张图片，判断类型并输出文字描述。
+
+## 判断图片类型
+
+如果是 **流程图 / 架构图 / 状态图 / 时序图 / 活动图**，详细描述：
+- 图中所有节点/步骤/状态/组件的名称
+- 所有连线/箭头/转换关系及其方向
+- 所有分支条件、判断逻辑和判断结果
+- 所有文字标注、注释、标签
+- 图的整体结构和逻辑流程
+- 如果图片包含多个子图，拆解描述
+
+如果是 **其他类型**（UI原型图 / 界面截图 / 设计稿 / 手机屏幕截图 / 网页截图等），简要描述图片内容。
+
+## 输出格式
+
+**1. 类型标签（单独一行）：**
+type: <flowchart|architecture|state|sequence|activity|other>
+
+**2. 文字描述：**
+该图片的详细文字描述。
+
+不要输出 ---YAML--- 分隔符或 YAML 内容，不要添加任何额外的解释或问候语。"""
+
+
+# ---------------------------------------------------------------------------
+# ImageParser
+# ---------------------------------------------------------------------------
+
+class ImageParser:
+    """Vision-LLM wrapper that classifies an image and describes it in text.
+
+    The image is sent to the model as a base64 ``data:`` URL together with
+    ``PROMPT_IMAGE``; the reply is split into a type tag and a description.
+
+    Usage::
+
+        parser = ImageParser()
+        result = parser.parse_image("images/img1.png")
+    """
+
+    # Tags accepted on the reply's ``type:`` line. ``other`` is the tag the
+    # prompt offers for non-diagram images and is also the fallback value.
+    # ``text`` is not offered by the prompt; it is kept only so that replies
+    # which were accepted before are still accepted.
+    _VALID_TYPES = frozenset(
+        {"flowchart", "architecture", "state", "sequence", "activity", "other", "text"}
+    )
+
+    # Keys that may introduce the type line (matched case-insensitively).
+    _TYPE_KEYS = ("type", "类型")
+
+    # File extension (lower-case, without the dot) -> MIME type for the data URL.
+    _MIME_TYPES = {
+        "png": "image/png",
+        "jpg": "image/jpeg",
+        "jpeg": "image/jpeg",
+        "gif": "image/gif",
+        "bmp": "image/bmp",
+        "webp": "image/webp",
+        "svg": "image/svg+xml",
+        "tif": "image/tiff",
+        "tiff": "image/tiff",
+    }
+    _DEFAULT_MIME = "image/png"
+
+    def __init__(self, llm: LLMClient | None = None):
+        """Create a parser.
+
+        Args:
+            llm: Client used for the chat call. A default ``LLMClient()`` is
+                created only when no client is passed.
+        """
+        # Compare against None so that a supplied client is never discarded
+        # merely because it evaluates as falsy.
+        self._llm = llm if llm is not None else LLMClient()
+
+    @property
+    def usage(self) -> dict:
+        """Return the underlying client's ``usage`` value unchanged."""
+        return self._llm.usage
+
+    def parse_image(self, image_path: str) -> Optional[dict]:
+        """Send an image to the vision model and return its type and description.
+
+        Args:
+            image_path: Path of the image file to read.
+
+        Returns:
+            ``{"type": ..., "description": ...}`` on success.
+            ``{"type": "other", "description": "", "error": <message>}`` when
+            the chat call raises ``RuntimeError``.
+            ``None`` when the reply starts with the ``[[UI]]`` skip marker.
+
+        Raises:
+            OSError: If the image file cannot be opened or read; file errors
+                are deliberately not converted into an error result.
+        """
+        logger.info("Parsing image: %s", image_path)
+
+        with open(image_path, "rb") as f:
+            img_b64 = base64.b64encode(f.read()).decode()
+        data_url = f"data:{self._mime_type(image_path)};base64,{img_b64}"
+
+        try:
+            content = self._llm.chat(
+                model=LLMClient.IMAGE_MODEL,
+                messages=[{
+                    "role": "user",
+                    "content": [
+                        {"type": "image_url", "image_url": {"url": data_url}},
+                        {"type": "text", "text": PROMPT_IMAGE},
+                    ],
+                }],
+            )
+        except RuntimeError as e:
+            message = str(e)
+            logger.error("%s", message)
+            return {"type": "other", "description": "", "error": message}
+
+        parsed = self._parse_type_and_description(content)
+        if parsed is None:
+            return None
+        image_type, description = parsed
+        return {"type": image_type, "description": description}
+
+    # ---- internals ----------------------------------------------------------
+
+    def _parse_type_and_description(self, content: str) -> Optional[tuple[str, str]]:
+        """Extract ``(type, description)`` from the model reply.
+
+        Tag lines are consumed until the first one carrying a recognised tag;
+        tag lines with an unrecognised value seen before that are dropped.
+        Every other line, including later lines that merely look like a tag
+        line, is kept in the description. The type defaults to ``"other"``.
+
+        Args:
+            content: Raw reply text; ``None`` or empty is treated as an empty
+                reply.
+
+        Returns:
+            ``(type, description)``, or ``None`` when the reply starts with
+            the ``[[UI]]`` skip marker.
+        """
+        content = (content or "").strip()
+        if content.startswith("[[UI]]"):
+            return None
+
+        parsed_type = "other"
+        # Track "tag found" separately: "other" is both the default and a
+        # legitimate tag, so it cannot double as the not-yet-found sentinel.
+        tag_found = False
+        desc_lines: list[str] = []
+        for line in content.splitlines():
+            tag = None if tag_found else self._extract_type_tag(line)
+            if tag is None:
+                desc_lines.append(line)
+            elif tag in self._VALID_TYPES:
+                parsed_type = tag
+                tag_found = True
+            # else: a tag line with an unrecognised value -- drop it and keep
+            # looking for a usable one.
+
+        return parsed_type, "\n".join(desc_lines).strip()
+
+    @classmethod
+    def _extract_type_tag(cls, line: str) -> Optional[str]:
+        """Return the lower-cased tag value if *line* is a type line, else ``None``.
+
+        Accepts ASCII and full-width colons, any letter case for the key, and
+        markdown/placeholder decoration such as ``**type:** <flowchart>``.
+        """
+        key, sep, value = line.strip().replace("：", ":", 1).partition(":")
+        if not sep:
+            return None
+        if key.strip(" \t*`").lower() not in cls._TYPE_KEYS:
+            return None
+        return value.strip(" \t<>`*\"'").lower()
+
+    @staticmethod
+    def _mime_type(image_path: str) -> str:
+        """Map the file extension of *image_path* to a MIME type.
+
+        The lookup is case-insensitive; unknown or missing extensions fall
+        back to ``image/png``.
+        """
+        ext = os.path.splitext(image_path)[1].lstrip(".").lower()
+        return ImageParser._MIME_TYPES.get(ext, ImageParser._DEFAULT_MIME)