- 4-skill pipeline (doc_parser, conflict_detection, ir_generation, resolution_application)
- CI workflow on push/PR (.gitea/workflows/ci.yml)
- Auto-issue on CI failure (.gitea/workflows/auto-issue.yml)
- Pytest smoke tests (tests/test_sample.py)

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,123 @@
|
||||
import base64
|
||||
import logging
|
||||
import os
|
||||
from typing import Optional
|
||||
|
||||
from LLM import LLMClient
|
||||
|
||||
logger = logging.getLogger(__name__)

# ---------------------------------------------------------------------------
# Prompts
# ---------------------------------------------------------------------------

# Instruction sent to the vision model together with the image. It asks the
# model to classify the picture, emit a single ``type: <label>`` line using one
# of the labels listed in the prompt, and then describe the image in text.
# The text is runtime data (it is what the model reads), so it is kept in its
# original language and must not contain diff/gutter residue.
PROMPT_IMAGE = """请分析这张图片,判断类型并输出文字描述。

## 判断图片类型

如果是 **流程图 / 架构图 / 状态图 / 时序图 / 活动图**,详细描述:
- 图中所有节点/步骤/状态/组件的名称
- 所有连线/箭头/转换关系及其方向
- 所有分支条件、判断逻辑和判断结果
- 所有文字标注、注释、标签
- 图的整体结构和逻辑流程
- 如果图片包含多个子图,拆解描述

如果是 **其他类型**(UI原型图 / 界面截图 / 设计稿 / 手机屏幕截图 / 网页截图等),简要描述图片内容。

## 输出格式

**1. 类型标签(单独一行):**
type: <flowchart|architecture|state|sequence|activity|other>

**2. 文字描述:**
该图片的详细文字描述。

不要输出 ---YAML--- 分隔符或 YAML 内容,不要添加任何额外的解释或问候语。"""
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# ImageParser
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class ImageParser:
    """Vision LLM wrapper for parsing images (type + description).

    The image is sent to the LLM as a base64 ``data:`` URL together with
    ``PROMPT_IMAGE``; the textual reply is split into a type label and a
    free-text description.

    Usage::

        parser = ImageParser()
        result = parser.parse_image("images/img1.png")
    """

    # Labels accepted on the ``type:`` line of the reply. "other" is what the
    # prompt offers for non-diagram images; "text" is kept so replies that
    # were accepted before are still accepted.
    _VALID_TYPES = frozenset(
        {"flowchart", "architecture", "state", "sequence", "activity", "other", "text"}
    )

    # Line prefixes (compared lower-cased) that mark the type label. Both the
    # ASCII and the full-width colon are accepted because the prompt is
    # Chinese and models frequently answer with full-width punctuation.
    _TYPE_PREFIXES = ("type:", "type:", "类型:", "类型:")

    # File extension (lower-case, without the dot) -> MIME type for the data URL.
    _MIME_TYPES = {
        "png": "image/png",
        "jpg": "image/jpeg",
        "jpeg": "image/jpeg",
        "gif": "image/gif",
        "bmp": "image/bmp",
        "webp": "image/webp",
        "svg": "image/svg+xml",
        "tif": "image/tiff",
        "tiff": "image/tiff",
    }

    def __init__(self, llm: Optional["LLMClient"] = None):
        """Create a parser.

        Args:
            llm: Client used for the chat call. When omitted (or falsy) a new
                ``LLMClient()`` is constructed.
        """
        # The annotation is a forward reference so it is not evaluated when
        # the class body is executed.
        self._llm = llm or LLMClient()

    @property
    def usage(self) -> dict:
        """Return the ``usage`` attribute of the underlying LLM client."""
        return self._llm.usage

    def parse_image(self, image_path: str) -> Optional[dict]:
        """Parse an image and return its type and description (no YAML IR).

        Args:
            image_path: Path of the image file to read and send to the LLM.

        Returns:
            ``{"type": ..., "description": ...}`` on success; *None* when the
            reply starts with the ``[[UI]]`` marker; or
            ``{"type": "other", "description": "", "error": <message>}`` when
            the LLM call raises ``RuntimeError``.

        Raises:
            OSError: If the image file cannot be opened or read (the file is
                read before the guarded LLM call, so this is not caught).
        """
        logger.info("Parsing image: %s", image_path)

        with open(image_path, "rb") as f:
            img_b64 = base64.b64encode(f.read()).decode()
        mime = self._mime_type(image_path)

        try:
            content = self._llm.chat(
                model=LLMClient.IMAGE_MODEL,
                messages=[{
                    "role": "user",
                    "content": [
                        {"type": "image_url", "image_url": {"url": f"data:{mime};base64,{img_b64}"}},
                        {"type": "text", "text": PROMPT_IMAGE},
                    ],
                }],
            )
        except RuntimeError as e:
            # Best effort: report the failure in-band instead of raising.
            logger.error("%s", e)
            return {"type": "other", "description": "", "error": str(e)}

        parsed = self._parse_type_and_description(content)
        if parsed is None:
            return None
        parsed_type, description = parsed
        return {"type": parsed_type, "description": description}

    # ---- internals ----------------------------------------------------------

    def _parse_type_and_description(self, content: str) -> Optional[tuple[str, str]]:
        """Extract ``(type, description)`` from the LLM response.

        The first line that carries a recognised type label sets the type and
        is removed from the description. Label-looking lines seen *before*
        that with an unrecognised value are dropped; label-looking lines seen
        *after* it are ordinary description text. When no valid label is
        found the type defaults to ``"other"``.

        Args:
            content: Raw reply text; ``None`` or empty is treated as an empty
                reply.

        Returns:
            ``(type, description)``, or *None* when the reply starts with
            ``[[UI]]`` (skip).
        """
        content = (content or "").strip()
        # NOTE(review): the current prompt does not ask for a ``[[UI]]``
        # marker; presumably kept for replies produced by an earlier prompt.
        if content.startswith("[[UI]]"):
            return None

        parsed_type = "other"
        type_found = False
        desc_lines: list[str] = []
        for line in content.splitlines():
            stripped = line.strip()
            if not type_found and stripped.lower().startswith(self._TYPE_PREFIXES):
                type_val = self._label_value(stripped)
                if type_val in self._VALID_TYPES:
                    parsed_type = type_val
                    # Explicit flag instead of comparing against the default,
                    # so an explicit "other" label also ends the search.
                    type_found = True
                continue
            desc_lines.append(line)

        return parsed_type, "\n".join(desc_lines).strip()

    @staticmethod
    def _label_value(label_line: str) -> str:
        """Return the normalised value of a ``type:`` / ``类型:`` label line.

        Everything after the first colon (ASCII or full-width) is taken,
        surrounding whitespace and wrapper characters such as ``<>``,
        backticks, asterisks and quotes are removed, and the result is
        lower-cased.
        """
        normalized = label_line.replace(":", ":", 1)
        value = normalized.partition(":")[2]
        return value.strip().strip("<>`*\"'").strip().lower()

    @staticmethod
    def _mime_type(image_path: str) -> str:
        """Map the file extension of *image_path* to a MIME type.

        The extension is matched case-insensitively; unknown or missing
        extensions fall back to ``"image/png"``.
        """
        ext = os.path.splitext(image_path)[1].lstrip(".").lower()
        return ImageParser._MIME_TYPES.get(ext, "image/png")
|
||||
Reference in New Issue
Block a user