doc_parser_skill: - New: verify_flowchart.py (flowchart validation) - Updated: LLM.py (multi-provider: DeepSeek + DashScope) - Updated: image_parser.py (logic tree support, external prompts) - Updated: SKILL.md, prompts/image_prompt.md conflict_detection_skill: - Updated: LLM.py (multi-provider sync) - Updated: detect_conflicts.py (logic tree text conversion) ir_generation_skill: - Replaced old scripts/LLM.py + ir_generator.py with standalone project - New: main.py, config.py, step1-3_*.py, ensemble_merge.py - New: prompts/, tests/ subdirectories tests: - New: acceptance/ test suite with schema validation - Fixed: conftest no longer globally skips non-acceptance tests - Updated: test_sample.py for new ir_generation structure Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
@@ -1,38 +1,97 @@
|
||||
import logging
|
||||
import os
|
||||
import time
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
|
||||
from openai import OpenAI
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Resolve secrets file: priority 1) env OPENCLAW_SECRETS,
|
||||
# 2) workspace-document-analyzer/config/ (relative to skills dir),
|
||||
# 3) .openclaw/config/
|
||||
_SECRETS_FILE = None
|
||||
for _candidate in (
|
||||
os.environ.get("OPENCLAW_SECRETS", ""),
|
||||
Path(__file__).resolve().parents[3] / "config" / "secrets.yaml",
|
||||
Path(__file__).resolve().parents[5] / ".openclaw" / "config" / "secrets.yaml",
|
||||
):
|
||||
if _candidate and Path(_candidate).exists():
|
||||
_SECRETS_FILE = Path(_candidate)
|
||||
break
|
||||
if _SECRETS_FILE is None:
|
||||
_SECRETS_FILE = Path("") # empty fallback
|
||||
|
||||
|
||||
def _load_secrets() -> dict:
    """Load provider settings from the secrets file, with env-var overrides.

    Reads the ``deepseek`` and ``dashscope`` sections of ``_SECRETS_FILE``
    (YAML) when that path is a regular file, then lets the environment
    variables ``DEEPSEEK_API_KEY`` and ``DASHSCOPE_API_KEY`` override the
    corresponding ``apiKey`` entries.

    Returns:
        A dict with zero, one or both of the keys ``"deepseek"`` and
        ``"dashscope"``; each value is that provider's settings dict.
        Loading problems are logged as warnings and never raised.
    """
    secrets = {}
    # is_file() rather than exists(): the module's empty fallback Path("")
    # is the current directory, which exists but cannot be opened, and would
    # otherwise produce a spurious "Failed to load" warning on every call.
    if _SECRETS_FILE.is_file():
        try:
            import yaml
            with open(_SECRETS_FILE, "r", encoding="utf-8") as f:
                data = yaml.safe_load(f) or {}
            if isinstance(data, dict):
                for provider in ("deepseek", "dashscope"):
                    section = data.get(provider)
                    if isinstance(section, dict):
                        secrets[provider] = section
            else:
                logger.warning(
                    "Ignoring %s: top-level YAML value is not a mapping",
                    _SECRETS_FILE,
                )
        except ImportError:
            logger.warning("pyyaml not installed, cannot read %s", _SECRETS_FILE)
        except Exception as e:
            # Best effort: a broken secrets file must not prevent env-only use.
            logger.warning("Failed to load %s: %s", _SECRETS_FILE, e)

    # Env overrides
    dk_env = os.environ.get("DEEPSEEK_API_KEY", "")
    ds_env = os.environ.get("DASHSCOPE_API_KEY", "")
    if dk_env:
        secrets.setdefault("deepseek", {})["apiKey"] = dk_env
    if ds_env:
        secrets.setdefault("dashscope", {})["apiKey"] = ds_env
    return secrets
|
||||
|
||||
|
||||
class LLMClient:
|
||||
"""Low-level OpenAI-compatible LLM client with retry and token tracking.
|
||||
"""Multi-provider LLM client with retry and token tracking.
|
||||
|
||||
Routes text models to DeepSeek, vision models to DashScope (Bailian).
|
||||
Reads API keys from openclaw config/secrets.yaml, with env-var overrides.
|
||||
|
||||
Usage::
|
||||
|
||||
llm = LLMClient()
|
||||
content = llm.chat("qwen3.5-flash", [{"role": "user", "content": "Hello"}])
|
||||
content = llm.chat("deepseek-v4-pro", [{"role": "user", "content": "Hello"}])
|
||||
print(llm.usage)
|
||||
"""
|
||||
|
||||
IMAGE_MODEL = "qwen3-vl-plus"
|
||||
TEXT_MODEL = "qwen3.5-flash-2026-02-23"
|
||||
TEXT_MODEL = "deepseek-v4-flash"
|
||||
|
||||
DASHSCOPE_BASE = "https://dashscope.aliyuncs.com/compatible-mode/v1"
|
||||
DEEPSEEK_BASE = "https://api.deepseek.com/v1"
|
||||
|
||||
TIMEOUT = 120
|
||||
MAX_RETRIES = 3
|
||||
|
||||
_VISION_KEYWORDS = ("vl", "vision", "qwen-vl", "qwen3-vl")
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
*,
|
||||
base_url: str = "https://dashscope.aliyuncs.com/compatible-mode/v1",
|
||||
timeout: int | None = None,
|
||||
):
|
||||
key = os.environ.get("DASHSCOPE_API_KEY", "")
|
||||
if not key:
|
||||
raise ValueError("DASHSCOPE_API_KEY environment variable is not set.")
|
||||
self._client = OpenAI(api_key=key, base_url=base_url)
|
||||
secrets = _load_secrets()
|
||||
|
||||
ds_cfg = secrets.get("dashscope", {})
|
||||
dk_cfg = secrets.get("deepseek", {})
|
||||
|
||||
dashscope_key = ds_cfg.get("apiKey", "")
|
||||
dashscope_url = ds_cfg.get("baseUrl", self.DASHSCOPE_BASE)
|
||||
deepseek_key = dk_cfg.get("apiKey", "")
|
||||
deepseek_url = dk_cfg.get("baseUrl", self.DEEPSEEK_BASE)
|
||||
|
||||
self._ds_client = OpenAI(api_key=dashscope_key, base_url=dashscope_url) if dashscope_key else None
|
||||
self._dk_client = OpenAI(api_key=deepseek_key, base_url=deepseek_url) if deepseek_key else None
|
||||
|
||||
self._timeout = timeout or self.TIMEOUT
|
||||
self._prompt_tokens = 0
|
||||
self._completion_tokens = 0
|
||||
@@ -49,7 +108,7 @@ class LLMClient:
|
||||
@staticmethod
|
||||
def estimate_tokens(text: str) -> int:
    """Return a rough token count for *text*.

    Characters in U+4E00..U+9FFF or U+3000..U+303F are counted as CJK at
    about 1.7 characters per token; every other character at about 3.0 per
    token. The result is never below 1.
    """
    cjk_count = 0
    for ch in text:
        if '\u4e00' <= ch <= '\u9fff' or '\u3000' <= ch <= '\u303f':
            cjk_count += 1
    rest = len(text) - cjk_count
    estimate = int(cjk_count / 1.7 + rest / 3.0)
    return estimate if estimate > 1 else 1
|
||||
|
||||
@@ -58,6 +117,20 @@ class LLMClient:
|
||||
"""Fixed estimate for one vision-model image (~500 tokens)."""
|
||||
return 500
|
||||
|
||||
@staticmethod
|
||||
def _is_vision_model(model: str) -> bool:
    """Return True when the lower-cased *model* name contains a vision keyword."""
    lowered = model.lower()
    for keyword in LLMClient._VISION_KEYWORDS:
        if keyword in lowered:
            return True
    return False
|
||||
|
||||
def _get_client(self, model: str) -> OpenAI:
    """Pick the provider client that should serve *model*.

    Models recognised by ``_is_vision_model`` use ``self._ds_client``; all
    other models use ``self._dk_client``.

    Raises:
        ValueError: if the client required for *model* is ``None``.
    """
    wants_vision = self._is_vision_model(model)
    client = self._ds_client if wants_vision else self._dk_client
    if client is not None:
        return client
    if wants_vision:
        raise ValueError("DASHSCOPE_API_KEY not set but required for vision model")
    raise ValueError("DEEPSEEK_API_KEY not set but required for text model")
|
||||
|
||||
def chat(
|
||||
self, model: str, messages: list[dict], *, timeout: int | None = None,
|
||||
response_format: dict | None = None,
|
||||
@@ -65,8 +138,10 @@ class LLMClient:
|
||||
"""Send a chat completion request and return the response content.
|
||||
|
||||
Automatically retries on failure and accumulates token usage.
|
||||
Routes to DeepSeek for text, DashScope for vision.
|
||||
"""
|
||||
label = f"chat({model})"
|
||||
client = self._get_client(model)
|
||||
|
||||
def _call():
|
||||
t0 = time.time()
|
||||
@@ -74,7 +149,7 @@ class LLMClient:
|
||||
if response_format is not None:
|
||||
kwargs["response_format"] = response_format
|
||||
kwargs["temperature"] = 0
|
||||
resp = self._client.chat.completions.create(**kwargs)
|
||||
resp = client.chat.completions.create(**kwargs)
|
||||
content = resp.choices[0].message.content
|
||||
usg = resp.usage
|
||||
if usg:
|
||||
|
||||
@@ -1,6 +1,8 @@
|
||||
import base64
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
from typing import Optional
|
||||
|
||||
from LLM import LLMClient
|
||||
@@ -8,32 +10,56 @@ from LLM import LLMClient
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Prompts
|
||||
# Prompt loading
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
PROMPT_IMAGE = """请分析这张图片,判断类型并输出文字描述。
|
||||
def _load_prompt() -> str:
|
||||
"""Load PROMPT_IMAGE from external file, falling back to inline default."""
|
||||
prompt_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "..", "prompts")
|
||||
prompt_path = os.path.join(prompt_dir, "image_prompt.md")
|
||||
if os.path.isfile(prompt_path):
|
||||
with open(prompt_path, "r", encoding="utf-8") as f:
|
||||
return f.read()
|
||||
|
||||
# Fallback inline prompt (nested tree format)
|
||||
return """请分析这张图片,判断类型并输出文字描述和(如适用)结构化逻辑树。
|
||||
|
||||
## 判断图片类型
|
||||
|
||||
如果是 **流程图 / 架构图 / 状态图 / 时序图 / 活动图**,详细描述:
|
||||
- 图中所有节点/步骤/状态/组件的名称
|
||||
- 所有连线/箭头/转换关系及其方向
|
||||
- 所有分支条件、判断逻辑和判断结果
|
||||
- 所有文字标注、注释、标签
|
||||
- 图的整体结构和逻辑流程
|
||||
- 如果图片包含多个子图,拆解描述
|
||||
如果是 **流程图 / 架构图 / 状态图 / 时序图 / 活动图**,你需要输出三项内容:
|
||||
1. 类型标签
|
||||
2. **嵌套逻辑树 JSON**(见下方格式)
|
||||
3. 文字描述
|
||||
|
||||
如果是 **其他类型**(UI原型图 / 界面截图 / 设计稿 / 手机屏幕截图 / 网页截图等),简要描述图片内容。
|
||||
如果是 **其他类型**(UI原型图 / 界面截图 / 设计稿 / 手机屏幕截图 / 网页截图等),只输出类型标签和简要文字描述。
|
||||
|
||||
## 嵌套逻辑树 JSON 格式(仅流程图/架构图/状态图/时序图/活动图需要)
|
||||
|
||||
**核心原则:用嵌套的 `children` 数组表达流程的层级关系,而不是用 id 引用。**
|
||||
|
||||
节点类型:`start`(起始), `end`(结束), `process`(处理/状态), `decision`(判断), `action`(动作)
|
||||
|
||||
非判断节点的 `children` 是子节点数组。`end` 节点无 `children`。
|
||||
|
||||
判断节点的 `children` 格式:
|
||||
```json
|
||||
{"condition": "是", "node": {"id": "n6", "name": "...", "type": "action", "children": [...]}}
|
||||
```
|
||||
|
||||
每条从根到 `end` 的路径必须是完整逻辑链。decision 必须穷举所有分支。
|
||||
节点 id 使用 "n1", "n2", "n3"... 格式。
|
||||
|
||||
## 输出格式
|
||||
|
||||
**1. 类型标签(单独一行):**
|
||||
type: <flowchart|architecture|state|sequence|activity|other>
|
||||
|
||||
**2. 文字描述:**
|
||||
该图片的详细文字描述。
|
||||
logic_tree:
|
||||
{...}
|
||||
|
||||
不要输出 ---YAML--- 分隔符或 YAML 内容,不要添加任何额外的解释或问候语。"""
|
||||
description:
|
||||
该图片的详细文字描述。"""
|
||||
|
||||
PROMPT_IMAGE = _load_prompt()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -41,7 +67,10 @@ type: <flowchart|architecture|state|sequence|activity|other>
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class ImageParser:
|
||||
"""Vision LLM wrapper for parsing images (type + description).
|
||||
"""Vision LLM wrapper for parsing images (type + description + logic_tree).
|
||||
|
||||
The nested-tree ``logic_tree`` is stored alongside a backward-compatible
|
||||
flat representation so downstream consumers are not broken.
|
||||
|
||||
Usage::
|
||||
|
||||
@@ -49,7 +78,7 @@ class ImageParser:
|
||||
result = parser.parse_image("images/img1.png")
|
||||
"""
|
||||
|
||||
_VALID_TYPES = {"flowchart", "architecture", "state", "sequence", "activity", "text"}
|
||||
_VALID_TYPES = {"flowchart", "architecture", "state", "sequence", "activity", "other"}
|
||||
|
||||
def __init__(self, llm: LLMClient | None = None):
|
||||
self._llm = llm or LLMClient()
|
||||
@@ -59,9 +88,9 @@ class ImageParser:
|
||||
return self._llm.usage
|
||||
|
||||
def parse_image(self, image_path: str) -> Optional[dict]:
|
||||
"""Parse an image and return its type and description (no YAML IR).
|
||||
"""Parse an image and return its type, description, and optional logic_tree.
|
||||
|
||||
Returns ``{type, description}``, or *None* for UI mockups.
|
||||
Returns ``{type, description, [logic_tree], [logic_tree_nested]}``.
|
||||
"""
|
||||
logger.info("Parsing image: %s", image_path)
|
||||
|
||||
@@ -84,34 +113,292 @@ class ImageParser:
|
||||
logger.error(str(e))
|
||||
return {"type": "other", "description": "", "error": str(e)}
|
||||
|
||||
parsed = self._parse_type_and_description(content)
|
||||
parsed = self._parse_response(content)
|
||||
if parsed is None:
|
||||
return None
|
||||
return {"type": parsed[0], "description": parsed[1]}
|
||||
ptype, description, logic_tree_nested = parsed
|
||||
|
||||
result: dict = {"type": ptype, "description": description}
|
||||
if logic_tree_nested is not None:
|
||||
result["logic_tree_nested"] = logic_tree_nested
|
||||
result["logic_tree"] = self._flatten_tree(logic_tree_nested)
|
||||
return result
|
||||
|
||||
# ---- internals ----------------------------------------------------------
|
||||
|
||||
def _parse_type_and_description(self, content: str) -> Optional[tuple[str, str]]:
|
||||
"""Extract ``(type, description)`` from LLM response.
|
||||
def _parse_response(self, content: str) -> Optional[tuple[str, str, Optional[dict]]]:
|
||||
"""Extract ``(type, description, logic_tree_nested)`` from LLM response.
|
||||
|
||||
Returns *None* for ``[[UI]]`` (skip).
|
||||
Parses the nested-tree format. Returns *None* for unparseable content.
|
||||
"""
|
||||
content = content.strip()
|
||||
if content == "[[UI]]" or content.startswith("[[UI]]"):
|
||||
return None
|
||||
|
||||
parsed_type = "other"
|
||||
desc_lines: list[str] = []
|
||||
for line in content.splitlines():
|
||||
stripped = line.strip()
|
||||
if (stripped.startswith("type:") or stripped.startswith("类型:")) and parsed_type == "other":
|
||||
type_val = stripped.split(":", 1)[1].strip().lower()
|
||||
if type_val in self._VALID_TYPES:
|
||||
parsed_type = type_val
|
||||
else:
|
||||
desc_lines.append(line)
|
||||
logic_tree = None
|
||||
description = ""
|
||||
|
||||
return parsed_type, "\n".join(desc_lines).strip()
|
||||
# --- type ---
|
||||
type_match = re.search(r'(?:type|类型):\s*(\S+)', content)
|
||||
if type_match:
|
||||
type_val = type_match.group(1).strip().lower()
|
||||
if type_val in self._VALID_TYPES:
|
||||
parsed_type = type_val
|
||||
|
||||
# --- logic_tree (anchored at line start) ---
|
||||
lt_match = re.search(r'(?m)^logic_tree:\s*', content)
|
||||
desc_match = re.search(r'(?m)^description:\s*', content)
|
||||
|
||||
if lt_match:
|
||||
lt_start = lt_match.end()
|
||||
lt_end = desc_match.start() if desc_match and desc_match.start() > lt_start else len(content)
|
||||
lt_raw = content[lt_start:lt_end].strip()
|
||||
|
||||
# Try multiple JSON extraction strategies
|
||||
logic_tree = self._extract_json(lt_raw)
|
||||
|
||||
if logic_tree is not None:
|
||||
is_valid, err_msg = self._validate_flowchart(logic_tree)
|
||||
if not is_valid:
|
||||
logger.warning("Flowchart validation warning: %s", err_msg)
|
||||
else:
|
||||
logger.info("Failed to extract logic_tree JSON. Raw block length=%d", len(lt_raw))
|
||||
logger.debug("Raw logic_tree block: %s", lt_raw[:500])
|
||||
elif parsed_type in self._VALID_TYPES - {"other"}:
|
||||
logger.info("Diagram type=%s but no logic_tree: in response. Response length=%d",
|
||||
parsed_type, len(content))
|
||||
logger.debug("Raw response (first 500): %s", content[:500])
|
||||
|
||||
# --- description ---
|
||||
if desc_match:
|
||||
description = content[desc_match.end():].strip()
|
||||
else:
|
||||
desc = content
|
||||
if type_match:
|
||||
desc = desc[type_match.end():]
|
||||
desc = re.sub(r'(?m)^logic_tree:\s*\{.*?\}\s*', '', desc, flags=re.DOTALL)
|
||||
description = desc.strip()
|
||||
|
||||
return parsed_type, description, logic_tree
|
||||
|
||||
@staticmethod
|
||||
def _validate_flowchart(tree: dict) -> tuple[bool, str]:
|
||||
"""Validate a nested flowchart tree structure.
|
||||
|
||||
Returns ``(is_valid, error_message)``. Non-fatal: returns ``False``
|
||||
with a warning message but the tree is still kept.
|
||||
"""
|
||||
if not isinstance(tree, dict):
|
||||
return False, "logic_tree is not a dict"
|
||||
|
||||
seen_ids: set[str] = set()
|
||||
|
||||
def _walk(node: dict, depth: int = 0) -> tuple[bool, str]:
|
||||
if depth > 20:
|
||||
return False, f"Tree too deep (>20) at node {node.get('id', '?')}"
|
||||
|
||||
nid = node.get("id", "")
|
||||
if not nid:
|
||||
return False, "Node missing 'id' field"
|
||||
if not isinstance(nid, str):
|
||||
return False, f"Node id must be string, got {type(nid).__name__}"
|
||||
if nid in seen_ids:
|
||||
return False, f"Duplicate node id: {nid}"
|
||||
seen_ids.add(nid)
|
||||
|
||||
ntype = node.get("type", "")
|
||||
if ntype not in ("start", "end", "process", "decision", "action"):
|
||||
return False, f"Unknown node type '{ntype}' at {nid}"
|
||||
|
||||
if ntype == "end":
|
||||
if "children" in node:
|
||||
return False, f"End node {nid} should not have children"
|
||||
return True, ""
|
||||
|
||||
children = node.get("children")
|
||||
if not children:
|
||||
if ntype != "end":
|
||||
return False, f"Non-end node {nid} ({ntype}) has no children"
|
||||
return True, ""
|
||||
|
||||
if not isinstance(children, list):
|
||||
return False, f"children of {nid} is not a list"
|
||||
|
||||
if ntype == "decision":
|
||||
for child in children:
|
||||
if not isinstance(child, dict):
|
||||
return False, f"decision child of {nid} is not a dict"
|
||||
if "condition" not in child:
|
||||
return False, f"decision child of {nid} missing 'condition'"
|
||||
if "node" not in child:
|
||||
return False, f"decision child of {nid} missing 'node'"
|
||||
ok, err = _walk(child["node"], depth + 1)
|
||||
if not ok:
|
||||
return False, err
|
||||
else:
|
||||
for child in children:
|
||||
if not isinstance(child, dict):
|
||||
return False, f"child of {nid} is not a dict"
|
||||
ok, err = _walk(child, depth + 1)
|
||||
if not ok:
|
||||
return False, err
|
||||
|
||||
return True, ""
|
||||
|
||||
return _walk(tree)
|
||||
|
||||
@staticmethod
|
||||
def _flatten_tree(tree: dict) -> dict:
|
||||
"""Convert a nested flowchart tree into the legacy flat-nodes format.
|
||||
|
||||
This preserves backward compatibility with downstream consumers
|
||||
(conflict_detection_skill, ir_generator) that expect the flat format.
|
||||
"""
|
||||
nodes: list[dict] = []
|
||||
root_name = ""
|
||||
|
||||
def _collect(node: dict):
|
||||
nonlocal root_name
|
||||
nid = node.get("id", "")
|
||||
ntype = node.get("type", "")
|
||||
name = node.get("name", "")
|
||||
|
||||
if root_name == "" and "children" in node:
|
||||
root_name = name
|
||||
|
||||
if ntype == "decision":
|
||||
branches = []
|
||||
for child in node.get("children", []):
|
||||
branches.append({
|
||||
"value": child.get("condition", ""),
|
||||
"target": child["node"].get("id", ""),
|
||||
})
|
||||
_collect(child["node"])
|
||||
nodes.append({
|
||||
"id": nid,
|
||||
"type": ntype,
|
||||
"condition": name,
|
||||
"branches": branches,
|
||||
})
|
||||
elif ntype in ("action", "process", "state"):
|
||||
nodes.append({
|
||||
"id": nid,
|
||||
"type": ntype,
|
||||
"description": name,
|
||||
})
|
||||
for child in node.get("children", []):
|
||||
_collect(child)
|
||||
elif ntype == "start":
|
||||
nodes.append({
|
||||
"id": nid,
|
||||
"type": ntype,
|
||||
"description": name,
|
||||
})
|
||||
for child in node.get("children", []):
|
||||
_collect(child)
|
||||
# end nodes are collected but have no children
|
||||
|
||||
_collect(tree)
|
||||
|
||||
# Add end nodes from the nested tree
|
||||
ends: list[dict] = []
|
||||
|
||||
def _collect_ends(node: dict):
|
||||
if node.get("type") == "end":
|
||||
ends.append({
|
||||
"id": node.get("id", ""),
|
||||
"type": "end",
|
||||
"description": node.get("name", ""),
|
||||
})
|
||||
elif "children" in node:
|
||||
for child in node.get("children", []):
|
||||
if isinstance(child, dict):
|
||||
if "node" in child:
|
||||
_collect_ends(child["node"])
|
||||
else:
|
||||
_collect_ends(child)
|
||||
|
||||
_collect_ends(tree)
|
||||
nodes.extend(ends)
|
||||
|
||||
return {"root": root_name, "nodes": nodes}
|
||||
|
||||
@staticmethod
|
||||
def extract_paths(tree: dict) -> list[list[dict]]:
    """Extract all root-to-leaf paths from a nested flowchart tree.

    Each path is a list of ``{"id", "name", "type"}`` dicts, one per node
    from the root down to a leaf. A leaf is an ``end`` node or any node with
    no usable children. For ``decision`` nodes the successor of each child is
    the dict stored under its ``"node"`` key; for other nodes the child
    itself.

    Malformed children (non-dict entries, decision branches without a dict
    ``"node"``) are ignored rather than raising, so the function can also be
    used on trees that failed structural validation.

    Args:
        tree: Root node of the nested tree.

    Returns:
        The list of paths, in depth-first order; empty if *tree* is not a dict.
    """
    paths: list[list[dict]] = []

    def _walk(node: dict, current_path: list[dict]):
        entry = {
            "id": node.get("id", ""),
            "name": node.get("name", ""),
            "type": node.get("type", ""),
        }
        new_path = current_path + [entry]

        if node.get("type") == "end":
            paths.append(new_path)
            return

        children = node.get("children")
        if not isinstance(children, list):
            children = []

        is_decision = node.get("type") == "decision"
        successors = []
        for child in children:
            if is_decision:
                target = child.get("node") if isinstance(child, dict) else None
            else:
                target = child
            if isinstance(target, dict):
                successors.append(target)

        if not successors:
            # Dead end (no children, or none that are usable): still a path.
            paths.append(new_path)
            return

        for target in successors:
            _walk(target, new_path)

    if isinstance(tree, dict):
        _walk(tree, [])
    return paths
|
||||
|
||||
@staticmethod
|
||||
def paths_to_text(paths: list[list[dict]]) -> str:
    """Render extracted paths as numbered, human-readable lines.

    Every path becomes one line, numbered from 1, with its steps joined by
    `` -> ``. A step is shown as ``[tag] name`` where the tag is ``判断`` for
    decision nodes, ``结束`` for end nodes and the raw node type otherwise.
    """

    def _render(step: dict) -> str:
        kind = step["type"]
        tag = "判断" if kind == "decision" else "结束" if kind == "end" else kind
        return f"[{tag}] {step['name']}"

    return "\n".join(
        f"路径 {idx}: {' -> '.join(_render(step) for step in path)}"
        for idx, path in enumerate(paths, 1)
    )
|
||||
|
||||
@staticmethod
|
||||
def _extract_json(text: str) -> Optional[dict]:
|
||||
"""Try multiple strategies to extract a JSON object from text.
|
||||
|
||||
Returns the parsed dict or None.
|
||||
"""
|
||||
# Strategy 1: first { ... } pair (simple regex)
|
||||
json_match = re.search(r'\{.*\}', text, re.DOTALL)
|
||||
if json_match:
|
||||
try:
|
||||
return json.loads(json_match.group())
|
||||
except json.JSONDecodeError:
|
||||
pass
|
||||
|
||||
# Strategy 2: find balanced braces
|
||||
start = text.find("{")
|
||||
if start >= 0:
|
||||
depth = 0
|
||||
for i in range(start, len(text)):
|
||||
if text[i] == "{":
|
||||
depth += 1
|
||||
elif text[i] == "}":
|
||||
depth -= 1
|
||||
if depth == 0:
|
||||
try:
|
||||
return json.loads(text[start:i + 1])
|
||||
except json.JSONDecodeError:
|
||||
break
|
||||
return None
|
||||
|
||||
@staticmethod
|
||||
def _mime_type(image_path: str) -> str:
|
||||
|
||||
@@ -0,0 +1,384 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Verify flowchart logic trees for structural correctness and consistency.
|
||||
|
||||
Usage::
|
||||
|
||||
python verify_flowchart.py <parsed.json|flowchart.json> [--llm] [--output-report REPORT.md]
|
||||
|
||||
Performs three levels of checks:
|
||||
|
||||
1. **Structural validation** — tree integrity, node uniqueness, leaf types
|
||||
2. **Path extraction** — renders all root-to-leaf paths as readable text
|
||||
3. **LLM consistency check** (opt-in with ``--llm``) — compares extracted paths
|
||||
against the original text description for logical inconsistencies
|
||||
|
||||
Outputs PASS/FAIL and a detailed report.
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import sys
|
||||
|
||||
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
|
||||
from image_parser import ImageParser
|
||||
from LLM import LLMClient
|
||||
|
||||
logging.basicConfig(
|
||||
level=logging.INFO,
|
||||
format="%(asctime)s [%(levelname)s] %(message)s",
|
||||
datefmt="%Y-%m-%d %H:%M:%S",
|
||||
)
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Prompt for LLM path-vs-description consistency check
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
PROMPT_VERIFY_PATHS = """你是一个流程图审核专家。以下内容来自同一张流程图的解析结果:
|
||||
|
||||
## 流程图路径(从嵌套逻辑树提取的所有根到叶路径)
|
||||
```
|
||||
{paths_text}
|
||||
```
|
||||
|
||||
## 原始文字描述
|
||||
```
|
||||
{description}
|
||||
```
|
||||
|
||||
## 你的任务
|
||||
逐条检查每条路径是否与文字描述一致。重点关注:
|
||||
|
||||
1. **分支方向错误**:路径中的判断分支走向是否与文字描述矛盾?
|
||||
例如:文字说"满足条件后退出",但路径中"是"分支走向了"不受限"。
|
||||
2. **缺失步骤**:路径中是否缺少文字描述中提到的关键步骤?
|
||||
3. **冗余步骤**:路径中是否包含文字描述未提及的多余步骤?
|
||||
4. **条件颠倒**:判断条件的"是/否"分支是否与文字描述相反?
|
||||
|
||||
## 输出格式
|
||||
|
||||
如果**所有路径一致**,只输出:
|
||||
```
|
||||
[[PATHS_CONSISTENT]]
|
||||
```
|
||||
|
||||
如果**发现不一致**,输出 JSON 数组:
|
||||
```json
|
||||
[
|
||||
{{
|
||||
"path_index": 1,
|
||||
"issue_type": "branch_error|missing_step|redundant_step|condition_reversed",
|
||||
"severity": "high|medium|low",
|
||||
"description": "用中文说明具体问题"
|
||||
}}
|
||||
]
|
||||
```
|
||||
|
||||
注意:输出必须是严格合法的 JSON 数组,不要有尾随逗号,不要包含代码块包裹符号。
|
||||
"""
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Core verification logic
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def verify_parsed_json(parsed_path: str, *, use_llm: bool = False) -> dict:
    """Verify every flowchart entry of a ``_parsed.json`` file.

    Only entries of ``image_analysis`` whose ``type`` is ``"flowchart"`` are
    checked. An entry passes when it is structurally OK and, if *use_llm* is
    set, its LLM check did not report issues.

    Args:
        parsed_path: Path of the JSON file to load.
        use_llm: When true, an ``LLMClient`` is created and handed to the
            per-flowchart check.

    Returns:
        A report dict with keys ``total_flowcharts``, ``passed``, ``failed``
        and ``results`` (one result dict per flowchart).
    """
    with open(parsed_path, "r", encoding="utf-8") as f:
        data = json.load(f)

    flowcharts = []
    for img in data.get("image_analysis", []):
        if img.get("type") == "flowchart":
            flowcharts.append(img)

    llm = LLMClient() if use_llm else None

    results = []
    passed = 0
    failed = 0
    for img in flowcharts:
        logger.info("Verifying flowchart: rid=%s", img.get("rid", "unknown"))
        result = _verify_single(img, llm)
        results.append(result)

        # The LLM verdict only counts when the LLM check was requested.
        llm_passed = result.get("llm_ok", True) if use_llm else True
        if result["structural_ok"] and llm_passed:
            passed += 1
        else:
            failed += 1

    return {
        "total_flowcharts": len(flowcharts),
        "passed": passed,
        "failed": failed,
        "results": results,
    }
|
||||
|
||||
|
||||
def verify_flowchart_file(filepath: str, *, use_llm: bool = False) -> dict:
    """Load a standalone flowchart JSON file and verify it.

    The file content is treated as one nested logic tree with an empty
    description; the file's base name is used as its ``rid``.

    Args:
        filepath: Path of the JSON file holding the tree.
        use_llm: When true, an ``LLMClient`` is created and handed to the
            per-flowchart check.

    Returns:
        A report dict with keys ``total_flowcharts`` (always 1), ``passed``,
        ``failed`` and ``results``.
    """
    with open(filepath, "r", encoding="utf-8") as f:
        tree = json.load(f)

    img = {"logic_tree_nested": tree, "description": "", "rid": os.path.basename(filepath)}
    llm = LLMClient() if use_llm else None
    result = _verify_single(img, llm)

    # Same pass criterion as verify_parsed_json: the LLM verdict counts
    # whenever the LLM check was requested.
    ok = bool(result["structural_ok"]) and (
        not use_llm or bool(result.get("llm_ok", True))
    )
    return {
        "total_flowcharts": 1,
        "passed": 1 if ok else 0,
        "failed": 0 if ok else 1,
        "results": [result],
    }
|
||||
|
||||
|
||||
def _verify_single(img: dict, llm: LLMClient | None) -> dict:
    """Verify a single flowchart image-analysis entry.

    Runs up to three levels of checks: structural validation, path
    extraction, and (only when *llm* is given and both a description and
    path text are available) an LLM comparison of the paths against the
    description.

    Args:
        img: Entry with optional keys ``rid``, ``description``,
            ``logic_tree_nested`` and ``logic_tree``.
        llm: Client used for the consistency check, or ``None`` to skip it.

    Returns:
        A result dict with ``rid``, ``structural_ok``, ``errors``,
        ``paths_text``, ``llm_issues`` and (except when no usable tree is
        present) ``llm_ok``.
    """
    rid = img.get("rid", "unknown")
    # "description" may be present but null in JSON.
    description = (img.get("description") or "").strip()

    # Try nested format first, fall back to flat format
    tree = img.get("logic_tree_nested") or img.get("logic_tree")
    if not isinstance(tree, dict):
        reason = "No logic_tree found" if tree is None else "logic_tree is not a JSON object"
        return {
            "rid": rid,
            "structural_ok": False,
            "errors": [reason],
            "paths_text": "",
            "llm_issues": [],
        }

    # Check if it's the new nested format or old flat format
    is_nested = "children" in tree and isinstance(tree.get("children"), list)

    # --- Level 1: Structural validation ---
    structural_ok = True
    errors: list[str] = []
    paths: list = []

    if is_nested:
        ok, err = ImageParser._validate_flowchart(tree)
        if not ok:
            structural_ok = False
            errors.append(f"Structure: {err}")

        # Extract paths. The tree may have just failed validation, so a
        # malformed node must end up in the report, not in a traceback.
        try:
            paths = ImageParser.extract_paths(tree)
            paths_text = ImageParser.paths_to_text(paths)
        except (KeyError, TypeError, AttributeError) as exc:
            structural_ok = False
            errors.append(f"Path extraction failed: {exc!r}")
            paths, paths_text = [], ""
        errors.append(f"Path count: {len(paths)}")
    else:
        # Old flat format — basic check
        nodes = tree.get("nodes", [])
        ids = [n.get("id", "") for n in nodes]
        if len(ids) != len(set(ids)):
            structural_ok = False
            errors.append("Structure: duplicate node ids in flat format")

        # Build simple path-like text for flat format
        paths_text = _flat_to_text(tree)

    # --- Level 2: Path count sanity check ---
    if is_nested and len(paths) == 0:
        structural_ok = False
        errors.append("No paths extracted from tree")

    # --- Level 3: LLM consistency check ---
    llm_issues: list[dict] = []
    llm_ok = True
    if llm and description and paths_text:
        prompt = PROMPT_VERIFY_PATHS.format(
            paths_text=paths_text,
            description=description,
        )
        try:
            raw = llm.chat(
                model=LLMClient.TEXT_MODEL,
                messages=[{"role": "user", "content": prompt}],
            )
            llm_issues = _parse_llm_issues(raw)
            if llm_issues:
                llm_ok = False
                errors.append(f"LLM found {len(llm_issues)} issue(s)")
        except (RuntimeError, ValueError) as e:
            # ValueError is what the client raises when the API key for the
            # requested model is not configured.
            errors.append(f"LLM check failed: {e}")

    return {
        "rid": rid,
        "structural_ok": structural_ok,
        "errors": errors,
        "paths_text": paths_text,
        "llm_ok": llm_ok,
        "llm_issues": llm_issues,
    }
|
||||
|
||||
|
||||
def _flat_to_text(tree: dict) -> str:
|
||||
"""Build path-like text from old flat-format logic_tree."""
|
||||
nodes = tree.get("nodes", [])
|
||||
root = tree.get("root", "")
|
||||
lines = [f"Root: {root}"]
|
||||
|
||||
node_map = {n["id"]: n for n in nodes}
|
||||
|
||||
def _trace(node_id: str, visited: set, path: list[str]) -> list[str]:
|
||||
if node_id in visited:
|
||||
path.append(f"[循环] {node_id}")
|
||||
return path
|
||||
visited.add(node_id)
|
||||
node = node_map.get(node_id)
|
||||
if node is None:
|
||||
path.append(f"[缺失] {node_id}")
|
||||
return path
|
||||
ntype = node.get("type", "")
|
||||
if ntype == "decision":
|
||||
cond = node.get("condition", "")
|
||||
for b in node.get("branches", []):
|
||||
val = b.get("value", "")
|
||||
tgt = b.get("target", "")
|
||||
new_path = path + [f"[判断] {cond} → {val}"]
|
||||
_trace(tgt, visited.copy(), new_path)
|
||||
elif ntype == "end":
|
||||
path.append(f"[结束] {node.get('description', '')}")
|
||||
lines.append(" -> ".join(path))
|
||||
else:
|
||||
path.append(f"[{ntype}] {node.get('description', '')}")
|
||||
# Flat format doesn't have explicit children for non-decision nodes
|
||||
# so we can't trace further
|
||||
lines.append(" -> ".join(path))
|
||||
return path
|
||||
|
||||
# Try to find start nodes
|
||||
starts = [n for n in nodes if n.get("type") == "start"]
|
||||
if starts:
|
||||
for s in starts:
|
||||
_trace(s["id"], set(), [])
|
||||
else:
|
||||
lines.append("(Cannot trace: no start node in flat format)")
|
||||
|
||||
return "\n".join(lines)
|
||||
|
||||
|
||||
def _parse_llm_issues(content: str) -> list[dict]:
|
||||
"""Parse LLM response for path consistency issues."""
|
||||
stripped = content.strip()
|
||||
if "[[PATHS_CONSISTENT]]" in stripped:
|
||||
return []
|
||||
|
||||
# Remove markdown code fences
|
||||
if "```json" in stripped:
|
||||
stripped = stripped.split("```json", 1)[1]
|
||||
if "```" in stripped:
|
||||
stripped = stripped.split("```", 1)[0]
|
||||
elif "```" in stripped:
|
||||
stripped = stripped.split("```", 1)[1]
|
||||
if "```" in stripped:
|
||||
stripped = stripped.split("```", 1)[0]
|
||||
|
||||
stripped = stripped.strip()
|
||||
if not stripped:
|
||||
return []
|
||||
|
||||
try:
|
||||
issues = json.loads(stripped)
|
||||
if isinstance(issues, list):
|
||||
return issues
|
||||
return []
|
||||
except json.JSONDecodeError:
|
||||
logger.debug("Failed to parse LLM issues: %s", stripped[:200])
|
||||
return []
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Report rendering
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def print_report(report: dict) -> str:
    """Print a human-readable verification report and return it as a string.

    Args:
        report: Dict with ``total_flowcharts``, ``passed``, ``failed`` and
            ``results``; each result has ``rid`` and ``structural_ok`` and
            may have ``llm_ok``, ``errors``, ``paths_text`` and
            ``llm_issues``.

    Returns:
        The full report text (also written to stdout).
    """
    lines: list[str] = []
    lines.append("=" * 60)
    lines.append("流程图校验报告")
    lines.append("=" * 60)
    lines.append(f"流程图总数: {report['total_flowcharts']}")
    lines.append(f"通过: {report['passed']}")
    lines.append(f"失败: {report['failed']}")

    overall = "PASS" if report["failed"] == 0 else "FAIL"
    lines.append(f"总体结果: {overall}")
    lines.append("")

    for i, r in enumerate(report["results"], 1):
        rid = r["rid"]
        # An item fails on a structural problem OR on LLM-reported issues,
        # so the per-item status agrees with the passed/failed totals.
        item_ok = r["structural_ok"] and r.get("llm_ok", True)
        status = "[PASS]" if item_ok else "[FAIL]"
        lines.append(f"[{i}] rid={rid} {status}")
        for err in r.get("errors", []):
            lines.append(f" - {err}")

        if r.get("paths_text"):
            lines.append(" 路径:")
            for path_line in r["paths_text"].split("\n"):
                lines.append(f" {path_line}")

        llm_issues = r.get("llm_issues", [])
        if llm_issues:
            lines.append(" LLM发现的问题:")
            for issue in llm_issues:
                if isinstance(issue, dict):
                    lines.append(f" [{issue.get('severity', '?')}] {issue.get('description', '')}")
                else:
                    # Issues come from LLM output; show odd items verbatim.
                    lines.append(f" [?] {issue}")
        lines.append("")

    report_text = "\n".join(lines)
    print(report_text)
    return report_text
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# CLI
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def main():
    """Command-line entry point: verify a file, print the report, set the exit code.

    The input is treated as a ``_parsed.json`` when its top level contains
    ``image_analysis`` and as a standalone flowchart tree otherwise. The
    process exits with status 1 when at least one flowchart failed.
    """
    cli = argparse.ArgumentParser(
        description="Verify flowchart logic trees for correctness.",
    )
    cli.add_argument(
        "input",
        metavar="FILE",
        help="Path to _parsed.json or standalone flowchart JSON",
    )
    cli.add_argument(
        "--llm",
        action="store_true",
        help="Run LLM consistency check (compares paths against text description)",
    )
    cli.add_argument(
        "--output-report",
        metavar="PATH",
        help="Save verification report to a file",
    )
    opts = cli.parse_args()

    # Peek at the file only to decide which verifier applies.
    with open(opts.input, "r", encoding="utf-8") as fh:
        payload = json.load(fh)

    verifier = verify_parsed_json if "image_analysis" in payload else verify_flowchart_file
    report = verifier(opts.input, use_llm=opts.llm)

    rendered = print_report(report)

    if opts.output_report:
        with open(opts.output_report, "w", encoding="utf-8") as out:
            out.write(rendered)
        logger.info("Report saved: %s", opts.output_report)

    # Exit code: 0 for PASS, 1 for FAIL
    if report["failed"] > 0:
        sys.exit(1)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
Reference in New Issue
Block a user