doc_parser_skill: - New: verify_flowchart.py (flowchart validation) - Updated: LLM.py (multi-provider: DeepSeek + DashScope) - Updated: image_parser.py (logic tree support, external prompts) - Updated: SKILL.md, prompts/image_prompt.md conflict_detection_skill: - Updated: LLM.py (multi-provider sync) - Updated: detect_conflicts.py (logic tree text conversion) ir_generation_skill: - Replaced old scripts/LLM.py + ir_generator.py with standalone project - New: main.py, config.py, step1-3_*.py, ensemble_merge.py - New: prompts/, tests/ subdirectories tests: - New: acceptance/ test suite with schema validation - Fixed: conftest no longer globally skips non-acceptance tests - Updated: test_sample.py for new ir_generation structure Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
@@ -1,38 +1,97 @@
|
||||
import logging
|
||||
import os
|
||||
import time
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
|
||||
from openai import OpenAI
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Resolve the secrets file. Candidates are tried in priority order:
#   1) the path given in the OPENCLAW_SECRETS environment variable,
#   2) <3 levels above this file>/config/secrets.yaml
#      (workspace-document-analyzer/config/, relative to the skills dir),
#   3) <5 levels above this file>/.openclaw/config/secrets.yaml
#
# The ancestor lookups are guarded: indexing ``Path.parents`` past its end
# raises IndexError, which would otherwise make this module un-importable
# when it is installed close to the filesystem root.
_ANCESTORS = Path(__file__).resolve().parents
_candidates = [os.environ.get("OPENCLAW_SECRETS", "")]
if len(_ANCESTORS) > 3:
    _candidates.append(_ANCESTORS[3] / "config" / "secrets.yaml")
if len(_ANCESTORS) > 5:
    _candidates.append(_ANCESTORS[5] / ".openclaw" / "config" / "secrets.yaml")

# Fallback when nothing is found. NOTE: Path("") is the current directory,
# so its mere existence must not be read as "a secrets file was found".
_SECRETS_FILE = Path("")
for _candidate in _candidates:
    # is_file() (not exists()) so that a directory, e.g. a mis-set
    # OPENCLAW_SECRETS, does not shadow the lower-priority candidates.
    if _candidate and Path(_candidate).is_file():
        _SECRETS_FILE = Path(_candidate)
        break
|
||||
|
||||
|
||||
def _load_secrets() -> dict:
    """Load per-provider API settings from the secrets file and the environment.

    Reads the ``deepseek`` and ``dashscope`` mappings from ``_SECRETS_FILE``
    (YAML) when that path is a regular file, then lets the
    ``DEEPSEEK_API_KEY`` / ``DASHSCOPE_API_KEY`` environment variables
    override the respective ``apiKey`` entry.

    Loading is best-effort: a missing PyYAML install, an unreadable file or a
    malformed document is logged as a warning and treated as "no file".

    Returns:
        A dict keyed by provider name (``"deepseek"``, ``"dashscope"``) whose
        values are that provider's settings dict. Providers with neither a
        file entry nor an environment key are absent.
    """
    secrets: dict = {}

    # is_file() rather than exists(): the "not found" fallback Path("") is the
    # current directory, which exists but cannot be opened as a file and would
    # otherwise produce a spurious warning on every call.
    if _SECRETS_FILE.is_file():
        try:
            import yaml

            with open(_SECRETS_FILE, "r", encoding="utf-8") as f:
                data = yaml.safe_load(f) or {}
            if isinstance(data, dict):
                for provider in ("deepseek", "dashscope"):
                    section = data.get(provider)
                    if isinstance(section, dict):
                        secrets[provider] = section
            else:
                logger.warning(
                    "Failed to load %s: %s",
                    _SECRETS_FILE,
                    "top-level YAML value is not a mapping",
                )
        except ImportError:
            logger.warning("pyyaml not installed, cannot read %s", _SECRETS_FILE)
        except Exception as e:
            # Deliberately broad: a bad secrets file must not prevent
            # env-var-only configuration from working.
            logger.warning("Failed to load %s: %s", _SECRETS_FILE, e)

    # Environment variables take precedence over the file.
    dk_env = os.environ.get("DEEPSEEK_API_KEY", "")
    ds_env = os.environ.get("DASHSCOPE_API_KEY", "")
    if dk_env:
        secrets.setdefault("deepseek", {})["apiKey"] = dk_env
    if ds_env:
        secrets.setdefault("dashscope", {})["apiKey"] = ds_env
    return secrets
|
||||
|
||||
|
||||
class LLMClient:
|
||||
"""Low-level OpenAI-compatible LLM client with retry and token tracking.
|
||||
"""Multi-provider LLM client with retry and token tracking.
|
||||
|
||||
Routes text models to DeepSeek, vision models to DashScope (Bailian).
|
||||
Reads API keys from openclaw config/secrets.yaml, with env-var overrides.
|
||||
|
||||
Usage::
|
||||
|
||||
llm = LLMClient()
|
||||
content = llm.chat("qwen3.5-flash", [{"role": "user", "content": "Hello"}])
|
||||
content = llm.chat("deepseek-v4-pro", [{"role": "user", "content": "Hello"}])
|
||||
print(llm.usage)
|
||||
"""
|
||||
|
||||
IMAGE_MODEL = "qwen3-vl-plus"
|
||||
TEXT_MODEL = "qwen3.5-flash-2026-02-23"
|
||||
TEXT_MODEL = "deepseek-v4-flash"
|
||||
|
||||
DASHSCOPE_BASE = "https://dashscope.aliyuncs.com/compatible-mode/v1"
|
||||
DEEPSEEK_BASE = "https://api.deepseek.com/v1"
|
||||
|
||||
TIMEOUT = 120
|
||||
MAX_RETRIES = 3
|
||||
|
||||
_VISION_KEYWORDS = ("vl", "vision", "qwen-vl", "qwen3-vl")
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
*,
|
||||
base_url: str = "https://dashscope.aliyuncs.com/compatible-mode/v1",
|
||||
timeout: int | None = None,
|
||||
):
|
||||
key = os.environ.get("DASHSCOPE_API_KEY", "")
|
||||
if not key:
|
||||
raise ValueError("DASHSCOPE_API_KEY environment variable is not set.")
|
||||
self._client = OpenAI(api_key=key, base_url=base_url)
|
||||
secrets = _load_secrets()
|
||||
|
||||
ds_cfg = secrets.get("dashscope", {})
|
||||
dk_cfg = secrets.get("deepseek", {})
|
||||
|
||||
dashscope_key = ds_cfg.get("apiKey", "")
|
||||
dashscope_url = ds_cfg.get("baseUrl", self.DASHSCOPE_BASE)
|
||||
deepseek_key = dk_cfg.get("apiKey", "")
|
||||
deepseek_url = dk_cfg.get("baseUrl", self.DEEPSEEK_BASE)
|
||||
|
||||
self._ds_client = OpenAI(api_key=dashscope_key, base_url=dashscope_url) if dashscope_key else None
|
||||
self._dk_client = OpenAI(api_key=deepseek_key, base_url=deepseek_url) if deepseek_key else None
|
||||
|
||||
self._timeout = timeout or self.TIMEOUT
|
||||
self._prompt_tokens = 0
|
||||
self._completion_tokens = 0
|
||||
@@ -49,7 +108,7 @@ class LLMClient:
|
||||
@staticmethod
|
||||
def estimate_tokens(text: str) -> int:
|
||||
"""Quick token estimate. CJK ≈1.7/token, others ≈3.0/token."""
|
||||
cjk = sum(1 for c in text if '一' <= c <= '鿿' or ' ' <= c <= '〿')
|
||||
cjk = sum(1 for c in text if '\u4e00' <= c <= '\u9fff' or '\u3000' <= c <= '\u303f')
|
||||
other = len(text) - cjk
|
||||
return max(1, int(cjk / 1.7 + other / 3.0))
|
||||
|
||||
@@ -58,6 +117,20 @@ class LLMClient:
|
||||
"""Fixed estimate for one vision-model image (~500 tokens)."""
|
||||
return 500
|
||||
|
||||
@staticmethod
def _is_vision_model(model: str) -> bool:
    """Tell whether *model* names a vision model.

    The check is a case-insensitive substring match of the model name
    against every entry of ``LLMClient._VISION_KEYWORDS``.
    """
    lowered = model.lower()
    for keyword in LLMClient._VISION_KEYWORDS:
        if keyword in lowered:
            return True
    return False
|
||||
|
||||
def _get_client(self, model: str) -> OpenAI:
    """Pick the provider client that should serve *model*.

    Models recognised by ``_is_vision_model`` are served by
    ``self._ds_client``; every other model by ``self._dk_client``.

    Raises:
        ValueError: if the client required for *model* is ``None``.
    """
    wants_vision = self._is_vision_model(model)
    chosen = self._ds_client if wants_vision else self._dk_client
    if chosen is not None:
        return chosen
    if wants_vision:
        raise ValueError("DASHSCOPE_API_KEY not set but required for vision model")
    raise ValueError("DEEPSEEK_API_KEY not set but required for text model")
|
||||
|
||||
def chat(
|
||||
self, model: str, messages: list[dict], *, timeout: int | None = None,
|
||||
response_format: dict | None = None,
|
||||
@@ -65,8 +138,10 @@ class LLMClient:
|
||||
"""Send a chat completion request and return the response content.
|
||||
|
||||
Automatically retries on failure and accumulates token usage.
|
||||
Routes to DeepSeek for text, DashScope for vision.
|
||||
"""
|
||||
label = f"chat({model})"
|
||||
client = self._get_client(model)
|
||||
|
||||
def _call():
|
||||
t0 = time.time()
|
||||
@@ -74,7 +149,7 @@ class LLMClient:
|
||||
if response_format is not None:
|
||||
kwargs["response_format"] = response_format
|
||||
kwargs["temperature"] = 0
|
||||
resp = self._client.chat.completions.create(**kwargs)
|
||||
resp = client.chat.completions.create(**kwargs)
|
||||
content = resp.choices[0].message.content
|
||||
usg = resp.usage
|
||||
if usg:
|
||||
|
||||
@@ -96,6 +96,77 @@ PROMPT_DETECT_CONFLICT = """你是一个文档一致性检查专家。以下内
|
||||
"""
|
||||
|
||||
|
||||
def _is_nested_tree(lt: dict) -> bool:
    """Return True if *lt* uses the nested ``children`` format.

    A tree counts as nested when it is a dict whose ``"children"`` value is a
    list. Anything else, including a non-dict value, yields False instead of
    raising.
    """
    return isinstance(lt, dict) and isinstance(lt.get("children"), list)
|
||||
|
||||
|
||||
def _logic_tree_to_text(lt: dict) -> str:
    """Render a logic_tree JSON object as readable text for conflict detection.

    Dispatches on the tree's shape: the nested-children format goes to
    ``_nested_tree_to_text``, everything else is treated as the legacy
    flat-nodes format and goes to ``_flat_tree_to_text``.
    """
    render = _nested_tree_to_text if _is_nested_tree(lt) else _flat_tree_to_text
    return render(lt)
|
||||
|
||||
|
||||
def _nested_tree_to_text(tree: dict) -> str:
    """Render a nested flowchart tree as indented, human-readable text.

    Each node becomes one ``[<type label>] <id>: <name>`` line, indented by
    its depth. For a ``decision`` node every child is expected to be a
    ``{"condition": ..., "node": {...}}`` wrapper: the condition is printed
    as a branch line and the wrapped node is rendered two levels deeper.
    Children of any other node type are plain nodes rendered one level
    deeper.

    Malformed input is tolerated rather than raised on:
    - a ``children`` value of ``None`` is treated as no children,
    - non-dict children are skipped,
    - a decision branch without a ``"node"`` wrapper is rendered from the
      branch dict itself when it carries node fields (``id``/``name``/``type``).

    Args:
        tree: Root node of the nested tree.

    Returns:
        The rendered lines joined with newlines ("" if *tree* is not a dict).
    """
    type_labels = {
        "start": "起始", "end": "结束", "process": "处理",
        "decision": "判断", "action": "动作",
    }
    lines: list[str] = []

    def _walk(node: dict, indent: int = 0) -> None:
        prefix = " " * indent
        nid = node.get("id", "")
        name = node.get("name", "")
        ntype = node.get("type", "")
        # Unknown types are shown verbatim.
        type_label = type_labels.get(ntype, ntype)
        lines.append(f"{prefix}[{type_label}] {nid}: {name}")

        children = node.get("children") or []
        if ntype == "decision":
            for child in children:
                if not isinstance(child, dict):
                    continue
                cond = child.get("condition", "")
                lines.append(f"{prefix} 分支 \"{cond}\":")
                target = child.get("node")
                if target is None and any(k in child for k in ("id", "name", "type")):
                    # Branch emitted without the wrapper: the node fields sit
                    # directly next to "condition".
                    target = child
                if isinstance(target, dict):
                    _walk(target, indent + 2)
        else:
            for child in children:
                if isinstance(child, dict):
                    _walk(child, indent + 1)

    if isinstance(tree, dict):
        _walk(tree)
    return "\n".join(lines)
|
||||
|
||||
|
||||
def _flat_tree_to_text(lt: dict) -> str:
    """Render a legacy flat-nodes logic_tree as readable text.

    Output is one line for the ``root`` id (when non-empty) followed by one
    line per entry of ``nodes``:
    - ``decision`` nodes print their condition plus one indented line per
      branch (``value`` → ``target``),
    - ``action`` / ``state`` / ``start`` / ``end`` nodes print their
      ``description``,
    - nodes of any other type are omitted.

    Malformed input is tolerated rather than raised on: a non-dict *lt*
    yields "", ``nodes`` / ``branches`` set to ``None`` are treated as empty,
    and non-dict entries are skipped.

    Returns:
        The rendered lines joined with newlines.
    """
    if not isinstance(lt, dict):
        return ""

    # Node types that share the "<label> <id>: <description>" layout.
    simple_labels = {
        "action": "动作节点",
        "state": "状态节点",
        "start": "起始节点",
        "end": "结束节点",
    }

    lines: list[str] = []
    root = lt.get("root", "")
    if root:
        lines.append(f"根节点: {root}")

    for node in lt.get("nodes") or []:
        if not isinstance(node, dict):
            continue
        nid = node.get("id", "")
        ntype = node.get("type", "")
        if ntype == "decision":
            cond = node.get("condition", "")
            lines.append(f"判断节点 {nid}: 条件=\"{cond}\"")
            for b in node.get("branches") or []:
                if isinstance(b, dict):
                    lines.append(f" - 分支 \"{b.get('value', '')}\" → {b.get('target', '')}")
        elif isinstance(ntype, str) and ntype in simple_labels:
            lines.append(f"{simple_labels[ntype]} {nid}: {node.get('description', '')}")
    return "\n".join(lines)
|
||||
|
||||
|
||||
def _build_text_for_section(sections: list[dict], section_name: str) -> str:
|
||||
"""Build a single text block for the given section name."""
|
||||
texts: list[str] = []
|
||||
@@ -184,8 +255,9 @@ def detect_conflicts(
|
||||
img_type = img.get("type", "other")
|
||||
rid = img.get("rid", "")
|
||||
description = img.get("description", "").strip()
|
||||
logic_tree = img.get("logic_tree_nested") or img.get("logic_tree")
|
||||
|
||||
if img_type not in DIAGRAM_TYPES or not description:
|
||||
if img_type not in DIAGRAM_TYPES or (not description and not logic_tree):
|
||||
logger.info("Skip conflict check: rid=%s type=%s", rid, img_type)
|
||||
continue
|
||||
|
||||
@@ -211,8 +283,17 @@ def detect_conflicts(
|
||||
logger.info(" [DRY RUN] would call LLM to detect conflicts")
|
||||
continue
|
||||
|
||||
# Enrich description with logic_tree if available
|
||||
combined_desc = description
|
||||
if logic_tree:
|
||||
lt_text = _logic_tree_to_text(logic_tree)
|
||||
if combined_desc:
|
||||
combined_desc = f"[结构化逻辑树]\n{lt_text}\n\n[文字描述]\n{combined_desc}"
|
||||
else:
|
||||
combined_desc = f"[结构化逻辑树]\n{lt_text}"
|
||||
|
||||
prompt = PROMPT_DETECT_CONFLICT.format(
|
||||
image_description=description,
|
||||
image_description=combined_desc,
|
||||
text_description=text_content,
|
||||
section_name=section_name,
|
||||
)
|
||||
|
||||
Reference in New Issue
Block a user