sync: update all skills from latest workspace code
CI / test (push) Successful in 8s

doc_parser_skill:
- New: verify_flowchart.py (flowchart validation)
- Updated: LLM.py (multi-provider: DeepSeek + DashScope)
- Updated: image_parser.py (logic tree support, external prompts)
- Updated: SKILL.md, prompts/image_prompt.md

conflict_detection_skill:
- Updated: LLM.py (multi-provider sync)
- Updated: detect_conflicts.py (logic tree text conversion)

ir_generation_skill:
- Replaced old scripts/LLM.py + ir_generator.py with standalone project
- New: main.py, config.py, step1-3_*.py, ensemble_merge.py
- New: prompts/, tests/ subdirectories

tests:
- New: acceptance/ test suite with schema validation
- Fixed: conftest no longer globally skips non-acceptance tests
- Updated: test_sample.py for new ir_generation structure

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
2026-05-30 22:45:08 +08:00
parent db64df2da1
commit fec4c09ee0
35 changed files with 8021 additions and 530 deletions
+85 -10
View File
@@ -1,38 +1,97 @@
import logging
import os
import time
from pathlib import Path
from typing import Optional
from openai import OpenAI
logger = logging.getLogger(__name__)
# Resolve secrets file: priority 1) env OPENCLAW_SECRETS,
# 2) workspace-document-analyzer/config/ (relative to skills dir),
# 3) .openclaw/config/
def _secrets_candidates():
    """Yield candidate secrets.yaml paths in priority order."""
    env_path = os.environ.get("OPENCLAW_SECRETS", "")
    if env_path:
        yield Path(env_path)
    ancestors = Path(__file__).resolve().parents
    layouts = (
        (3, ("config", "secrets.yaml")),
        (5, (".openclaw", "config", "secrets.yaml")),
    )
    for depth, tail in layouts:
        # When this module lives at a shallow path there are fewer ancestors
        # than expected; skip the candidate instead of raising IndexError
        # while the module is being imported.
        if depth < len(ancestors):
            yield ancestors[depth].joinpath(*tail)


# NOTE: the Path("") fallback denotes the current directory, so .exists() is
# True for it; code that opens the file should test .is_file() first.
_SECRETS_FILE = next(
    (path for path in _secrets_candidates() if path.exists()),
    Path(""),
)
def _load_secrets() -> dict:
    """Load provider settings from the secrets file, with env-var overrides.

    Returns:
        A dict with optional ``"deepseek"`` / ``"dashscope"`` entries. Each
        entry is the mapping read from the YAML file for that provider; its
        ``"apiKey"`` is replaced by ``DEEPSEEK_API_KEY`` /
        ``DASHSCOPE_API_KEY`` when those environment variables are non-empty.

    Problems reading the file are logged as warnings and never raised.
    """
    secrets: dict = {}
    # is_file() rather than exists(): the "nothing found" fallback Path("")
    # is the current directory, which exists but cannot be opened as a file.
    if _SECRETS_FILE.is_file():
        try:
            import yaml

            with open(_SECRETS_FILE, "r", encoding="utf-8") as f:
                data = yaml.safe_load(f) or {}
            if isinstance(data, dict):
                for provider in ("deepseek", "dashscope"):
                    if isinstance(data.get(provider), dict):
                        secrets[provider] = data[provider]
            else:
                logger.warning("Ignoring %s: top-level YAML value is not a mapping", _SECRETS_FILE)
        except ImportError:
            logger.warning("pyyaml not installed, cannot read %s", _SECRETS_FILE)
        except Exception as e:  # best-effort: a broken secrets file must not abort startup
            logger.warning("Failed to load %s: %s", _SECRETS_FILE, e)
    # Env overrides
    dk_env = os.environ.get("DEEPSEEK_API_KEY", "")
    ds_env = os.environ.get("DASHSCOPE_API_KEY", "")
    if dk_env:
        secrets.setdefault("deepseek", {})["apiKey"] = dk_env
    if ds_env:
        secrets.setdefault("dashscope", {})["apiKey"] = ds_env
    return secrets
class LLMClient:
"""Low-level OpenAI-compatible LLM client with retry and token tracking.
"""Multi-provider LLM client with retry and token tracking.
Routes text models to DeepSeek, vision models to DashScope (Bailian).
Reads API keys from openclaw config/secrets.yaml, with env-var overrides.
Usage::
llm = LLMClient()
content = llm.chat("qwen3.5-flash", [{"role": "user", "content": "Hello"}])
content = llm.chat("deepseek-v4-pro", [{"role": "user", "content": "Hello"}])
print(llm.usage)
"""
IMAGE_MODEL = "qwen3-vl-plus"
TEXT_MODEL = "qwen3.5-flash-2026-02-23"
TEXT_MODEL = "deepseek-v4-flash"
DASHSCOPE_BASE = "https://dashscope.aliyuncs.com/compatible-mode/v1"
DEEPSEEK_BASE = "https://api.deepseek.com/v1"
TIMEOUT = 120
MAX_RETRIES = 3
_VISION_KEYWORDS = ("vl", "vision", "qwen-vl", "qwen3-vl")
def __init__(
self,
*,
base_url: str = "https://dashscope.aliyuncs.com/compatible-mode/v1",
timeout: int | None = None,
):
key = os.environ.get("DASHSCOPE_API_KEY", "")
if not key:
raise ValueError("DASHSCOPE_API_KEY environment variable is not set.")
self._client = OpenAI(api_key=key, base_url=base_url)
secrets = _load_secrets()
ds_cfg = secrets.get("dashscope", {})
dk_cfg = secrets.get("deepseek", {})
dashscope_key = ds_cfg.get("apiKey", "")
dashscope_url = ds_cfg.get("baseUrl", self.DASHSCOPE_BASE)
deepseek_key = dk_cfg.get("apiKey", "")
deepseek_url = dk_cfg.get("baseUrl", self.DEEPSEEK_BASE)
self._ds_client = OpenAI(api_key=dashscope_key, base_url=dashscope_url) if dashscope_key else None
self._dk_client = OpenAI(api_key=deepseek_key, base_url=deepseek_url) if deepseek_key else None
self._timeout = timeout or self.TIMEOUT
self._prompt_tokens = 0
self._completion_tokens = 0
@@ -49,7 +108,7 @@ class LLMClient:
@staticmethod
def estimate_tokens(text: str) -> int:
"""Quick token estimate. CJK ≈1.7/token, others ≈3.0/token."""
cjk = sum(1 for c in text if '' <= c <= '鿿' or ' ' <= c <= '')
cjk = sum(1 for c in text if '\u4e00' <= c <= '\u9fff' or '\u3000' <= c <= '\u303f')
other = len(text) - cjk
return max(1, int(cjk / 1.7 + other / 3.0))
@@ -58,6 +117,20 @@ class LLMClient:
"""Fixed estimate for one vision-model image (~500 tokens)."""
return 500
@staticmethod
def _is_vision_model(model: str) -> bool:
    """Return True when the lowercased model name contains a vision keyword."""
    lowered = model.lower()
    for keyword in LLMClient._VISION_KEYWORDS:
        if keyword in lowered:
            return True
    return False
def _get_client(self, model: str) -> OpenAI:
    """Return the provider client that should serve *model*.

    Vision models use the DashScope client, every other model the DeepSeek
    client.

    Raises:
        ValueError: if the client needed for *model* was not configured.
    """
    wants_vision = self._is_vision_model(model)
    selected = self._ds_client if wants_vision else self._dk_client
    if selected is not None:
        return selected
    if wants_vision:
        raise ValueError("DASHSCOPE_API_KEY not set but required for vision model")
    raise ValueError("DEEPSEEK_API_KEY not set but required for text model")
def chat(
self, model: str, messages: list[dict], *, timeout: int | None = None,
response_format: dict | None = None,
@@ -65,8 +138,10 @@ class LLMClient:
"""Send a chat completion request and return the response content.
Automatically retries on failure and accumulates token usage.
Routes to DeepSeek for text, DashScope for vision.
"""
label = f"chat({model})"
client = self._get_client(model)
def _call():
t0 = time.time()
@@ -74,7 +149,7 @@ class LLMClient:
if response_format is not None:
kwargs["response_format"] = response_format
kwargs["temperature"] = 0
resp = self._client.chat.completions.create(**kwargs)
resp = client.chat.completions.create(**kwargs)
content = resp.choices[0].message.content
usg = resp.usage
if usg:
@@ -96,6 +96,77 @@ PROMPT_DETECT_CONFLICT = """你是一个文档一致性检查专家。以下内
"""
def _is_nested_tree(lt: dict) -> bool:
    """Tell whether *lt* is in the nested format, i.e. ``lt["children"]`` is a list."""
    children = lt.get("children")
    return isinstance(children, list)
def _logic_tree_to_text(lt: dict) -> str:
    """Render a logic_tree dict as readable text for conflict detection.

    Nested-children trees go through ``_nested_tree_to_text``; anything else
    is treated as the legacy flat-nodes format.
    """
    if not _is_nested_tree(lt):
        return _flat_tree_to_text(lt)
    return _nested_tree_to_text(lt)
def _nested_tree_to_text(tree: dict) -> str:
    """Convert a nested flowchart tree to readable, indented text.

    Each node becomes one ``[label] id: name`` line. A decision node is
    followed by one ``分支 "<condition>":`` line per branch, with the branch's
    subtree indented below it.

    The tree comes from model output, so malformed parts are tolerated: a
    missing or non-list ``children`` is treated as empty, non-dict children
    are skipped, and a decision branch without a dict ``node`` still gets its
    branch line but no subtree.
    """
    type_labels = {
        "start": "起始", "end": "结束", "process": "处理",
        "decision": "判断", "action": "动作",
    }
    lines: list[str] = []

    def _children(node: dict) -> list:
        kids = node.get("children")
        return kids if isinstance(kids, list) else []

    def _walk(node: dict, indent: int = 0) -> None:
        prefix = " " * indent
        nid = node.get("id", "")
        name = node.get("name", "")
        ntype = node.get("type", "")
        # Unknown (or non-string) types are printed as-is.
        type_label = type_labels.get(ntype, ntype) if isinstance(ntype, str) else ntype
        lines.append(f"{prefix}[{type_label}] {nid}: {name}")

        if ntype == "decision":
            for child in _children(node):
                if not isinstance(child, dict):
                    continue
                cond = child.get("condition", "")
                lines.append(f"{prefix} 分支 \"{cond}\":")
                target = child.get("node")
                if isinstance(target, dict):
                    _walk(target, indent + 2)
        else:
            for child in _children(node):
                if isinstance(child, dict):
                    _walk(child, indent + 1)

    _walk(tree)
    return "\n".join(lines)
def _flat_tree_to_text(lt: dict) -> str:
    """Convert a legacy flat-nodes logic_tree to readable text.

    Emits an optional root line, then one line per node in ``lt["nodes"]``.
    Decision nodes are followed by one line per branch showing the branch
    value and its target node id. Nodes of an unrecognised type are omitted.
    """
    # Node types rendered as "<label> <id>: <description>". "process" is
    # included because flattened nested trees carry process nodes.
    simple_labels = {
        "action": "动作节点",
        "process": "处理节点",
        "state": "状态节点",
        "start": "起始节点",
        "end": "结束节点",
    }
    lines: list[str] = []
    root = lt.get("root", "")
    if root:
        lines.append(f"根节点: {root}")

    for node in lt.get("nodes", []):
        nid = node.get("id", "")
        ntype = node.get("type", "")
        if ntype == "decision":
            cond = node.get("condition", "")
            lines.append(f"判断节点 {nid}: 条件=\"{cond}\"")
            for b in node.get("branches", []):
                # Separate the branch value from its target so the pair is readable.
                lines.append(f" - 分支 \"{b.get('value', '')}\" -> {b.get('target', '')}")
        elif isinstance(ntype, str) and ntype in simple_labels:
            lines.append(f"{simple_labels[ntype]} {nid}: {node.get('description', '')}")

    return "\n".join(lines)
def _build_text_for_section(sections: list[dict], section_name: str) -> str:
"""Build a single text block for the given section name."""
texts: list[str] = []
@@ -184,8 +255,9 @@ def detect_conflicts(
img_type = img.get("type", "other")
rid = img.get("rid", "")
description = img.get("description", "").strip()
logic_tree = img.get("logic_tree_nested") or img.get("logic_tree")
if img_type not in DIAGRAM_TYPES or not description:
if img_type not in DIAGRAM_TYPES or (not description and not logic_tree):
logger.info("Skip conflict check: rid=%s type=%s", rid, img_type)
continue
@@ -211,8 +283,17 @@ def detect_conflicts(
logger.info(" [DRY RUN] would call LLM to detect conflicts")
continue
# Enrich description with logic_tree if available
combined_desc = description
if logic_tree:
lt_text = _logic_tree_to_text(logic_tree)
if combined_desc:
combined_desc = f"[结构化逻辑树]\n{lt_text}\n\n[文字描述]\n{combined_desc}"
else:
combined_desc = f"[结构化逻辑树]\n{lt_text}"
prompt = PROMPT_DETECT_CONFLICT.format(
image_description=description,
image_description=combined_desc,
text_description=text_content,
section_name=section_name,
)
+4 -1
View File
@@ -29,7 +29,10 @@ description: 解析文档(.docx, .pdf)以提取图像和文本结构,并
该技能生成一个结构化JSON文件,文件名为输入文档的基本名称后跟'_parsed.json',包含:
- `sections`:按标题分组的文档文本结构
- `image_sources`:从图像标识符到其在文档中位置的映射
- `image_analysis`:由视觉大语言模型确定的每个图像的类型内容描述
- `image_analysis`:由视觉大语言模型确定的每个图像的类型、内容描述和(如适用)结构化逻辑树
- `type`: 图片类型(flowchart/architecture/state/sequence/activity/other)
- `description`: 图片的文字描述
- `logic_tree`(可选,仅图表类型):结构化逻辑树JSON,包含 `root`(根节点描述)和 `nodes` 数组。节点类型:`decision`(判断)、`action`(动作)、`state`(状态)、`start`(开始)、`end`(结束)。decision 节点包含 `condition` 和 `branches` 字段,其他节点包含 `description` 字段。
## 集成点
@@ -0,0 +1,203 @@
请分析这张图片,判断类型并输出文字描述和(如适用)结构化逻辑树。
## 判断图片类型
如果是 **流程图 / 架构图 / 状态图 / 时序图 / 活动图**,你需要输出三项内容:
1. 类型标签
2. **嵌套逻辑树 JSON**(见下方格式)
3. 文字描述
如果是 **其他类型**(UI原型图 / 界面截图 / 设计稿 / 手机屏幕截图 / 网页截图等),只输出类型标签和简要文字描述。
## 嵌套逻辑树 JSON 格式(仅流程图/架构图/状态图/时序图/活动图需要)
**核心原则:用嵌套的 `children` 数组表达流程的层级关系,而不是用 id 引用。** 这种格式更贴近流程图的自然结构,每个节点的后续步骤直接嵌套在其下方。
### 节点类型
| 类型 | 含义 | 对应形状 |
|------|------|----------|
| `start` | 起始节点 | 椭圆/圆角矩形 |
| `end` | 结束节点 | 椭圆/圆角矩形 |
| `process` | 处理/状态节点 | 矩形/圆角矩形 |
| `decision` | 判断节点 | 菱形 |
| `action` | 动作节点 | 矩形 |
### 非判断节点的 children 格式
对于 `start`、`end`、`process`、`action` 节点,`children` 是一个数组,包含后续步骤节点:
```json
{
"id": "n1",
"name": "节点名称",
"type": "process",
"children": [
{
"id": "n2",
"name": "下一个步骤",
"type": "action",
"children": [...]
}
]
}
```
### 判断节点的 children 格式
对于 `decision` 节点,`children` 是一个数组,每个元素包含 `condition`(分支条件)和 `node`(该分支对应的子节点):
```json
{
"id": "n5",
"name": "是否满足条件?",
"type": "decision",
"children": [
{
"condition": "是",
"node": {
"id": "n6",
"name": "满足条件时的动作",
"type": "action",
"children": [...]
}
},
{
"condition": "否",
"node": {
"id": "n7",
"name": "不满足条件时的动作",
"type": "action",
"children": [...]
}
}
]
}
```
### 结束节点
`end` 节点没有 `children` 字段:
```json
{
"id": "n10",
"name": "流程结束",
"type": "end"
}
```
### 完整示例
```json
{
"id": "n1",
"name": "开关状态",
"type": "start",
"children": [
{
"id": "n2",
"name": "开启",
"type": "process",
"children": [
{
"id": "n3",
"name": "是否在目标场景?",
"type": "decision",
"children": [
{
"condition": "否",
"node": {
"id": "n4",
"name": "不受限",
"type": "end"
}
},
{
"condition": "是",
"node": {
"id": "n5",
"name": "车速是否≥15km/h且持续5秒?",
"type": "decision",
"children": [
{
"condition": "否",
"node": {
"id": "n6",
"name": "不受限",
"type": "end"
}
},
{
"condition": "是",
"node": {
"id": "n7",
"name": "暂停功能",
"type": "action",
"children": [
{
"id": "n8",
"name": "发起Toast提示",
"type": "end"
}
]
}
}
]
}
}
]
}
]
},
{
"id": "n9",
"name": "关闭",
"type": "process",
"children": [
{
"id": "n10",
"name": "不受限",
"type": "end"
}
]
}
]
}
```
### 规则
1. 每条从根节点到 `end` 节点的路径必须是完整的逻辑链
2. `decision` 节点的 `children` 必须穷举所有分支(通常为"是/否"),每条分支包含 `condition` 和 `node`
3. 只有 `end` 节点没有 `children` 字段,其他所有节点都应该有 `children`
4. 节点 id 使用 "n1", "n2", "n3"... 格式,按流程图从上到下、从左到右的顺序编号
5. 仔细阅读图片中的每个判断条件和分支走向,确保分支目标节点正确
6. 如果流程图中某个分支的后续步骤在图片中没有展示,将其标记为 `end` 节点,`name` 设为"(图中未展示)"
7. **如果图片包含多个独立的流程图**(例如上半部分和下半部分分别描述不同场景),使用一个统一的 `process` 根节点将它们组织在一起。例如图片中有"策略A"和"策略B"两个流程,结构为:
```json
{
"id": "n1",
"name": "策略总览",
"type": "process",
"children": [
{"id": "n2", "name": "策略A流程", "type": "process", "children": [...]},
{"id": "n3", "name": "策略B流程", "type": "process", "children": [...]}
]
}
```
## 输出格式
**1. 类型标签(单独一行):**
type: <flowchart|architecture|state|sequence|activity|other>
**2. 逻辑树 JSON(仅上述5种类型,以 logic_tree: 开头,后跟 JSON 对象):**
logic_tree:
{...}
**3. 文字描述(以 description: 开头):**
description:
该图片的详细文字描述。对于流程图/架构图等类型,这里提供自然语言总结;对于其他类型,这是唯一的描述内容。
不要输出 ``` 代码块包裹符号,不要输出 ---YAML--- 分隔符,不要添加任何额外的解释或问候语。
+85 -10
View File
@@ -1,38 +1,97 @@
import logging
import os
import time
from pathlib import Path
from typing import Optional
from openai import OpenAI
logger = logging.getLogger(__name__)
# Resolve secrets file: priority 1) env OPENCLAW_SECRETS,
# 2) workspace-document-analyzer/config/ (relative to skills dir),
# 3) .openclaw/config/
def _secrets_candidates():
    """Yield candidate secrets.yaml paths in priority order."""
    env_path = os.environ.get("OPENCLAW_SECRETS", "")
    if env_path:
        yield Path(env_path)
    ancestors = Path(__file__).resolve().parents
    layouts = (
        (3, ("config", "secrets.yaml")),
        (5, (".openclaw", "config", "secrets.yaml")),
    )
    for depth, tail in layouts:
        # When this module lives at a shallow path there are fewer ancestors
        # than expected; skip the candidate instead of raising IndexError
        # while the module is being imported.
        if depth < len(ancestors):
            yield ancestors[depth].joinpath(*tail)


# NOTE: the Path("") fallback denotes the current directory, so .exists() is
# True for it; code that opens the file should test .is_file() first.
_SECRETS_FILE = next(
    (path for path in _secrets_candidates() if path.exists()),
    Path(""),
)
def _load_secrets() -> dict:
    """Load provider settings from the secrets file, with env-var overrides.

    Returns:
        A dict with optional ``"deepseek"`` / ``"dashscope"`` entries. Each
        entry is the mapping read from the YAML file for that provider; its
        ``"apiKey"`` is replaced by ``DEEPSEEK_API_KEY`` /
        ``DASHSCOPE_API_KEY`` when those environment variables are non-empty.

    Problems reading the file are logged as warnings and never raised.
    """
    secrets: dict = {}
    # is_file() rather than exists(): the "nothing found" fallback Path("")
    # is the current directory, which exists but cannot be opened as a file.
    if _SECRETS_FILE.is_file():
        try:
            import yaml

            with open(_SECRETS_FILE, "r", encoding="utf-8") as f:
                data = yaml.safe_load(f) or {}
            if isinstance(data, dict):
                for provider in ("deepseek", "dashscope"):
                    if isinstance(data.get(provider), dict):
                        secrets[provider] = data[provider]
            else:
                logger.warning("Ignoring %s: top-level YAML value is not a mapping", _SECRETS_FILE)
        except ImportError:
            logger.warning("pyyaml not installed, cannot read %s", _SECRETS_FILE)
        except Exception as e:  # best-effort: a broken secrets file must not abort startup
            logger.warning("Failed to load %s: %s", _SECRETS_FILE, e)
    # Env overrides
    dk_env = os.environ.get("DEEPSEEK_API_KEY", "")
    ds_env = os.environ.get("DASHSCOPE_API_KEY", "")
    if dk_env:
        secrets.setdefault("deepseek", {})["apiKey"] = dk_env
    if ds_env:
        secrets.setdefault("dashscope", {})["apiKey"] = ds_env
    return secrets
class LLMClient:
"""Low-level OpenAI-compatible LLM client with retry and token tracking.
"""Multi-provider LLM client with retry and token tracking.
Routes text models to DeepSeek, vision models to DashScope (Bailian).
Reads API keys from openclaw config/secrets.yaml, with env-var overrides.
Usage::
llm = LLMClient()
content = llm.chat("qwen3.5-flash", [{"role": "user", "content": "Hello"}])
content = llm.chat("deepseek-v4-pro", [{"role": "user", "content": "Hello"}])
print(llm.usage)
"""
IMAGE_MODEL = "qwen3-vl-plus"
TEXT_MODEL = "qwen3.5-flash-2026-02-23"
TEXT_MODEL = "deepseek-v4-flash"
DASHSCOPE_BASE = "https://dashscope.aliyuncs.com/compatible-mode/v1"
DEEPSEEK_BASE = "https://api.deepseek.com/v1"
TIMEOUT = 120
MAX_RETRIES = 3
_VISION_KEYWORDS = ("vl", "vision", "qwen-vl", "qwen3-vl")
def __init__(
self,
*,
base_url: str = "https://dashscope.aliyuncs.com/compatible-mode/v1",
timeout: int | None = None,
):
key = os.environ.get("DASHSCOPE_API_KEY", "")
if not key:
raise ValueError("DASHSCOPE_API_KEY environment variable is not set.")
self._client = OpenAI(api_key=key, base_url=base_url)
secrets = _load_secrets()
ds_cfg = secrets.get("dashscope", {})
dk_cfg = secrets.get("deepseek", {})
dashscope_key = ds_cfg.get("apiKey", "")
dashscope_url = ds_cfg.get("baseUrl", self.DASHSCOPE_BASE)
deepseek_key = dk_cfg.get("apiKey", "")
deepseek_url = dk_cfg.get("baseUrl", self.DEEPSEEK_BASE)
self._ds_client = OpenAI(api_key=dashscope_key, base_url=dashscope_url) if dashscope_key else None
self._dk_client = OpenAI(api_key=deepseek_key, base_url=deepseek_url) if deepseek_key else None
self._timeout = timeout or self.TIMEOUT
self._prompt_tokens = 0
self._completion_tokens = 0
@@ -49,7 +108,7 @@ class LLMClient:
@staticmethod
def estimate_tokens(text: str) -> int:
"""Quick token estimate. CJK ≈1.7/token, others ≈3.0/token."""
cjk = sum(1 for c in text if '' <= c <= '鿿' or ' ' <= c <= '')
cjk = sum(1 for c in text if '\u4e00' <= c <= '\u9fff' or '\u3000' <= c <= '\u303f')
other = len(text) - cjk
return max(1, int(cjk / 1.7 + other / 3.0))
@@ -58,6 +117,20 @@ class LLMClient:
"""Fixed estimate for one vision-model image (~500 tokens)."""
return 500
@staticmethod
def _is_vision_model(model: str) -> bool:
    """Return True when the lowercased model name contains a vision keyword."""
    lowered = model.lower()
    for keyword in LLMClient._VISION_KEYWORDS:
        if keyword in lowered:
            return True
    return False
def _get_client(self, model: str) -> OpenAI:
    """Return the provider client that should serve *model*.

    Vision models use the DashScope client, every other model the DeepSeek
    client.

    Raises:
        ValueError: if the client needed for *model* was not configured.
    """
    wants_vision = self._is_vision_model(model)
    selected = self._ds_client if wants_vision else self._dk_client
    if selected is not None:
        return selected
    if wants_vision:
        raise ValueError("DASHSCOPE_API_KEY not set but required for vision model")
    raise ValueError("DEEPSEEK_API_KEY not set but required for text model")
def chat(
self, model: str, messages: list[dict], *, timeout: int | None = None,
response_format: dict | None = None,
@@ -65,8 +138,10 @@ class LLMClient:
"""Send a chat completion request and return the response content.
Automatically retries on failure and accumulates token usage.
Routes to DeepSeek for text, DashScope for vision.
"""
label = f"chat({model})"
client = self._get_client(model)
def _call():
t0 = time.time()
@@ -74,7 +149,7 @@ class LLMClient:
if response_format is not None:
kwargs["response_format"] = response_format
kwargs["temperature"] = 0
resp = self._client.chat.completions.create(**kwargs)
resp = client.chat.completions.create(**kwargs)
content = resp.choices[0].message.content
usg = resp.usage
if usg:
+322 -35
View File
@@ -1,6 +1,8 @@
import base64
import json
import logging
import os
import re
from typing import Optional
from LLM import LLMClient
@@ -8,32 +10,56 @@ from LLM import LLMClient
logger = logging.getLogger(__name__)
# ---------------------------------------------------------------------------
# Prompts
# Prompt loading
# ---------------------------------------------------------------------------
PROMPT_IMAGE = """请分析这张图片,判断类型并输出文字描述。
def _load_prompt() -> str:
"""Load PROMPT_IMAGE from external file, falling back to inline default."""
prompt_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "..", "prompts")
prompt_path = os.path.join(prompt_dir, "image_prompt.md")
if os.path.isfile(prompt_path):
with open(prompt_path, "r", encoding="utf-8") as f:
return f.read()
# Fallback inline prompt (nested tree format)
return """请分析这张图片,判断类型并输出文字描述和(如适用)结构化逻辑树。
## 判断图片类型
如果是 **流程图 / 架构图 / 状态图 / 时序图 / 活动图**,详细描述
- 图中所有节点/步骤/状态/组件的名称
- 所有连线/箭头/转换关系及其方向
- 所有分支条件、判断逻辑和判断结果
- 所有文字标注、注释、标签
- 图的整体结构和逻辑流程
- 如果图片包含多个子图,拆解描述
如果是 **流程图 / 架构图 / 状态图 / 时序图 / 活动图**,你需要输出三项内容
1. 类型标签
2. **嵌套逻辑树 JSON**(见下方格式)
3. 文字描述
如果是 **其他类型**(UI原型图 / 界面截图 / 设计稿 / 手机屏幕截图 / 网页截图等),简要描述图片内容
如果是 **其他类型**(UI原型图 / 界面截图 / 设计稿 / 手机屏幕截图 / 网页截图等),只输出类型标签和简要文字描述
## 嵌套逻辑树 JSON 格式(仅流程图/架构图/状态图/时序图/活动图需要)
**核心原则:用嵌套的 `children` 数组表达流程的层级关系,而不是用 id 引用。**
节点类型:`start`(起始), `end`(结束), `process`(处理/状态), `decision`(判断), `action`(动作)
非判断节点的 `children` 是子节点数组。`end` 节点无 `children`。
判断节点的 `children` 格式:
```json
{"condition": "", "node": {"id": "n6", "name": "...", "type": "action", "children": [...]}}
```
每条从根到 `end` 的路径必须是完整逻辑链。decision 必须穷举所有分支。
节点 id 使用 "n1", "n2", "n3"... 格式。
## 输出格式
**1. 类型标签(单独一行):**
type: <flowchart|architecture|state|sequence|activity|other>
**2. 文字描述:**
该图片的详细文字描述。
logic_tree:
{...}
不要输出 ---YAML--- 分隔符或 YAML 内容,不要添加任何额外的解释或问候语。"""
description:
该图片的详细文字描述。"""
PROMPT_IMAGE = _load_prompt()
# ---------------------------------------------------------------------------
@@ -41,7 +67,10 @@ type: <flowchart|architecture|state|sequence|activity|other>
# ---------------------------------------------------------------------------
class ImageParser:
"""Vision LLM wrapper for parsing images (type + description).
"""Vision LLM wrapper for parsing images (type + description + logic_tree).
The nested-tree ``logic_tree`` is stored alongside a backward-compatible
flat representation so downstream consumers are not broken.
Usage::
@@ -49,7 +78,7 @@ class ImageParser:
result = parser.parse_image("images/img1.png")
"""
_VALID_TYPES = {"flowchart", "architecture", "state", "sequence", "activity", "text"}
_VALID_TYPES = {"flowchart", "architecture", "state", "sequence", "activity", "other"}
def __init__(self, llm: LLMClient | None = None):
self._llm = llm or LLMClient()
@@ -59,9 +88,9 @@ class ImageParser:
return self._llm.usage
def parse_image(self, image_path: str) -> Optional[dict]:
"""Parse an image and return its type and description (no YAML IR).
"""Parse an image and return its type, description, and optional logic_tree.
Returns ``{type, description}``, or *None* for UI mockups.
Returns ``{type, description, [logic_tree], [logic_tree_nested]}``.
"""
logger.info("Parsing image: %s", image_path)
@@ -84,34 +113,292 @@ class ImageParser:
logger.error(str(e))
return {"type": "other", "description": "", "error": str(e)}
parsed = self._parse_type_and_description(content)
parsed = self._parse_response(content)
if parsed is None:
return None
return {"type": parsed[0], "description": parsed[1]}
ptype, description, logic_tree_nested = parsed
result: dict = {"type": ptype, "description": description}
if logic_tree_nested is not None:
result["logic_tree_nested"] = logic_tree_nested
result["logic_tree"] = self._flatten_tree(logic_tree_nested)
return result
# ---- internals ----------------------------------------------------------
def _parse_response(self, content: str) -> Optional[tuple[str, str, Optional[dict]]]:
    """Extract ``(type, description, logic_tree_nested)`` from an LLM response.

    Returns *None* only when the response starts with the ``[[UI]]`` marker.
    Otherwise:

    * ``type`` is the first ``type:`` / ``类型:`` value if it is one of
      ``_VALID_TYPES``, else ``"other"``.
    * ``logic_tree_nested`` is the JSON object following a line-anchored
      ``logic_tree:`` label, or *None* when the label is absent or the JSON
      cannot be extracted. A structurally invalid tree is only logged, not
      discarded.
    * ``description`` is the text after a line-anchored ``description:``
      label. Without that label it is the response minus the type label and
      minus the whole ``logic_tree:`` block.
    """
    content = content.strip()
    if content.startswith("[[UI]]"):
        return None

    parsed_type = "other"
    logic_tree = None

    # --- type ---
    type_match = re.search(r'(?:type|类型):\s*(\S+)', content)
    if type_match:
        type_val = type_match.group(1).strip().lower()
        if type_val in self._VALID_TYPES:
            parsed_type = type_val

    # --- logic_tree (anchored at line start) ---
    lt_match = re.search(r'(?m)^logic_tree:\s*', content)
    desc_match = re.search(r'(?m)^description:\s*', content)

    if lt_match:
        lt_start = lt_match.end()
        lt_end = desc_match.start() if desc_match and desc_match.start() > lt_start else len(content)
        lt_raw = content[lt_start:lt_end].strip()

        logic_tree = self._extract_json(lt_raw)
        if logic_tree is not None:
            is_valid, err_msg = self._validate_flowchart(logic_tree)
            if not is_valid:
                logger.warning("Flowchart validation warning: %s", err_msg)
        else:
            logger.info("Failed to extract logic_tree JSON. Raw block length=%d", len(lt_raw))
            logger.debug("Raw logic_tree block: %s", lt_raw[:500])
    elif parsed_type in self._VALID_TYPES - {"other"}:
        logger.info("Diagram type=%s but no logic_tree: in response. Response length=%d",
                    parsed_type, len(content))
        logger.debug("Raw response (first 500): %s", content[:500])

    # --- description ---
    if desc_match:
        description = content[desc_match.end():].strip()
    else:
        body_start = type_match.end() if type_match else 0
        if lt_match is None:
            description = content[body_start:].strip()
        else:
            # Cut out the whole logic_tree block. A regex cannot find the end
            # of nested JSON, so let the JSON decoder report where the object
            # stops; if it cannot be decoded, drop everything after the label.
            block_end = lt_match.end()
            brace = content.find("{", block_end)
            if brace != -1:
                try:
                    _, block_end = json.JSONDecoder().raw_decode(content, brace)
                except json.JSONDecodeError:
                    block_end = len(content)
            head = content[body_start:lt_match.start()] if lt_match.start() >= body_start else ""
            tail = content[max(block_end, body_start):]
            description = f"{head}\n{tail}".strip()

    return parsed_type, description, logic_tree
@staticmethod
def _validate_flowchart(tree: dict) -> tuple[bool, str]:
"""Validate a nested flowchart tree structure.
Returns ``(is_valid, error_message)``. Non-fatal: returns ``False``
with a warning message but the tree is still kept.
"""
if not isinstance(tree, dict):
return False, "logic_tree is not a dict"
seen_ids: set[str] = set()
def _walk(node: dict, depth: int = 0) -> tuple[bool, str]:
if depth > 20:
return False, f"Tree too deep (>20) at node {node.get('id', '?')}"
nid = node.get("id", "")
if not nid:
return False, "Node missing 'id' field"
if not isinstance(nid, str):
return False, f"Node id must be string, got {type(nid).__name__}"
if nid in seen_ids:
return False, f"Duplicate node id: {nid}"
seen_ids.add(nid)
ntype = node.get("type", "")
if ntype not in ("start", "end", "process", "decision", "action"):
return False, f"Unknown node type '{ntype}' at {nid}"
if ntype == "end":
if "children" in node:
return False, f"End node {nid} should not have children"
return True, ""
children = node.get("children")
if not children:
if ntype != "end":
return False, f"Non-end node {nid} ({ntype}) has no children"
return True, ""
if not isinstance(children, list):
return False, f"children of {nid} is not a list"
if ntype == "decision":
for child in children:
if not isinstance(child, dict):
return False, f"decision child of {nid} is not a dict"
if "condition" not in child:
return False, f"decision child of {nid} missing 'condition'"
if "node" not in child:
return False, f"decision child of {nid} missing 'node'"
ok, err = _walk(child["node"], depth + 1)
if not ok:
return False, err
else:
for child in children:
if not isinstance(child, dict):
return False, f"child of {nid} is not a dict"
ok, err = _walk(child, depth + 1)
if not ok:
return False, err
return True, ""
return _walk(tree)
@staticmethod
def _flatten_tree(tree: dict) -> dict:
"""Convert a nested flowchart tree into the legacy flat-nodes format.
This preserves backward compatibility with downstream consumers
(conflict_detection_skill, ir_generator) that expect the flat format.
"""
nodes: list[dict] = []
root_name = ""
def _collect(node: dict):
nonlocal root_name
nid = node.get("id", "")
ntype = node.get("type", "")
name = node.get("name", "")
if root_name == "" and "children" in node:
root_name = name
if ntype == "decision":
branches = []
for child in node.get("children", []):
branches.append({
"value": child.get("condition", ""),
"target": child["node"].get("id", ""),
})
_collect(child["node"])
nodes.append({
"id": nid,
"type": ntype,
"condition": name,
"branches": branches,
})
elif ntype in ("action", "process", "state"):
nodes.append({
"id": nid,
"type": ntype,
"description": name,
})
for child in node.get("children", []):
_collect(child)
elif ntype == "start":
nodes.append({
"id": nid,
"type": ntype,
"description": name,
})
for child in node.get("children", []):
_collect(child)
# end nodes are collected but have no children
_collect(tree)
# Add end nodes from the nested tree
ends: list[dict] = []
def _collect_ends(node: dict):
if node.get("type") == "end":
ends.append({
"id": node.get("id", ""),
"type": "end",
"description": node.get("name", ""),
})
elif "children" in node:
for child in node.get("children", []):
if isinstance(child, dict):
if "node" in child:
_collect_ends(child["node"])
else:
_collect_ends(child)
_collect_ends(tree)
nodes.extend(ends)
return {"root": root_name, "nodes": nodes}
@staticmethod
def extract_paths(tree: dict) -> list[list[dict]]:
"""Extract all root-to-leaf paths from a nested flowchart tree.
Each path is a list of node dicts (each with id, name, type).
Returns a list of paths useful for human review and LLM verification.
"""
paths: list[list[dict]] = []
def _walk(node: dict, current_path: list[dict]):
entry = {"id": node.get("id", ""), "name": node.get("name", ""), "type": node.get("type", "")}
new_path = current_path + [entry]
if node.get("type") == "end":
paths.append(new_path)
return
children = node.get("children", [])
if not children:
paths.append(new_path)
return
if node.get("type") == "decision":
for child in children:
_walk(child["node"], new_path)
else:
for child in children:
_walk(child, new_path)
_walk(tree, [])
return paths
@staticmethod
def paths_to_text(paths: list[list[dict]]) -> str:
"""Render extracted paths as human-readable text for review."""
lines: list[str] = []
for i, path in enumerate(paths, 1):
steps = []
for node in path:
if node["type"] == "decision":
steps.append(f"[判断] {node['name']}")
elif node["type"] == "end":
steps.append(f"[结束] {node['name']}")
else:
steps.append(f"[{node['type']}] {node['name']}")
lines.append(f"路径 {i}: {' -> '.join(steps)}")
return "\n".join(lines)
@staticmethod
def _extract_json(text: str) -> Optional[dict]:
"""Try multiple strategies to extract a JSON object from text.
Returns the parsed dict or None.
"""
# Strategy 1: first { ... } pair (simple regex)
json_match = re.search(r'\{.*\}', text, re.DOTALL)
if json_match:
try:
return json.loads(json_match.group())
except json.JSONDecodeError:
pass
# Strategy 2: find balanced braces
start = text.find("{")
if start >= 0:
depth = 0
for i in range(start, len(text)):
if text[i] == "{":
depth += 1
elif text[i] == "}":
depth -= 1
if depth == 0:
try:
return json.loads(text[start:i + 1])
except json.JSONDecodeError:
break
return None
@staticmethod
def _mime_type(image_path: str) -> str:
@@ -0,0 +1,384 @@
#!/usr/bin/env python3
"""Verify flowchart logic trees for structural correctness and consistency.
Usage::
python verify_flowchart.py <parsed.json|flowchart.json> [--llm] [--output-report REPORT.md]
Performs three levels of checks:
1. **Structural validation** — tree integrity, node uniqueness, leaf types
2. **Path extraction** — renders all root-to-leaf paths as readable text
3. **LLM consistency check** (opt-in with ``--llm``) — compares extracted paths
against the original text description for logical inconsistencies
Outputs PASS/FAIL and a detailed report.
"""
import argparse
import json
import logging
import os
import sys
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
from image_parser import ImageParser
from LLM import LLMClient
logging.basicConfig(
level=logging.INFO,
format="%(asctime)s [%(levelname)s] %(message)s",
datefmt="%Y-%m-%d %H:%M:%S",
)
logger = logging.getLogger(__name__)
# ---------------------------------------------------------------------------
# Prompt for LLM path-vs-description consistency check
# ---------------------------------------------------------------------------
PROMPT_VERIFY_PATHS = """你是一个流程图审核专家。以下内容来自同一张流程图的解析结果:
## 流程图路径(从嵌套逻辑树提取的所有根到叶路径)
```
{paths_text}
```
## 原始文字描述
```
{description}
```
## 你的任务
逐条检查每条路径是否与文字描述一致。重点关注:
1. **分支方向错误**:路径中的判断分支走向是否与文字描述矛盾?
例如:文字说"满足条件后退出",但路径中""分支走向了"不受限"
2. **缺失步骤**:路径中是否缺少文字描述中提到的关键步骤?
3. **冗余步骤**:路径中是否包含文字描述未提及的多余步骤?
4. **条件颠倒**:判断条件的"是/否"分支是否与文字描述相反?
## 输出格式
如果**所有路径一致**,只输出:
```
[[PATHS_CONSISTENT]]
```
如果**发现不一致**,输出 JSON 数组:
```json
[
{{
"path_index": 1,
"issue_type": "branch_error|missing_step|redundant_step|condition_reversed",
"severity": "high|medium|low",
"description": "用中文说明具体问题"
}}
]
```
注意:输出必须是严格合法的 JSON 数组,不要有尾随逗号,不要包含代码块包裹符号。
"""
# ---------------------------------------------------------------------------
# Core verification logic
# ---------------------------------------------------------------------------
def verify_parsed_json(parsed_path: str, *, use_llm: bool = False) -> dict:
    """Verify every flowchart entry found in a ``_parsed.json`` file.

    Only ``image_analysis`` entries whose ``type`` is ``"flowchart"`` are
    checked. An entry passes when it is structurally OK and, if *use_llm* is
    set, its ``llm_ok`` flag (default True) is truthy.

    Returns a report dict with keys ``total_flowcharts``, ``passed``,
    ``failed`` and ``results`` (one result per flowchart, in file order).
    """
    with open(parsed_path, "r", encoding="utf-8") as fh:
        payload = json.load(fh)

    targets = [
        entry for entry in payload.get("image_analysis", [])
        if entry.get("type") == "flowchart"
    ]
    client = LLMClient() if use_llm else None

    outcomes: list = []
    pass_count = 0
    for entry in targets:
        logger.info("Verifying flowchart: rid=%s", entry.get("rid", "unknown"))
        outcome = _verify_single(entry, client)
        outcomes.append(outcome)
        if outcome["structural_ok"] and (not use_llm or outcome.get("llm_ok", True)):
            pass_count += 1

    return {
        "total_flowcharts": len(targets),
        "passed": pass_count,
        "failed": len(outcomes) - pass_count,
        "results": outcomes,
    }
def verify_flowchart_file(filepath: str, *, use_llm: bool = False) -> dict:
    """Verify a JSON file whose whole content is a single flowchart tree.

    The tree is wrapped in a synthetic image-analysis entry with an empty
    description and the file's base name as ``rid``, then checked by
    ``_verify_single``.

    Args:
        filepath: Path to the standalone flowchart JSON file.
        use_llm: When true, an ``LLMClient`` is created and passed along.

    Returns:
        A one-flowchart report dict shaped like the one from
        ``verify_parsed_json``; pass/fail follows ``structural_ok``.
    """
    with open(filepath, "r", encoding="utf-8") as handle:
        logic_tree = json.load(handle)

    entry = {
        "logic_tree_nested": logic_tree,
        "description": "",
        "rid": os.path.basename(filepath),
    }
    client = LLMClient() if use_llm else None
    outcome = _verify_single(entry, client)

    passed = 1 if outcome["structural_ok"] else 0
    return {
        "total_flowcharts": 1,
        "passed": passed,
        "failed": 1 - passed,
        "results": [outcome],
    }
def _verify_single(img: dict, llm: LLMClient | None) -> dict:
    """Verify one flowchart entry structurally and, optionally, with an LLM.

    Args:
        img: Image-analysis entry. Reads ``rid``, ``description`` and the
            logic tree stored under ``logic_tree_nested`` (preferred) or
            ``logic_tree``.
        llm: Client used for the path-vs-description check, or ``None`` to
            skip that check.

    Returns:
        Dict with ``rid``, ``structural_ok``, ``errors``, ``paths_text``,
        ``llm_ok`` and ``llm_issues``. The same keys are present on every
        return path so callers can rely on a uniform shape.
    """
    rid = img.get("rid", "unknown")
    # ``description`` may be JSON null; treat that like an empty description.
    description = (img.get("description") or "").strip()

    # Try nested format first, fall back to flat format
    tree = img.get("logic_tree_nested") or img.get("logic_tree")
    if tree is None:
        return {
            "rid": rid,
            "structural_ok": False,
            "errors": ["No logic_tree found"],
            "paths_text": "",
            "llm_ok": True,
            "llm_issues": [],
        }
    if not isinstance(tree, dict):
        # Both formats are JSON objects; anything else (e.g. a top-level
        # list) would otherwise fail later with an AttributeError.
        return {
            "rid": rid,
            "structural_ok": False,
            "errors": [
                f"Structure: logic tree must be a JSON object, got {type(tree).__name__}"
            ],
            "paths_text": "",
            "llm_ok": True,
            "llm_issues": [],
        }

    # The nested format is recognised by a list-valued "children" key.
    is_nested = isinstance(tree.get("children"), list)

    # --- Level 1: Structural validation ---
    structural_ok = True
    errors: list[str] = []
    paths: list = []
    if is_nested:
        ok, err = ImageParser._validate_flowchart(tree)
        if not ok:
            structural_ok = False
            errors.append(f"Structure: {err}")
        # Extract paths
        paths = ImageParser.extract_paths(tree)
        paths_text = ImageParser.paths_to_text(paths)
        # Informational entry: it lives in ``errors`` because that is the
        # list the report prints for each flowchart.
        errors.append(f"Path count: {len(paths)}")
    else:
        # Old flat format — basic check
        nodes = tree.get("nodes", [])
        ids = [n.get("id", "") for n in nodes]
        if len(ids) != len(set(ids)):
            structural_ok = False
            errors.append("Structure: duplicate node ids in flat format")
        # Build simple path-like text for flat format
        paths_text = _flat_to_text(tree)

    # --- Level 2: Path count sanity check ---
    if is_nested and len(paths) == 0:
        structural_ok = False
        errors.append("No paths extracted from tree")

    # --- Level 3: LLM consistency check ---
    llm_issues: list[dict] = []
    llm_ok = True
    if llm and description and paths_text:
        prompt = PROMPT_VERIFY_PATHS.format(
            paths_text=paths_text,
            description=description,
        )
        try:
            raw = llm.chat(
                model=LLMClient.TEXT_MODEL,
                messages=[{"role": "user", "content": prompt}],
            )
            llm_issues = _parse_llm_issues(raw)
            if llm_issues:
                llm_ok = False
                errors.append(f"LLM found {len(llm_issues)} issue(s)")
        except RuntimeError as e:
            # Best effort: a failed LLM call is recorded but does not flip
            # ``llm_ok``, so it does not fail the flowchart by itself.
            errors.append(f"LLM check failed: {e}")

    return {
        "rid": rid,
        "structural_ok": structural_ok,
        "errors": errors,
        "paths_text": paths_text,
        "llm_ok": llm_ok,
        "llm_issues": llm_issues,
    }
def _flat_to_text(tree: dict) -> str:
    """Build path-like text from an old flat-format ``logic_tree``.

    The first output line is ``Root: <root>``; each following line is one
    traced path with steps joined by ``" -> "``. Tracing starts at every
    node of type ``"start"``; when there is none it falls back to the node
    named by ``root`` so that decision trees without an explicit start node
    are still traced. Paths ending in a cycle or in a branch target that
    does not exist are emitted with a ``[循环]`` / ``[缺失]`` marker instead
    of being dropped.

    Args:
        tree: Flat logic tree with ``nodes`` (list of node dicts) and
            optionally ``root``.

    Returns:
        The newline-joined text.
    """
    nodes = tree.get("nodes", [])
    root = tree.get("root", "")
    lines = [f"Root: {root}"]
    # ``.get`` keeps nodes without an id from raising, matching the caller's
    # duplicate-id check which also reads ids with ``.get("id", "")``.
    node_map = {n.get("id", ""): n for n in nodes}

    def _branch_pairs(node: dict) -> list[tuple]:
        """Return (value, target) pairs for a decision node's branches."""
        branches = node.get("branches") or []
        if isinstance(branches, dict):
            # NOTE(review): accepts a {value: target} mapping as well as the
            # list-of-dicts form — confirm which shape the parser emits.
            return list(branches.items())
        return [
            (b.get("value", ""), b.get("target", ""))
            for b in branches
            if isinstance(b, dict)
        ]

    def _trace(node_id, visited: frozenset, path: list[str]) -> None:
        """Follow one path from *node_id*, appending finished paths to lines."""
        if node_id in visited:
            lines.append(" -> ".join(path + [f"[循环] {node_id}"]))
            return
        node = node_map.get(node_id)
        if node is None:
            lines.append(" -> ".join(path + [f"[缺失] {node_id}"]))
            return
        seen = visited | {node_id}
        ntype = node.get("type", "")
        if ntype == "decision":
            cond = node.get("condition", "")
            pairs = _branch_pairs(node)
            if not pairs:
                # A decision without branches is a dead end; still show it.
                lines.append(" -> ".join(path + [f"[判断] {cond}"]))
            for val, tgt in pairs:
                _trace(tgt, seen, path + [f"[判断] {cond}{val}"])
        elif ntype == "end":
            lines.append(" -> ".join(path + [f"[结束] {node.get('description', '')}"]))
        else:
            # Flat format doesn't have explicit children for non-decision
            # nodes, so the path ends here.
            lines.append(" -> ".join(path + [f"[{ntype}] {node.get('description', '')}"]))

    start_ids = [n.get("id", "") for n in nodes if n.get("type") == "start"]
    if not start_ids and isinstance(root, str) and root in node_map:
        start_ids = [root]

    if start_ids:
        for start_id in start_ids:
            _trace(start_id, frozenset(), [])
    else:
        lines.append("(Cannot trace: no start node in flat format)")
    return "\n".join(lines)
def _parse_llm_issues(content: str) -> list[dict]:
    """Parse the LLM reply of the path consistency check.

    Args:
        content: Raw reply text.

    Returns:
        The list of issue objects. Empty when the reply contains the
        ``[[PATHS_CONSISTENT]]`` sentinel, is blank, is not a JSON array, or
        cannot be parsed (the last case is logged as a warning because it
        would otherwise look like a clean result). Array items that are not
        JSON objects are dropped so consumers can safely call ``.get`` on
        every issue.
    """
    stripped = content.strip()
    if "[[PATHS_CONSISTENT]]" in stripped:
        return []

    # Remove markdown code fences
    if "```json" in stripped:
        stripped = stripped.split("```json", 1)[1]
        if "```" in stripped:
            stripped = stripped.split("```", 1)[0]
    elif "```" in stripped:
        stripped = stripped.split("```", 1)[1]
        if "```" in stripped:
            stripped = stripped.split("```", 1)[0]
    stripped = stripped.strip()
    if not stripped:
        return []

    # First try the text as-is; if that fails, retry on the outermost
    # bracketed span so an array wrapped in explanatory prose still parses.
    candidates = [stripped]
    start, end = stripped.find("["), stripped.rfind("]")
    if 0 <= start < end:
        bracketed = stripped[start:end + 1]
        if bracketed != stripped:
            candidates.append(bracketed)

    for candidate in candidates:
        try:
            issues = json.loads(candidate)
        except json.JSONDecodeError:
            continue
        if isinstance(issues, list):
            return [issue for issue in issues if isinstance(issue, dict)]
        return []

    logger.warning("Failed to parse LLM issues: %s", stripped[:200])
    return []
# ---------------------------------------------------------------------------
# Report rendering
# ---------------------------------------------------------------------------
def print_report(report: dict) -> str:
    """Print a human-readable verification report and return it as a string.

    The per-flowchart status is ``[PASS]`` only when the entry is
    structurally valid and its ``llm_ok`` flag (default true) is not false,
    so the status agrees with the pass/fail counters in the summary.

    Args:
        report: Report dict with ``total_flowcharts``, ``passed``,
            ``failed`` and ``results``.

    Returns:
        The rendered report text (also written to stdout).
    """
    lines: list[str] = []
    lines.append("=" * 60)
    lines.append("流程图校验报告")
    lines.append("=" * 60)
    lines.append(f"流程图总数: {report['total_flowcharts']}")
    lines.append(f"通过: {report['passed']}")
    lines.append(f"失败: {report['failed']}")
    overall = "PASS" if report["failed"] == 0 else "FAIL"
    lines.append(f"总体结果: {overall}")
    lines.append("")

    for i, r in enumerate(report["results"], 1):
        rid = r["rid"]
        # An LLM-detected inconsistency fails the flowchart just like a
        # structural error does.
        item_ok = r["structural_ok"] and r.get("llm_ok", True)
        status = "[PASS]" if item_ok else "[FAIL]"
        lines.append(f"[{i}] rid={rid} {status}")
        for err in r.get("errors", []):
            lines.append(f" - {err}")
        if r.get("paths_text"):
            lines.append(" 路径:")
            for path_line in r["paths_text"].split("\n"):
                lines.append(f" {path_line}")
        llm_issues = r.get("llm_issues", [])
        if llm_issues:
            lines.append(" LLM发现的问题:")
            for issue in llm_issues:
                if isinstance(issue, dict):
                    lines.append(f" [{issue.get('severity', '?')}] {issue.get('description', '')}")
                else:
                    # Tolerate malformed items instead of crashing the report.
                    lines.append(f" [?] {issue}")
        lines.append("")

    report_text = "\n".join(lines)
    print(report_text)
    return report_text
# ---------------------------------------------------------------------------
# CLI
# ---------------------------------------------------------------------------
def main():
    """Command-line entry point of the flowchart verifier.

    Loads the input JSON once to decide whether it is a parsed document
    (it has an ``image_analysis`` key) or a standalone flowchart, runs the
    matching verifier, prints the report, optionally saves it, and exits
    with status 1 when any flowchart failed.
    """
    cli = argparse.ArgumentParser(
        description="Verify flowchart logic trees for correctness.",
    )
    cli.add_argument(
        "input",
        metavar="FILE",
        help="Path to _parsed.json or standalone flowchart JSON",
    )
    cli.add_argument(
        "--llm",
        action="store_true",
        help="Run LLM consistency check (compares paths against text description)",
    )
    cli.add_argument(
        "--output-report",
        metavar="PATH",
        help="Save verification report to a file",
    )
    options = cli.parse_args()

    # Determine input type
    with open(options.input, "r", encoding="utf-8") as handle:
        payload = json.load(handle)
    if "image_analysis" in payload:
        verifier = verify_parsed_json
    else:
        verifier = verify_flowchart_file
    report = verifier(options.input, use_llm=options.llm)

    rendered = print_report(report)
    if options.output_report:
        with open(options.output_report, "w", encoding="utf-8") as handle:
            handle.write(rendered)
        logger.info("Report saved: %s", options.output_report)

    # Exit code: 0 for PASS, 1 for FAIL
    if report["failed"] > 0:
        sys.exit(1)


if __name__ == "__main__":
    main()
+9
View File
@@ -0,0 +1,9 @@
# Generated output
output/
# Python
__pycache__/
*.pyc
# Console log
Console output.txt
+137
View File
@@ -0,0 +1,137 @@
"""
Shared configuration for the IR Generation pipeline.
Reads API keys from a secrets.yaml file, falling back to environment variables.
"""
import os
import json
import yaml
# ---- Paths ----
# BASE_DIR is the directory containing this module and WORKSPACE_DIR its
# parent; the doc_parser_skill output directory is looked up under
# WORKSPACE_DIR.
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
WORKSPACE_DIR = os.path.dirname(BASE_DIR)
DOC_PARSER_OUTPUT = os.path.join(WORKSPACE_DIR, "doc_parser_skill", "output")
PROMPTS_DIR = os.path.join(BASE_DIR, "prompts")
TESTS_DIR = os.path.join(BASE_DIR, "tests")
OUTPUT_DIR = os.path.join(BASE_DIR, "output")
# Input file (the parsed PRD JSON)
# The IR_INPUT_JSON environment variable, read once at import time,
# overrides the default document below.
_DEFAULT_INPUT = os.path.join(
    DOC_PARSER_OUTPUT,
    "车机娱乐系统禁止功能文档_脱敏 v0.9_v2_updated.json",
)
INPUT_JSON = os.environ.get("IR_INPUT_JSON", _DEFAULT_INPUT)
def set_input_file(path: str) -> None:
    """Rebind the module-level ``INPUT_JSON`` to *path*.

    Later calls that fall back to ``INPUT_JSON`` pick up the new value.
    """
    global INPUT_JSON
    INPUT_JSON = path
# Secrets file (shared with workspace-document-analyzer)
# .openclaw/workspace/skills/ir_generation_new_skill -> .openclaw/workspace-document-analyzer
# OPENCLAW_HOME is two directory levels above WORKSPACE_DIR.
OPENCLAW_HOME = os.path.dirname(os.path.dirname(WORKSPACE_DIR))
SECRETS_YAML = os.path.join(
    OPENCLAW_HOME, "workspace-document-analyzer", "config", "secrets.yaml",
)
# Intermediate outputs
# All intermediate artefacts are written under OUTPUT_DIR.
SEMANTIC_INDEX_R1_JSON = os.path.join(OUTPUT_DIR, "semantic_index_r1.json")
SEMANTIC_INDEX_R2_JSON = os.path.join(OUTPUT_DIR, "semantic_index_r2.json")
SEMANTIC_INDEX_R3_JSON = os.path.join(OUTPUT_DIR, "semantic_index_r3.json")
SEMANTIC_INDEX_JSON = os.path.join(OUTPUT_DIR, "semantic_index.json")  # merged final
IR_FRAGMENTS_JSON = os.path.join(OUTPUT_DIR, "ir_fragments.json")
PATH_ENUM_JSON = os.path.join(OUTPUT_DIR, "path_enumeration.json")
IR_AUTOCOMPLETE_FRAGMENTS_JSON = os.path.join(OUTPUT_DIR, "ir_autocomplete_fragments.json")
# Final deliverables (placed in doc_parser output per spec)
IR_FINAL_JSON = os.path.join(DOC_PARSER_OUTPUT, "ir_final.json")
IR_AUDIT_REPORT_MD = os.path.join(DOC_PARSER_OUTPUT, "ir_audit_report.md")
# ---- LLM API ----
# Choose provider: "deepseek" | "dashscope"
LLM_PROVIDER = os.environ.get("IR_PROVIDER", "deepseek")
# Model names per provider
# Both entries read the same IR_MODEL variable, so setting it overrides the
# model name for whichever provider is selected.
PROVIDER_MODELS = {
    "deepseek": os.environ.get("IR_MODEL", "deepseek-v4-flash"),
    "dashscope": os.environ.get("IR_MODEL", "qwen-max"),
}
# An unrecognised provider name falls back to the deepseek model entry.
MODEL_NAME = PROVIDER_MODELS.get(LLM_PROVIDER, PROVIDER_MODELS["deepseek"])
# Maximum tokens for LLM responses
MAX_TOKENS = int(os.environ.get("IR_MAX_TOKENS", "16000"))
TEMPERATURE = float(os.environ.get("IR_TEMPERATURE", "0.1"))
# ---- Iteration & Quality ----
MAX_RETRIES_PER_STAGE = int(os.environ.get("IR_MAX_RETRIES", "3"))
COVERAGE_TARGET = float(os.environ.get("IR_COVERAGE_TARGET", "0.95"))
# Stage 1 ensemble temperatures (parallel multi-temperature generation)
# Listed lowest first by default; each value can be overridden through its
# own IR_ENSEMBLE_T* environment variable.
ENSEMBLE_TEMPERATURES = [
    float(os.environ.get("IR_ENSEMBLE_T1", "0.0")),
    float(os.environ.get("IR_ENSEMBLE_T2", "0.3")),
    float(os.environ.get("IR_ENSEMBLE_T3", "0.7")),
]
def _load_secrets() -> dict[str, dict[str, str]]:
    """Read provider credentials from the file at ``SECRETS_YAML``.

    Returns:
        The parsed YAML content, e.g.
        ``{"deepseek": {"apiKey": "...", "baseUrl": "..."}, ...}``; an empty
        dict when the file does not exist or parses to an empty value.
    """
    if not os.path.isfile(SECRETS_YAML):
        return {}
    with open(SECRETS_YAML, "r", encoding="utf-8") as handle:
        parsed = yaml.safe_load(handle)
    return parsed or {}
def _get_provider_config(provider: str) -> dict[str, str]:
    """Resolve ``{apiKey, baseUrl}`` for *provider*.

    Each value is taken from the ``<PROVIDER>_API_KEY`` /
    ``<PROVIDER>_BASE_URL`` environment variable first, then from the
    provider's entry in the secrets file. A missing or empty base URL falls
    back to the provider's own default endpoint, so a dashscope key is never
    sent to the DeepSeek endpoint; unknown providers keep the previous
    DeepSeek fallback.

    Args:
        provider: Provider name, e.g. ``"deepseek"`` or ``"dashscope"``.

    Returns:
        Dict with ``apiKey`` and ``baseUrl``.

    Raises:
        RuntimeError: If no API key is configured for the provider.
    """
    default_base_urls = {
        "deepseek": "https://api.deepseek.com/v1",
        "dashscope": "https://dashscope.aliyuncs.com/compatible-mode/v1",
    }

    secrets = _load_secrets()
    # ``or {}`` also covers a provider key that is present but null in YAML.
    entry = secrets.get(provider) or {}
    env_prefix = provider.upper()

    api_key = os.environ.get(f"{env_prefix}_API_KEY") or entry.get("apiKey", "")
    base_url = (
        os.environ.get(f"{env_prefix}_BASE_URL")
        or entry.get("baseUrl")
        or default_base_urls.get(provider, default_base_urls["deepseek"])
    )

    if not api_key:
        raise RuntimeError(
            f"No API key found for provider '{provider}'. "
            f"Check {SECRETS_YAML} or set {env_prefix}_API_KEY."
        )
    return {"apiKey": api_key, "baseUrl": base_url}
def llm_client():
    """Build an ``OpenAI`` client for the provider named by ``LLM_PROVIDER``.

    The ``openai`` package is imported here rather than at module import
    time; credentials come from ``_get_provider_config``.
    """
    from openai import OpenAI

    provider_cfg = _get_provider_config(LLM_PROVIDER)
    return OpenAI(
        base_url=provider_cfg["baseUrl"],
        api_key=provider_cfg["apiKey"],
    )
def load_input_document(path: str | None = None) -> dict:
"""Load the parsed PRD JSON document."""
path = path or INPUT_JSON
with open(path, "r", encoding="utf-8") as f:
return json.load(f)
def save_json(data, path: str) -> None:
    """Write *data* to *path* as indented UTF-8 JSON.

    Missing parent directories are created. A bare file name (no directory
    component) is written to the current working directory.

    Args:
        data: JSON-serialisable object.
        path: Destination file path.
    """
    parent = os.path.dirname(path)
    # ``os.makedirs("")`` raises FileNotFoundError, so only create the
    # directory when the path actually has one.
    if parent:
        os.makedirs(parent, exist_ok=True)
    with open(path, "w", encoding="utf-8") as f:
        json.dump(data, f, ensure_ascii=False, indent=2)
def load_json(path: str) -> dict:
    """Return the decoded content of the UTF-8 JSON file at *path*."""
    with open(path, "r", encoding="utf-8") as handle:
        payload = json.load(handle)
    return payload
@@ -0,0 +1,593 @@
"""
Deterministic ensemble merge for semantic index generation.
All functions are pure Python with zero LLM calls. Fully testable with mock data.
Cross-references N semantic_index outputs (generated with different temperatures)
and produces a single merged index with confidence scores.
Used by: step1_semantic_index.py
Tested by: tests/test_ensemble_merge.py
"""
from collections import defaultdict
from difflib import SequenceMatcher
# =============================================================================
# Concept Name Similarity
# =============================================================================
def concept_name_similarity(name_a: str, name_b: str) -> float:
    """Score how likely two concept names denote the same concept.

    Rules, first match wins:
      1. Identical strings -> 1.0.
      2. One name contains the other: with ``r = shorter / longer`` length
         ratio, ``0.85 + 0.05 * r`` when ``r >= 0.5`` (0.875 up to just
         under 0.90), otherwise 0.55 so that e.g. "国内" does not match
         "国内行车娱乐限制".
      3. Otherwise the ``SequenceMatcher`` ratio of the two strings.

    Returns:
        A float in [0.0, 1.0]; callers treat >= 0.7 as "likely the same".
    """
    if name_a == name_b:
        return 1.0

    contained = name_a in name_b or name_b in name_a
    if not contained:
        return SequenceMatcher(None, name_a, name_b).ratio()

    shorter, longer = sorted((len(name_a), len(name_b)))
    len_ratio = shorter / longer
    if len_ratio < 0.5:
        # Lengths too different: deliberately below the match threshold.
        return 0.55
    return 0.85 + 0.05 * len_ratio
# =============================================================================
# Concept Clustering & Merging
# =============================================================================
def cluster_concepts(
    all_concepts_lists: list[list[dict]],
    similarity_threshold: float = 0.7,
) -> list[list[tuple[int, dict]]]:
    """Cluster concepts from all ensemble versions by name similarity.

    One greedy pass in version order: each named concept is compared with
    the first member (seed) of every existing cluster and joins the first
    cluster with the highest similarity, provided that similarity is
    positive and at least ``similarity_threshold``; otherwise it starts a
    new cluster. Concepts without a name are skipped.

    Args:
        all_concepts_lists: ``all_concepts_lists[i]`` holds the concepts of
            version ``i``.
        similarity_threshold: Minimum seed similarity for joining.

    Returns:
        Clusters as lists of ``(version_idx, concept_dict)`` tuples.
    """
    clusters: list[list[tuple[int, dict]]] = []

    for version_idx, concepts in enumerate(all_concepts_lists):
        for concept in concepts:
            name = concept.get("name", "")
            if not name:
                continue

            # Score against each cluster's seed; max() keeps the first of
            # equally good clusters.
            scored = [
                (concept_name_similarity(name, group[0][1].get("name", "")), group)
                for group in clusters
            ]
            top_sim, target = max(
                scored, key=lambda pair: pair[0], default=(0.0, None)
            )

            joinable = (
                target is not None
                and top_sim > 0.0
                and top_sim >= similarity_threshold
            )
            if joinable:
                target.append((version_idx, concept))
            else:
                clusters.append([(version_idx, concept)])

    return clusters
def merge_concept_cluster(
    cluster: list[tuple[int, dict]],
    total_versions: int,
) -> tuple[dict, str]:
    """Merge one cluster of matched concepts into a single concept dict.

    Rules:
      - name: longest name; ties go to the lowest ``version_idx``,
        independent of the order of members in the cluster.
      - aliases / defined_in: sorted union over all members.
      - parent: non-null parent with the most votes; ties go to the parent
        first proposed by the lowest ``version_idx``.

    Args:
        cluster: ``(version_idx, concept_dict)`` members of the cluster.
        total_versions: Number of ensemble versions.

    Returns:
        ``(merged_concept_dict, confidence)`` with confidence
        ``"high"``/``"medium"``/``"low"``; ``({}, "low")`` for an empty
        cluster.
    """
    if not cluster:
        return {}, "low"

    # --- name: longest (most specific), then lowest version index ---
    _, name_source = min(
        cluster,
        key=lambda member: (-len(member[1].get("name", "")), member[0]),
    )
    best_name = name_source.get("name", "")

    # --- aliases / defined_in: union (``or []`` tolerates JSON null) ---
    aliases = set()
    defined_in = set()
    for _, concept in cluster:
        aliases.update(concept.get("aliases") or [])
        defined_in.update(concept.get("defined_in") or [])

    # --- parent: most common non-null parent (vote) ---
    parent_votes = defaultdict(int)
    first_version = {}
    for v_idx, concept in cluster:
        parent = concept.get("parent")
        if parent is None:
            continue
        parent_votes[parent] += 1
        first_version[parent] = min(first_version.get(parent, v_idx), v_idx)
    if parent_votes:
        best_parent = max(
            parent_votes,
            key=lambda p: (parent_votes[p], -first_version[p]),
        )
    else:
        best_parent = None

    # --- confidence ---
    versions_present = len({v_idx for v_idx, _ in cluster})
    confidence = compute_confidence_versions(
        versions_present,
        total_versions,
        any(v_idx == 0 for v_idx, _ in cluster),
    )

    merged = {
        "name": best_name,
        "aliases": sorted(aliases),
        "defined_in": sorted(defined_in),
        "parent": best_parent,
        "confidence": confidence,
    }
    return merged, confidence
# =============================================================================
# Unit Similarity Functions
# =============================================================================
def _collect_logic_tree_nodes(unit: dict) -> set[str]:
    """Return the union of ``logic_tree_nodes`` over a unit's logic-tree sources.

    Only entries of ``unit["sources"]`` whose ``type`` is ``"logic_tree"``
    contribute.
    """
    node_lists = (
        src.get("logic_tree_nodes", [])
        for src in unit.get("sources", [])
        if src.get("type") == "logic_tree"
    )
    return set().union(*node_lists)
def unit_node_jaccard(unit_a: dict, unit_b: dict) -> float:
    """Jaccard similarity of the logic-tree node sets of two units.

    Jaccard(A, B) = |A ∩ B| / |A ∪ B|. Returns 0.0 when either unit has no
    logic-tree nodes.
    """
    nodes_a = _collect_logic_tree_nodes(unit_a)
    nodes_b = _collect_logic_tree_nodes(unit_b)
    if not (nodes_a and nodes_b):
        return 0.0
    shared = nodes_a & nodes_b
    combined = nodes_a | nodes_b
    return len(shared) / len(combined)
def path_similarity(path_a: list[str], path_b: list[str]) -> float:
    """Blend order-aware and order-independent similarity of two paths.

    The score is ``0.5 * seq_sim + 0.5 * set_sim`` where ``seq_sim`` is the
    ``SequenceMatcher`` ratio of the ``"|"``-joined paths and ``set_sim`` is
    the Jaccard similarity of the element sets. Two empty paths score 1.0;
    exactly one empty path scores 0.0.

    Returns:
        A float in [0.0, 1.0].
    """
    if not path_a or not path_b:
        return 1.0 if not path_a and not path_b else 0.0

    # Order-aware component
    seq_sim = SequenceMatcher(None, "|".join(path_a), "|".join(path_b)).ratio()

    # Order-independent component
    elems_a, elems_b = set(path_a), set(path_b)
    set_sim = len(elems_a & elems_b) / len(elems_a | elems_b)

    return 0.5 * seq_sim + 0.5 * set_sim
def unit_similarity(unit_a: dict, unit_b: dict) -> float:
    """Weighted similarity of two function_units.

    ``0.6 * unit_node_jaccard`` (shared logic-tree nodes, the primary
    signal) plus ``0.4 * path_similarity`` of the units' ``path`` lists.

    Returns:
        A float in [0.0, 1.0]; callers treat >= 0.5 as "likely the same
        function_unit".
    """
    node_score = unit_node_jaccard(unit_a, unit_b)
    path_score = path_similarity(unit_a.get("path", []), unit_b.get("path", []))
    return 0.6 * node_score + 0.4 * path_score
# =============================================================================
# Function Unit Clustering & Merging
# =============================================================================
def cluster_function_units(
    all_units_lists: list[list[dict]],
    similarity_threshold: float = 0.5,
) -> list[list[tuple[int, dict]]]:
    """Cluster function_units from all ensemble versions by similarity.

    Versions are processed in list order, so units of earlier versions seed
    the clusters. A unit's similarity to a cluster is its best
    ``unit_similarity`` to any member; it joins the first cluster with the
    highest such score when that score is positive and at least
    ``similarity_threshold``, otherwise it opens a new cluster.

    Args:
        all_units_lists: One list of units per ensemble version.
        similarity_threshold: Minimum similarity for joining a cluster.

    Returns:
        Clusters as lists of ``(version_idx, unit_dict)`` tuples.
    """
    clusters: list[list[tuple[int, dict]]] = []

    for version_idx, units in enumerate(all_units_lists):
        for unit in units:
            scored = [
                (max(unit_similarity(unit, member) for _, member in group), group)
                for group in clusters
            ]
            top_sim, target = max(
                scored, key=lambda pair: pair[0], default=(0.0, None)
            )

            joinable = (
                target is not None
                and top_sim > 0.0
                and top_sim >= similarity_threshold
            )
            if joinable:
                target.append((version_idx, unit))
            else:
                clusters.append([(version_idx, unit)])

    return clusters
def pick_best_representative(
    cluster: list[tuple[int, dict]],
) -> dict:
    """Select the best function_unit of a cluster as its representative.

    Scoring (each term normalised to [0, 1] within the cluster):
      - 0.35: number of logic-tree nodes
      - 0.25: number of sources
      - 0.20: description length, capped at 500 characters; the cap is
        applied to the normalising maximum too, so a description at or
        above the cap earns the full term
      - 0.20: version rank (lower ``version_idx`` scores higher)

    Ties go to the member that comes first in the cluster.

    Args:
        cluster: ``(version_idx, unit_dict)`` members.

    Returns:
        A deep copy of the winning unit, so changes to the result never
        leak into the input units; ``{}`` for an empty cluster.
    """
    import copy

    if not cluster:
        return {}

    desc_cap = 500

    def _capped_desc_len(unit: dict) -> int:
        return min(len(unit.get("description", "")), desc_cap)

    # Maxima used for normalisation
    max_nodes = max(len(_collect_logic_tree_nodes(unit)) for _, unit in cluster)
    max_sources = max(len(unit.get("sources", [])) for _, unit in cluster)
    max_desc_len = max(_capped_desc_len(unit) for _, unit in cluster)
    max_version_idx = max(v_idx for v_idx, _ in cluster)
    member_count = len(cluster)

    def score(v_idx: int, unit: dict) -> float:
        nodes = len(_collect_logic_tree_nodes(unit))
        sources = len(unit.get("sources", []))
        desc_len = _capped_desc_len(unit)
        temp_rank = 1.0 - (v_idx / max(member_count, max_version_idx + 1))
        return (
            0.35 * (nodes / max(1, max_nodes))
            + 0.25 * (sources / max(1, max_sources))
            + 0.20 * (desc_len / max(1, max_desc_len))
            + 0.20 * temp_rank
        )

    best = max(cluster, key=lambda member: score(member[0], member[1]))
    return copy.deepcopy(best[1])
def merge_unit_sources(
    cluster: list[tuple[int, dict]],
) -> list[dict]:
    """Union the sources of all units in a cluster, dropping duplicates.

    Duplicate detection key: ``("logic_tree", image_id)`` for logic-tree
    sources and ``(type, section, row)`` for every other source. Among
    duplicates the source with the most ``logic_tree_nodes`` is kept (the
    first one on a tie).

    Returns:
        Shallow copies of the kept sources, in first-seen key order.
    """

    def _dedup_key(src: dict) -> tuple:
        kind = src.get("type", "")
        if kind == "logic_tree":
            return ("logic_tree", src.get("image_id", ""))
        return (kind, src.get("section", ""), src.get("row", ""))

    grouped = defaultdict(list)
    for _, unit in cluster:
        for src in unit.get("sources", []):
            grouped[_dedup_key(src)].append(src)

    return [
        dict(max(candidates, key=lambda s: len(s.get("logic_tree_nodes", []))))
        for candidates in grouped.values()
    ]
def compute_confidence_versions(
    versions_present: int,
    total_versions: int,
    includes_lowest_temp: bool = False,
) -> str:
    """Map cross-version agreement to a three-level confidence label.

    With ``ratio = versions_present / total_versions``:
      - ``"high"``: ratio >= 1.0, or ratio >= 0.5 and the item appears in
        the lowest-temperature version.
      - ``"medium"``: ratio >= 0.5 without the lowest-temperature version.
      - ``"low"``: ratio < 0.5.

    Args:
        versions_present: Number of versions the item appeared in.
        total_versions: Total number of ensemble versions (must be > 0).
        includes_lowest_temp: Whether the item appeared in the
            lowest-temperature version.
    """
    ratio = versions_present / total_versions
    if ratio >= 1.0:
        return "high"
    if ratio < 0.5:
        return "low"
    return "high" if includes_lowest_temp else "medium"
def ensemble_merge_concepts(
    all_concepts_lists: list[list[dict]],
) -> list[dict]:
    """Merge the concept lists of all ensemble versions.

    Concepts are clustered, each cluster is merged into one concept, and
    only the first merged concept per name is kept. Every kept concept gets
    an ``ensemble_support`` string ``"<versions in cluster>/<total>"``. The
    list is sorted by confidence (high first) then name, and finally passed
    through ``_validate_concept_parents``.

    Returns:
        The merged concept dicts, each carrying a ``confidence`` field.
    """
    version_count = len(all_concepts_lists)
    rank = {"high": 0, "medium": 1, "low": 2}

    by_name: dict[str, dict] = {}
    for cluster in cluster_concepts(all_concepts_lists):
        concept, _confidence = merge_concept_cluster(cluster, version_count)
        name = concept.get("name", "")
        if not name or name in by_name:
            continue
        supporting = {version for version, _member in cluster}
        concept["ensemble_support"] = f"{len(supporting)}/{version_count}"
        by_name[name] = concept

    # Sort: high confidence first, then by name
    merged = sorted(
        by_name.values(),
        key=lambda c: (rank.get(c.get("confidence", "low"), 3), c.get("name", "")),
    )

    # Validate and fix parent references
    return _validate_concept_parents(merged)
def _validate_concept_parents(concepts: list[dict]) -> list[dict]:
    """Post-merge: make every concept's ``parent`` refer to a listed concept.

    For a parent name that is not in the list:
      1. If the best fuzzy match among the *other* concepts scores >= 0.7
         (``concept_name_similarity``), the reference is rewritten to it and
         the concept's confidence drops one level.
      2. Otherwise the parent is set to ``None`` and the confidence drops
         one level.

    Candidates are scanned in list order, so equally good matches resolve
    the same way on every run, and a concept is never matched to itself.
    The list is re-sorted in place by confidence then name.

    Args:
        concepts: Merged concept dicts; each must have a ``name``.

    Returns:
        The same list object, fixed and re-sorted.
    """
    ordered_names = [c["name"] for c in concepts]
    known_names = set(ordered_names)
    conf_order = {"high": 0, "medium": 1, "low": 2}

    for c in concepts:
        parent = c.get("parent")
        if parent is None or parent in known_names:
            continue

        # Dangling parent — try fuzzy match (deterministic order, no self).
        best_match = None
        best_sim = 0.0
        for name in ordered_names:
            if name == c["name"]:
                continue
            sim = concept_name_similarity(parent, name)
            if sim > best_sim:
                best_sim = sim
                best_match = name

        if best_match and best_sim >= 0.7:
            c["parent"] = best_match
            # Downgrade if match was fuzzy (not exact)
            if best_sim < 1.0:
                c["confidence"] = _downgrade_confidence(c.get("confidence", "low"))
        else:
            c["parent"] = None
            c["confidence"] = _downgrade_confidence(c.get("confidence", "low"))

    # Re-sort after confidence changes
    concepts.sort(
        key=lambda c: (conf_order.get(c.get("confidence", "low"), 3), c.get("name", ""))
    )
    return concepts
def _downgrade_confidence(current: str) -> str:
    """Return the next lower confidence level.

    ``"high"`` becomes ``"medium"``; every other value becomes ``"low"``.
    """
    return "medium" if current == "high" else "low"
def ensemble_merge_function_units(
    all_units_lists: list[list[dict]],
) -> list[dict]:
    """Merge the function_units of all ensemble versions.

    Steps:
      1. Cluster the units across versions.
      2. Per cluster: take the best representative, replace its ``sources``
         with the merged sources of the cluster, and attach ``confidence``,
         ``ensemble_support`` (``"<present>/<total>"``) and
         ``source_versions`` (number of versions present).
      3. Sort by confidence (high first) then original ``unit_id`` and
         renumber as ``FU-ENS-001``, ``FU-ENS-002``, ...; the previous id is
         preserved in ``original_unit_id`` unless that key already exists.

    Returns:
        The merged function_unit dicts.
    """
    version_total = len(all_units_lists)
    rank = {"high": 0, "medium": 1, "low": 2}

    merged = []
    for cluster in cluster_function_units(all_units_lists):
        representative = pick_best_representative(cluster)
        representative["sources"] = merge_unit_sources(cluster)

        member_versions = {v_idx for v_idx, _ in cluster}
        support = len(member_versions)
        representative["confidence"] = compute_confidence_versions(
            support, version_total, 0 in member_versions
        )
        representative["ensemble_support"] = f"{support}/{version_total}"
        representative["source_versions"] = support
        merged.append(representative)

    # Sort by confidence desc, then by unit_id
    merged.sort(
        key=lambda u: (rank.get(u.get("confidence", "low"), 3), u.get("unit_id", ""))
    )

    # Reassign stable unit_ids, keeping the original id for traceability
    for position, unit in enumerate(merged, start=1):
        unit.setdefault("original_unit_id", unit.get("unit_id", ""))
        unit["unit_id"] = f"FU-ENS-{position:03d}"

    return merged
# =============================================================================
# Top-Level Ensemble Merge
# =============================================================================
def ensemble_merge(
    semantic_indices: list[dict],
) -> dict:
    """Combine N semantic-index outputs into one ensemble result.

    Args:
        semantic_indices: One semantic_index dict per temperature run;
            index 0 should be the lowest-temperature run.

    Returns:
        Dict with ``feature_name`` (most frequent non-empty name, earliest
        version on a tie), ``ensemble_versions``, merged ``concepts`` and
        ``function_units``, and a ``confidence_summary`` of per-level counts
        (empty dict when no input was given).
    """
    if not semantic_indices:
        return {
            "feature_name": "",
            "ensemble_versions": 0,
            "concepts": [],
            "function_units": [],
            "confidence_summary": {},
        }

    # Per-version inputs
    concept_lists = [index.get("concepts", []) for index in semantic_indices]
    unit_lists = [index.get("function_units", []) for index in semantic_indices]

    concepts = ensemble_merge_concepts(concept_lists)
    units = ensemble_merge_function_units(unit_lists)

    # Feature name: majority vote across versions
    name_votes = defaultdict(int)
    for index in semantic_indices:
        candidate = index.get("feature_name", "")
        if candidate:
            name_votes[candidate] += 1
    feature_name = max(name_votes, key=name_votes.get) if name_votes else ""

    # Confidence tallies
    unit_levels = defaultdict(int)
    for unit in units:
        unit_levels[unit.get("confidence", "low")] += 1
    concept_levels = defaultdict(int)
    for concept in concepts:
        concept_levels[concept.get("confidence", "low")] += 1

    summary = {
        "total_units": len(units),
        "high": unit_levels.get("high", 0),
        "medium": unit_levels.get("medium", 0),
        "low": unit_levels.get("low", 0),
        "total_concepts": len(concepts),
        "concept_high": concept_levels.get("high", 0),
        "concept_medium": concept_levels.get("medium", 0),
        "concept_low": concept_levels.get("low", 0),
    }
    return {
        "feature_name": feature_name,
        "ensemble_versions": len(semantic_indices),
        "concepts": concepts,
        "function_units": units,
        "confidence_summary": summary,
    }
+157
View File
@@ -0,0 +1,157 @@
"""
IR Generation Pipeline Orchestrator.
Run all four stages sequentially:
python main.py [--skip-step1] [--skip-step2] [--skip-step2.5] [--skip-step3] [--test-only]
The pipeline reads the parsed PRD JSON from doc_parser and produces:
- ir_final.json: the final IR rules
- ir_audit_report.md: completeness audit report for human review
"""
import argparse
import os
import subprocess
import sys
from pathlib import Path
import config
BASE_DIR = Path(__file__).parent
def _subprocess_env(extra: dict | None = None) -> dict:
"""Build environment dict for subprocesses, carrying forward overrides."""
env = os.environ.copy()
env.update(extra or {})
return env
def run_step(script_name: str, description: str, extra_env: dict | None = None) -> bool:
    """Run one pipeline step script in a child interpreter.

    Args:
        script_name: File name of the script inside ``BASE_DIR``.
        description: Title printed in the banner.
        extra_env: Environment overrides for the child process.

    Returns:
        True when the script exists and exits with status 0.
    """
    banner = "#" * 60
    print(f"\n{banner}")
    print(f"# {description}")
    print(f"{banner}")

    script_path = BASE_DIR / script_name
    if script_path.exists():
        completed = subprocess.run(
            [sys.executable, str(script_path)],
            cwd=str(BASE_DIR),
            env=_subprocess_env(extra_env),
        )
        return completed.returncode == 0

    print(f"错误: 脚本不存在 {script_path}")
    return False
def run_test(test_name: str, description: str, extra_env: dict | None = None) -> bool:
    """Run one test script from ``BASE_DIR/tests`` in a child interpreter.

    Args:
        test_name: File name of the test script.
        description: Title printed in the banner.
        extra_env: Environment overrides for the child process.

    Returns:
        True when the script exists and exits with status 0.
    """
    rule = "=" * 60
    print(f"\n{rule}")
    print(f"测试: {description}")
    print(f"{rule}")

    test_path = BASE_DIR / "tests" / test_name
    if test_path.exists():
        completed = subprocess.run(
            [sys.executable, str(test_path)],
            cwd=str(BASE_DIR),
            env=_subprocess_env(extra_env),
        )
        return completed.returncode == 0

    print(f"错误: 测试脚本不存在 {test_path}")
    return False
def main():
    """Run the IR generation pipeline from the command line.

    With ``--test-only`` only the four validation scripts run and the exit
    status reflects their combined result. Otherwise each stage that is not
    skipped runs in order; a failing stage script stops the pipeline with
    exit status 1. Each stage's validation script runs afterwards; a failing
    validation does not stop later stages but is listed at the end and turns
    the exit status into 1, instead of being silently ignored.
    """
    parser = argparse.ArgumentParser(description="IR Generation Pipeline")
    parser.add_argument("--skip-step1", action="store_true",
                        help="跳过阶段一(语义索引)")
    parser.add_argument("--skip-step2", action="store_true",
                        help="跳过阶段二(IR 提取)")
    parser.add_argument("--skip-step2.5", "--skip-step2-5", action="store_true",
                        dest="skip_step2_5",
                        help="跳过阶段2.5(分支覆盖自动补全)")
    parser.add_argument("--skip-step3", action="store_true",
                        help="跳过阶段三(合并与审计)")
    parser.add_argument("--test-only", action="store_true",
                        help="仅运行测试,不调用 LLM")
    parser.add_argument(
        "--input", "-i", type=str, default=None,
        help="输入 JSON 文件路径(覆盖默认的 doc_parser 输出)"
    )
    parser.add_argument(
        "--provider", "-p", type=str, default=None,
        help="LLM provider: deepseek | dashscope(覆盖 IR_PROVIDER 环境变量)"
    )
    args = parser.parse_args()

    # Build extra env vars for subprocesses
    extra_env = {}
    if args.input:
        extra_env["IR_INPUT_JSON"] = args.input
        print(f"输入文件: {args.input}")
    if args.provider:
        extra_env["IR_PROVIDER"] = args.provider
        print(f"LLM Provider: {args.provider}")

    if args.test_only:
        all_ok = True
        all_ok &= run_test("test_step1.py", "Step 1 验证", extra_env)
        all_ok &= run_test("test_step2.py", "Step 2 验证", extra_env)
        all_ok &= run_test("test_step2_5.py", "Step 2.5 验证", extra_env)
        all_ok &= run_test("test_step3.py", "Step 3 验证", extra_env)
        sys.exit(0 if all_ok else 1)

    # (skip flag, label, stage script, stage title, test script, test title)
    stages = [
        (args.skip_step1, "阶段一", "step1_semantic_index.py",
         "阶段一:宏观语义索引", "test_step1.py", "Step 1 验证"),
        (args.skip_step2, "阶段二", "step2_ir_extraction.py",
         "阶段二:逐功能单元 IR 提取", "test_step2.py", "Step 2 验证"),
        (args.skip_step2_5, "阶段2.5", "step2_5_branch_coverage.py",
         "阶段2.5:分支覆盖自动补全", "test_step2_5.py", "Step 2.5 验证"),
        (args.skip_step3, "阶段三", "step3_merge_and_audit.py",
         "阶段三:确定性合并与完整性校验", "test_step3.py", "Step 3 验证"),
    ]

    failures = []
    for skip, label, script, title, test_script, test_title in stages:
        if skip:
            continue
        if not run_step(script, title, extra_env):
            # Later stages depend on this stage's output, so stop here.
            print(f"\n{label}失败,停止流水线。修复后重试。")
            sys.exit(1)
        if not run_test(test_script, test_title, extra_env):
            # Validation failures are collected and reported at the end.
            failures.append(f"{label}验证")

    if failures:
        print(f"\n失败阶段: {', '.join(failures)}")
        sys.exit(1)

    print(f"\n{'='*60}")
    print("流水线全部完成!")
    print(f"最终 IR: {config.IR_FINAL_JSON}")
    print(f"审计报告: {config.IR_AUDIT_REPORT_MD}")
    print(f"{'='*60}")


if __name__ == "__main__":
    main()
@@ -0,0 +1,46 @@
## 上一轮遗漏分析
上一轮生成的语义索引经过自动校验,发现以下问题需要修正:
### 遗漏的逻辑树路径
以下逻辑树决策路径未被任何 function_unit 覆盖,请为每条路径生成对应的 function_unit
{missing_paths}
### 遗漏的概念
以下关键概念未在 concepts 列表中出现,请补充:
{missing_concepts}
### 格式问题
以下 function_unit 或 concept 的格式不符合要求:
{format_issues}
### concept parent 问题
以下概念的 parent 引用有问题(悬空引用或缺少 parent):
{parent_issues}
---
请在本次生成中针对以上问题进行修正。注意:
1. 你不需要从头生成完整的语义索引,只需要输出**补充和修正**的部分
2. function_units 的输出应只包含本次新增或修正的单元(已有的正确单元不需要重复)
3. concepts 的输出应只包含本次新增或修正的概念
4. 如果格式问题中提到"空壳单元":删除该 unit,或将其合并到包含实际 action 的 unit 中。纯开关状态不是独立的功能行为
5. 如果格式问题中提到"不构成有效路径":说明你引用了互斥分支上的节点。检查 logic_tree_nodes,确保它们都落在逻辑树的**同一条分支路径**上(例如 n4 是关闭分支,n8 是开启分支,不能共存)
6. 如果格式问题提到"缺少 path"或"缺少 sources":补充对应字段
## 输出格式
只输出 JSON
{
"feature_name": "(与之前相同)",
"supplemental_function_units": [
// 只放新增的或修正的 function_unit
],
"supplemental_concepts": [
// 只放新增的或修正的 concept
],
"corrections": {
// 需要修正的已有项: { "unit_id或concept_name": { 修正后的字段 }, ... }
}
}
@@ -0,0 +1,123 @@
你是吉利汽车车机系统(XX Auto)的产品需求分析师。你的任务是从行车娱乐限制功能 PRD 文档中提取"语义索引"——一份结构化、有层级的功能清单,而不是逐字翻译。
## 文档结构说明
下面是一份 Word 文档的解析结果,包含:
1. **sections**:按章节组织的混合内容(段落 + 表格),每个 section 有 `source`(章节标题)、`blocks`(`para` 文本段落和 `table` 结构表格)、`images`(引用的图片 ID 列表)
2. **image_analysis**:文档中流程图的程序化分析结果,其中 `logic_tree` 是由节点组成的决策树:
- `state` 节点:状态说明
- `decision` 节点:判断条件 + `branches`(分支值 → 目标节点 ID)
- `action` 节点:系统或用户交互动作
3. **resolved_conflicts**:文档中图文冲突的仲裁结果,明确指出应以文字还是图片为准
## 文档全文
{document_json}
## 你的任务
阅读整份文档后,输出一份 **语义索引 JSON**,包含:
### 1. feature_name
功能名称,如"行车娱乐限制"
### 2. concepts(带层级)
文档中定义或使用的关键概念列表。每个概念包含:
- `name`:概念的标准名称(必填)
- `aliases`:同义词/别名列表(如"行车娱乐限制"、"行车娱乐禁止")
- `defined_in`:定义该概念的章节号列表(如 ["3.1", "3.1.1"])
- `parent`:父概念名称(字符串或 null)(必填)
**概念层级规则(重要)**
你必须按照以下 4 层结构组织概念,并为每个概念指定正确的 `parent`:
- **Level 0(地理范围)**: "国内"、"海外" — parent 为 null
- **Level 1(功能)**: "行车娱乐限制"、"行车娱乐禁止" — parent 为对应的 scope(如 "国内")
- **Level 2(限制方式)**: "系统限制"、"SDK限制"、"其他应用" — parent 为对应的 feature
- **Level 3(具体行为)**: "前台打断"、"后台限制启动"、"后台暂停功能"、"无限制" — parent 为对应的 method
除了以上层级,还可以有"行车娱乐限制开关"、"车速条件"、"档位条件"、"Toast提示"等辅助概念,它们应有合理的 parent。
**重要约束:每个 concept 的 parent 值必须是 concepts 列表中已存在的另一个 concept 的 name,或者是 null。禁止引用不存在的概念名。**
### 3. function_units(带路径)
文档中描述的所有主要功能行为的列表。**每个 function_unit 对应逻辑树中的一条叶子路径**。每个 function unit 包含:
- `unit_id`:唯一标识,格式 "FU-001", "FU-002"...
- `name`:简短名称,如"国内-系统限制-前台-行车打断"
- `description`:1-3 句描述该规则的行为
- `path`:层级路径数组,从高到低,如 `["国内", "系统限制", "前台打断"]`(必填)。**path 中的每个元素必须是 concepts 列表中已存在的概念名。**
- `sources`:该规则在文档中的来源锚点列表,每项包含:
- `section`:章节号
- `type`:来源类型,`"table"` 或 `"para"` 或 `"logic_tree"`
- `row`:如果是表格行(从 1 开始)
- `text_snippet`:前 200 字的关键文字
- `image_id`:如果是逻辑树来源,填写图片 rId
- `logic_tree_nodes`:如果是逻辑树来源,列出相关节点 ID 列表
## function_units 分解策略(重要)
**按逻辑树的每条叶子路径生成一个 function_unit**
1. **叶子路径 = 从根节点到叶子节点(end 类型)的完整决策链**,包含路径上所有中间节点和叶子节点的最终动作
2. **每条叶子路径对应一个 function_unit**:不同决策分支导向不同叶子节点 → 不同的 function_unit
3. **"不受限"叶子节点也必须建模**:即使 action 是"不执行任何限制操作",也要创建对应的 function_unit
4. **禁止合并不同叶子节点**:不要将多个不同叶子节点的结果合并到一个 function_unit(除非它们触发完全相同的动作且属于同一父分支)
5. **文字描述中的功能单独列出**:对于无法对应到逻辑树节点的功能(如纯文字描述的功能行为),用 table/para 类型 source,path 用语义路径
6. **非流程图的图片也可能包含功能行为**:rId18 等图片的描述文本中可能包含功能规则(如"使用语音打开受限应用"),同样需要提取为 function_unit
**重要:不要创建纯开关/状态的空壳 unit**。"开关开启"本身不是一个功能行为(它没有 action),它是其他单元的 precondition。如果一个 function_unit 的 path 只有 `["国内", "开关开启"]` 且 sources 中只有 n1/n2/n3 这样的根/开关节点,说明它不是真正的功能单元,不应该输出。
{feedback}
## 权威性规则
1. **逻辑树(流程图)是权威来源**:逻辑树定义了功能的确切行为。识别 function_unit 时必须优先按逻辑树路径建模。文字和表格用于补充描述、提供确切措辞(如 Toast 文案),但不应覆盖或曲解逻辑树路径。
2. **logic_tree_nodes 必须构成有效路径**:每个 function_unit 引用的 logic_tree_nodes 列表,必须对应逻辑树中的**一条连通路径**。禁止将互斥分支上的节点混入同一个 source(例如 n4 是"开关关闭"分支,n8 是"开关开启"分支的下游节点,它们不能出现在同一 function_unit 中)。
3. **resolved_conflicts 中的仲裁是最终决定**:如果文档有图文冲突且已仲裁,严格按仲裁结果处理。
4. **逻辑树路径应全部覆盖**:下面是程序从文档逻辑树中枚举的全部决策路径,请逐一确认每条路径都有对应的 function_unit
{logic_tree_paths}
## 关键要求
1. **必须覆盖所有逻辑树路径**:上面列出的每条路径必须被至少一个 function_unit 的 sources 引用。
2. **必须覆盖表格中的所有规则**:表格中列出的每种"限制方法"、"限制规则"都要有对应的 function_unit。
3. **区分"限制"与"禁止"**:文档中"行车娱乐限制"(前台应用打断)和"行车娱乐禁止"(后台应用启动限制)是两个不同的子场景,必须分别建模。
4. **区分不同应用类型**:系统限制、SDK 限制、其他应用的行为路径不同,必须分别建模。
5. **包含开关状态**:开关"开启"和"关闭"两种状态下的行为都要覆盖。
6. **概念和路径必须有层级**:每个 concept 指定正确的 parent;每个 function_unit 输出 path 数组。
## 输出格式
**只输出 JSON,不要有 markdown 代码块标记或其他文字**:
{
"feature_name": "...",
"concepts": [
{"name": "国内", "aliases": [], "defined_in": ["2.7", "3.1"], "parent": null},
{"name": "行车娱乐限制", "aliases": [], "defined_in": ["3.1", "3.1.1"], "parent": "国内"},
...
],
"function_units": [
{
"unit_id": "FU-001",
"name": "国内-系统限制-前台-行车打断",
"description": "...",
"path": ["国内", "系统限制", "前台打断"],
"sources": [
{"section": "3.1.1", "type": "table", "row": 2, "text_snippet": "打断:车速>=15km/h且持续5秒后..."},
{"image_id": "rId16", "type": "logic_tree", "logic_tree_nodes": ["n2","n3","n8","n19","n21","n23","n25","n26"]}
]
},
...
]
}
@@ -0,0 +1,200 @@
你是吉利汽车车机系统的需求分析专家。你的任务是基于给定的精准上下文包,为单个功能单元(Function Unit)提取详细的 **IR 规则(Intermediate Representation Rule)**。
## 上下文
下面是一个功能单元的精准上下文包,包含了从原始需求文档中提取的相关文字、表格和逻辑树:
### 功能单元概要
- **unit_id**: {unit_id}
- **unit_name**: {unit_name}
- **unit_description**: {unit_description}
### 相关文字段落
{texts}
### 相关表格
{tables}
### 相关逻辑树
{logic_trees}
### 图文冲突仲裁(如有)
{resolved_conflicts}
## IR Schema
你需要为这个功能单元输出一个 **规则数组(rules)**。每条规则遵循以下 schema:
```json
{{
"rule_id": "{unit_id}-DOMESTIC-SYS-FG-INTERRUPT-01",
"path": ["国内", "系统限制", "前台打断"],
"description": "国内车型,开关开启,系统限制类应用在前台,车速>=15km/h且持续>5秒且非P档时,系统打断应用前台进程、将应用调入后台,显示Toast'在行车状态下无法使用该应用'",
"priority": "P0",
"sources": [
{{"type": "table", "section": "3.1.1", "row": 2, "text_snippet": "打断:车速>=15km/h且持续5秒后...", "priority": "supplementary"}},
{{"type": "logic_tree", "image_id": "rId16", "node_ids": ["n2","n3","n8","n19","n21","n23","n25","n26"], "priority": "primary_source"}}
],
"precondition": {{
"geographic_scope": "国内",
"screen_type": "any",
"switch": "开启",
"app_type": "系统限制",
"app_state": "前台"
}},
"trigger": {{
"operator": "AND",
"conditions": [
{{"signal": "车速", "operator": ">=", "value": 15, "unit": "km/h"}},
{{"signal": "车速_持续时间", "operator": ">", "value": 5, "unit": "秒"}},
{{"signal": "档位", "operator": "!=", "value": "P"}}
]
}},
"actions": [
{{"type": "system", "description": "打断应用前台进程"}},
{{"type": "system", "description": "将应用调入后台"}},
{{"type": "user_interaction", "description": "显示Toast", "content": "在行车状态下无法使用该应用"}}
]
}}
```
## 字段说明(必读)
1. **rule_id**: 格式为 `{unit_id}-SCOPE-METHOD-BEHAVIOR-NN`,其中:
- SCOPE: DOMESTIC(国内)| OVERSEAS(海外)
- METHOD: SYS(系统限制)| SDK(SDK限制)| OTHER(其他应用)
- BEHAVIOR: FG-INTERRUPT(前台打断)| BG-BLOCK(后台限制启动)| BG-PAUSE(后台暂停功能)| NO-RESTRICT(无限制)| SWITCH-OFF(开关关闭)
- NN: 序号从 01 开始
2. **path**: 层级路径数组(必填)。从 scope 到 behavior 逐级列出,如 `["国内", "系统限制", "前台打断"]`。此字段用于程序化遍历所有功能点。
3. **description**: 完整但简洁地描述整个规则,必须包含:地理范围 + 开关状态 + 应用类型 + 前后台状态 + 触发条件 + 所有动作。人读取此字段即可设计测试用例。
4. **priority**: P0(核心安全规则)、P1(重要规则)、P2(边界情况)。
5. **sources**: 每条规则必须列出所有数据来源。逻辑树类型的 source 必须标记 `"priority": "primary_source"`。文字/表格类型的 source 标记 `"priority": "supplementary"`。**node_ids 必须列举该规则在逻辑树中经历的所有 decision 和 action 节点。**
6. **precondition**: 规则生效的前置状态条件。必须包含以下字段:
- `geographic_scope`(必填):"国内" | "海外"
- `screen_type`(必填):"CSD" | "PSD" | "RFD" | "any"(如文档未区分屏幕类型则填 "any")
- `switch`:开关状态("开启" | "关闭")
- `app_type`:应用类型
- `app_state`:应用前后台状态("前台" | "后台")
如某字段不适用,可省略。
7. **trigger**: 触发条件对象:
- `operator`: "AND" | "OR"
- `conditions`: 条件数组,每个条件必须有 `signal`、`operator`、`value`。有单位加 `unit`。
- 如为瞬时事件(用户点击),用 `event` 字段。
8. **actions**: 每个动作必须有 `type`("system" | "user_interaction")和 `description`。
- `"user_interaction"` 类型必须有 `content` 字段,填写**确切的提示文案**。
- **禁止使用占位符**:content 不能是"文案由业务定义"、"待定"、"自定义"等。如果文档中给出了文案,必须原样填入。如果文档确实未给出文案,填写 `"(文档未指定)"` 并标注。
## Few-shot 示例
### 示例 1:行车娱乐限制(前台打断)
**输入上下文**:国内车型,开关开启,系统限制类应用在前台,车速>=15km/h且持续>5秒且非P档时,打断应用并显示Toast"在行车状态下无法使用该应用"。
**期望输出**
```json
{{
"rule_id": "FU-001-DOMESTIC-SYS-FG-INTERRUPT-01",
"path": ["国内", "系统限制", "前台打断"],
"description": "国内车型,开关开启,系统限制类应用在前台,当车速>=15km/h且持续超过5秒且非P档时,系统打断应用前台进程、将应用调入后台,并弹出Toast提示'在行车状态下无法使用该应用'",
"priority": "P0",
"sources": [
{{"type": "table", "section": "3.1.1", "row": 2, "text_snippet": "行车娱乐限制:目标应用/功能处于前台时 ○ 打断:车速>=15km/h且持续5秒后...", "priority": "supplementary"}},
{{"type": "logic_tree", "image_id": "rId16", "node_ids": ["n2","n3","n8","n19","n21","n23","n25","n26"], "priority": "primary_source"}}
],
"precondition": {{
"geographic_scope": "国内",
"screen_type": "any",
"switch": "开启",
"app_type": "系统限制",
"app_state": "前台"
}},
"trigger": {{
"operator": "AND",
"conditions": [
{{"signal": "车速", "operator": ">=", "value": 15, "unit": "km/h"}},
{{"signal": "车速_持续时间", "operator": ">", "value": 5, "unit": "秒"}},
{{"signal": "档位", "operator": "!=", "value": "P"}}
]
}},
"actions": [
{{"type": "system", "description": "打断应用前台进程"}},
{{"type": "system", "description": "将应用调入后台"}},
{{"type": "user_interaction", "description": "显示Toast", "content": "在行车状态下无法使用该应用"}}
]
}}
```
### 示例 2:行车娱乐禁止(后台启动拦截)
**输入上下文**:国内车型,开关开启,应用在后台,非P档时阻止应用启动,提示"请在P挡时使用该功能/应用"。
**期望输出**
```json
{{
"rule_id": "FU-002-DOMESTIC-SYS-BG-BLOCK-01",
"path": ["国内", "系统限制", "后台限制启动"],
"description": "国内车型,开关开启,目标应用处于后台,当用户尝试启动应用且档位非P档时,系统限制应用/功能启用,并弹出Toast提示'请在P挡时使用该功能/应用'",
"priority": "P0",
"sources": [
{{"type": "table", "section": "3.1.1", "row": 2, "text_snippet": "行车娱乐禁止:目标应用/功能处于后台时 ○ 限制:非P挡时,限制目标应用/功能启用...", "priority": "supplementary"}},
{{"type": "logic_tree", "image_id": "rId17", "node_ids": ["n1","n2","n5","n7"], "priority": "primary_source"}}
],
"precondition": {{
"geographic_scope": "国内",
"screen_type": "any",
"switch": "开启",
"app_state": "后台"
}},
"trigger": {{
"operator": "AND",
"conditions": [
{{"signal": "应用请求启动", "operator": "==", "value": true}},
{{"signal": "档位", "operator": "!=", "value": "P"}}
]
}},
"actions": [
{{"type": "system", "description": "限制应用/功能启用"}},
{{"type": "user_interaction", "description": "显示Toast", "content": "请在P挡时使用该功能/应用"}}
]
}}
```
## 关键要求
1. **逻辑树为唯一权威来源**:触发条件和动作序列必须严格按逻辑树路径建模。文字/表格描述仅用于补充确切措辞(如 Toast 文案),不得覆盖或曲解逻辑树路径。在 sources 中,逻辑树类型标记 `"priority": "primary_source"`,文字/表格标记 `"priority": "supplementary"`。
2. **信号和数值必须精确**:禁止写"车速超过阈值",必须写 `{{"signal": "车速", "operator": ">=", "value": 15, "unit": "km/h"}}`。
3. **条件必须完整**:逻辑树中的每个 decision 条件必须对应 trigger.conditions 中的一条。如果文档说"车速>=15km/h 且持续超过5秒 且非P档",这三个条件必须全部出现。
4. **每条规则必须自包含**:人仅凭一条 rule JSON 就能设计出对应的测试用例。必须包含:geographic_scope、screen_type、开关状态、应用类型、前后台状态、完整触发条件、所有动作及确切 Toast 文案、来源引用。
5. **禁止占位符**:`"user_interaction"` 类型的 `content` 不能是"文案由业务定义"、"待定"、"自定义"。如文档确实未给出文案,填 `"(文档未指定)"`。
6. **逻辑树节点必须追踪**:在 sources 中列出该规则在逻辑树中经历的所有 decision 节点和 action 节点。
7. **多条规则**:如果一个功能单元包含多个独立行为分支,输出多条规则分别描述。
8. **开关关闭状态**:开关关闭时所有限制失效,这也必须作为一条规则输出(path: ["...", "开关关闭", "无限制"])。
{format_feedback}
## 输出格式
**只输出 JSON 数组,不要有任何其他文字或 markdown 标记**
[
{{ ... }},
{{ ... }}
]
注意:即使只有一个规则,也必须用数组格式 `[...]`。
-105
View File
@@ -1,105 +0,0 @@
import logging
import os
import time
from typing import Optional
from openai import OpenAI
logger = logging.getLogger(__name__)
class LLMClient:
"""Low-level OpenAI-compatible LLM client with retry and token tracking.
Usage::
llm = LLMClient()
content = llm.chat("qwen3.5-flash", [{"role": "user", "content": "Hello"}])
print(llm.usage)
"""
IMAGE_MODEL = "qwen3-vl-plus"
TEXT_MODEL = "qwen3.5-flash-2026-02-23"
TIMEOUT = 120
MAX_RETRIES = 3
def __init__(
self,
*,
base_url: str = "https://dashscope.aliyuncs.com/compatible-mode/v1",
timeout: int | None = None,
):
key = os.environ.get("DASHSCOPE_API_KEY", "")
if not key:
raise ValueError("DASHSCOPE_API_KEY environment variable is not set.")
self._client = OpenAI(api_key=key, base_url=base_url)
self._timeout = timeout or self.TIMEOUT
self._prompt_tokens = 0
self._completion_tokens = 0
@property
def usage(self) -> dict:
"""Return accumulated token counts as ``{prompt, completion, total}``."""
return {
"prompt_tokens": self._prompt_tokens,
"completion_tokens": self._completion_tokens,
"total_tokens": self._prompt_tokens + self._completion_tokens,
}
@staticmethod
def estimate_tokens(text: str) -> int:
"""Quick token estimate. CJK ≈1.7/token, others ≈3.0/token."""
cjk = sum(1 for c in text if '' <= c <= '鿿' or ' ' <= c <= '')
other = len(text) - cjk
return max(1, int(cjk / 1.7 + other / 3.0))
@staticmethod
def estimate_image_tokens() -> int:
"""Fixed estimate for one vision-model image (~500 tokens)."""
return 500
def chat(
self, model: str, messages: list[dict], *, timeout: int | None = None,
response_format: dict | None = None,
) -> str:
"""Send a chat completion request and return the response content.
Automatically retries on failure and accumulates token usage.
"""
label = f"chat({model})"
def _call():
t0 = time.time()
kwargs = dict(model=model, messages=messages, timeout=timeout or self._timeout)
if response_format is not None:
kwargs["response_format"] = response_format
kwargs["temperature"] = 0
resp = self._client.chat.completions.create(**kwargs)
content = resp.choices[0].message.content
usg = resp.usage
if usg:
self._prompt_tokens += usg.prompt_tokens
self._completion_tokens += usg.completion_tokens
elapsed = time.time() - t0
logger.info("%s: %d chars in %.1fs", label, len(content) if content else 0, elapsed)
if not content:
raise RuntimeError("Empty response from LLM")
return content
return self._retry(_call, label)
def _retry(self, fn, label: str) -> str:
"""Call *fn()* with exponential-backoff retry."""
last_error: Optional[Exception] = None
for attempt in range(self.MAX_RETRIES):
try:
return fn()
except Exception as e:
last_error = e
logger.warning(
"%s error (attempt %d/%d): %s",
label, attempt + 1, self.MAX_RETRIES, e,
)
if attempt < self.MAX_RETRIES - 1:
time.sleep(2 ** attempt)
raise RuntimeError(f"{label}: all retries exhausted") from last_error
@@ -1,359 +0,0 @@
#!/usr/bin/env python3
"""Generate JSON intermediate representation from ``_parsed.json`` or ``_updated.json``.
Sends the JSON document directly to the LLM for analysis. If the document exceeds
``MAX_ANALYSIS_TOKENS``, sections are batched greedily without splitting any
individual section. Conflict corrections from ``resolved_conflicts`` are included
so the output respects user arbitration decisions.
Usage::
python scripts/ir_generator.py output/<basename>_updated.json [output_dir] [--dry-run]
Output: ``<basename>_ir.json``
"""
import argparse
import json
import logging
import os
import sys
import time
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
from LLM import LLMClient
logging.basicConfig(
level=logging.INFO,
format="%(asctime)s [%(levelname)s] %(message)s",
datefmt="%Y-%m-%d %H:%M:%S",
)
logger = logging.getLogger(__name__)
# ---------------------------------------------------------------------------
# Configuration
# ---------------------------------------------------------------------------
RATE_LIMIT_DELAY = 0.5
MAX_ANALYSIS_TOKENS = 6000 # max content size per LLM call
# ---------------------------------------------------------------------------
# Prompt
# ---------------------------------------------------------------------------
PROMPT = """你是一个需求文档分析助手。请分析以下需求文档的JSON内容,输出结构化JSON。
## 已知修正(来自冲突检测)
以下内容已确认修正,生成JSON时请**使用修正后的值**,不要同时输出两个版本。
{conflict_context}
## 待分析内容(JSON格式)
{content}
## JSON字段说明
- sections: 文档章节列表,每个章节含 source(章节标题)和 blocks(内容块数组)
- blocks: 类型含 para(段落,字段 text)和 table(表格,字段 rows,每行含 columns 数组)
- image_sources: 图片所在章节映射,key 为图片 rid
- image_analysis: 图片分析结果,每个含 rid、type(流程图/架构图/状态图等)、description
- resolved_conflicts: 已知修正列表,每个含 section、conflict_type、correction、source
## 功能点定义
只有满足以下**全部条件**的才视为功能点:
1. 描述了一个**系统或软件要实现的具体行为**(有触发条件、执行动作、状态变化或逻辑规则)
2. 该行为直接由**系统或框架**执行(不是人的操作流程、管理流程)
3. 对用户或系统有**可观察的效果**
**以下内容不是功能点,不要输出:**
- 术语/缩略词定义(
- 文档背景、范围说明(如 "本文档涵盖xxx"
- 变更日志、版本记录、编制人信息
- 文档结构描述(如 "产品简介用户场景说明"
- 纯文本的概述、没有具体行为的介绍
## 决策树/流程图分解规则(重要)
图片分析(image_analysis)中的流程图和决策树描述包含丰富的功能逻辑,**必须完全分解**:
1. **每个叶子路径 = 一个独立 function**:从根节点到每个最终结果的完整路径,都拆成一个 function
2. **每个判断分支 = 一个独立 function**:菱形判断节点的每个分支方向和对应的结果,单独作为一个 function
3. **不同约束条件 = 不同 function**:例如"通过接入SDK限制""通过系统限制"是不同约束机制,必须分别列出
4. **不要合并不同路径**:即使最终结果相同,只要到达路径不同,就是不同的 function
## 输出格式
只输出功能点,每个功能点格式如下:
{
"function": "功能名称",
"source": {
"section": "章节名",
"location": "原文位置(如:正文第1段、表格1第2行、图片rId13)"
},
"trigger": {
"type": "AND或者OR",
"conditions": [
"触发条件1",
"触发条件2"
]
},
"actions": {
"场景/角色": [
"动作1",
"动作2"
]
}
}
## 输出原则
1. **只输出功能点**,没有功能点就输出空数组 []
2. 每个功能点**必须**包含 source.section 和 source.location
3. location 必须是具体的原文位置标签(如 "正文第1段""表格1""图片rId13"
4. **一个 function 只对应一种行为逻辑(一条完整路径)**。决策树中的每个分支路径(从根到叶子)必须拆成独立 function,conditions 中明确写出该路径上的所有判断条件和分支方向。
5. **穷举所有分支**:流程图/决策树中的每一条分支路径都要输出对应的 function,不能遗漏任何子逻辑。
6. 没有 trigger 或 actions 的字段直接**省略**,不要写 null 或空列表/空对象
7. 所有功能点全部列出,**宁多勿漏**
8. **已知修正**中确认的信息,使用修正后的值
9. 输出一个JSON数组,不要用 ```json 代码块包裹,直接输出纯JSON
"""
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
def _parse_llm_response(raw: str) -> list | dict | str | None:
"""Parse JSON from LLM response, handling markdown code fences."""
if raw is None:
return None
stripped = raw.strip()
if stripped.startswith("```"):
nl = stripped.find("\n")
stripped = stripped[nl + 1:] if nl != -1 else stripped[3:]
if stripped.endswith("```"):
stripped = stripped[:-3]
try:
return json.loads(stripped)
except json.JSONDecodeError:
logger.warning(" Failed to parse JSON, returning raw text")
return raw
def _build_conflict_context(
section_name: str | None,
resolved_conflicts: list[dict],
) -> str:
"""Build conflict correction context for a section, or all if section_name is None."""
if section_name is None:
relevant = resolved_conflicts
else:
relevant = [c for c in resolved_conflicts if c.get("section", "") == section_name]
if not relevant:
return "没有"
lines: list[str] = []
for c in relevant:
correction = c.get("correction", "")
conflict_type = c.get("conflict_type", "")
source = c.get("source", "")
lines.append(f"- 冲突类型:{conflict_type},依据:{source}")
lines.append(f" 修正后的值:{correction}")
return "\n".join(lines)
# ---------------------------------------------------------------------------
# LLM analysis
# ---------------------------------------------------------------------------
def _analyze_content(
    content: str,
    conflict_context: str,
    llm: LLMClient,
    *,
    dry_run: bool = False,
) -> list[dict]:
    """Fill the PROMPT template, query the LLM and return the IR entries.

    In dry-run mode only a token estimate is logged and ``[]`` is returned.
    A ``RuntimeError`` from the client, or a reply that does not decode to
    a list or dict, also yields ``[]``. A dict reply is wrapped in a
    one-element list.
    """
    # Substitution order matters: {conflict_context} first, then {content}.
    with_conflicts = PROMPT.replace("{conflict_context}", conflict_context)
    prompt = with_conflicts.replace("{content}", content)

    if dry_run:
        logger.info(" [DRY RUN] prompt ~%d tokens", llm.estimate_tokens(prompt))
        return []

    # NOTE(review): "json_object" mode is requested here while PROMPT asks
    # for a top-level JSON array -- confirm the provider accepts that.
    request = {
        "model": LLMClient.TEXT_MODEL,
        "messages": [{"role": "user", "content": prompt}],
        "response_format": {"type": "json_object"},
    }
    try:
        raw = llm.chat(**request)
        logger.info(" Response: %d chars", len(raw))
    except RuntimeError as e:
        logger.error(" Analysis failed: %s", e)
        return []

    parsed = _parse_llm_response(raw)
    if isinstance(parsed, list):
        return parsed
    if isinstance(parsed, dict):
        return [parsed]
    logger.warning(" Unparseable response, raw length: %d", len(raw))
    return []
# ---------------------------------------------------------------------------
# Main
# ---------------------------------------------------------------------------
def _greedy_batches(
    sized_sections: list[tuple[dict, int]], limit: int
) -> list[list[dict]]:
    """Group consecutive (section, size) pairs into batches of at most *limit*.

    Sections are never split. A section whose own size exceeds *limit* is
    emitted as a batch by itself instead of being dropped.
    """
    batches: list[list[dict]] = []
    current: list[dict] = []
    current_size = 0
    for sec, size in sized_sections:
        if current and current_size + size > limit:
            batches.append(current)
            current, current_size = [], 0
        current.append(sec)
        current_size += size
    if current:
        batches.append(current)
    return batches


def generate_ir(
    parsed_path: str,
    output_dir: str = "output",
    *,
    dry_run: bool = False,
) -> dict:
    """Read parsed/updated JSON and generate JSON IR.

    Produces ``<basename>_ir.json`` in *output_dir*. The whole document is
    sent in one request when its JSON text is shorter than
    ``MAX_ANALYSIS_TOKENS`` characters; otherwise non-empty sections are
    batched greedily, each batch carrying the images and conflict
    corrections of its sections.

    Args:
        parsed_path: Path to a ``_parsed.json`` or ``_updated.json`` file.
        output_dir: Directory for the IR file (created if missing).
        dry_run: Log token estimates instead of calling the API.

    Returns:
        ``{"ir": <list of entries>, "path": <output file path>}``.
    """
    with open(parsed_path, "r", encoding="utf-8") as f:
        data = json.load(f)

    basename = os.path.splitext(os.path.basename(parsed_path))[0]
    for suffix in ("_parsed", "_updated"):
        if basename.endswith(suffix):
            basename = basename[:-len(suffix)]
            break

    # An empty output_dir means "current directory"; makedirs("") would raise.
    os.makedirs(output_dir or ".", exist_ok=True)

    llm = LLMClient()
    ir_output: list[dict] = []

    sections = data.get("sections", [])
    image_sources = data.get("image_sources", {})
    image_analysis = data.get("image_analysis", [])
    resolved_conflicts = data.get("resolved_conflicts", [])

    # Build full document JSON to measure size
    full_doc = {
        "sections": sections,
        "image_sources": image_sources,
        "image_analysis": image_analysis,
    }
    full_json = json.dumps(full_doc, ensure_ascii=False)
    total_chars = len(full_json)
    logger.info("Total document JSON chars: %d", total_chars)

    # NOTE: despite its name, MAX_ANALYSIS_TOKENS is compared to character counts.
    if total_chars < MAX_ANALYSIS_TOKENS:
        logger.info("Document fits in one request (< %d chars)", MAX_ANALYSIS_TOKENS)
        conflict_ctx = _build_conflict_context(None, resolved_conflicts)
        entries = _analyze_content(full_json, conflict_ctx, llm, dry_run=dry_run)
        ir_output.extend(entries)
    else:
        logger.info("Document is large (>= %d chars), batching sections", MAX_ANALYSIS_TOKENS)

        # Effective size of each non-empty section: its own JSON plus the
        # image_sources / image_analysis entries that belong to it.
        sec_sizes: list[tuple[dict, int]] = []
        for sec in sections:
            if not sec.get("blocks"):
                continue
            sec_chars = len(json.dumps(sec, ensure_ascii=False))
            sec_name = sec.get("source", "")
            sec_rids = [rid for rid, src in image_sources.items()
                        if src.get("section", "") == sec_name]
            if sec_rids:
                rid_lookup = set(sec_rids)
                overhead_doc = {
                    "image_sources": {rid: image_sources[rid] for rid in sec_rids},
                    "image_analysis": [img for img in image_analysis
                                       if img.get("rid", "") in rid_lookup],
                }
                sec_chars += len(json.dumps(overhead_doc, ensure_ascii=False))
            if sec_chars > MAX_ANALYSIS_TOKENS:
                # Previously such a section produced an empty batch and was
                # skipped without any output; it is now analysed on its own.
                logger.warning(
                    "Section [%s] is %d chars (> %d); sending it as its own batch",
                    sec_name, sec_chars, MAX_ANALYSIS_TOKENS,
                )
            sec_sizes.append((sec, sec_chars))

        for batch in _greedy_batches(sec_sizes, MAX_ANALYSIS_TOKENS):
            # Collect sections and their images for this batch
            batch_names = [s.get("source", "") for s in batch]
            batch_image_sources = {
                rid: src for rid, src in image_sources.items()
                if src.get("section", "") in batch_names
            }
            batch_images = [
                img for img in image_analysis
                if image_sources.get(img.get("rid", ""), {}).get("section", "") in batch_names
            ]
            batch_doc = {
                "sections": batch,
                "image_sources": batch_image_sources,
                "image_analysis": batch_images,
            }
            batch_json = json.dumps(batch_doc, ensure_ascii=False)

            # Merge conflict contexts of every section in the batch
            ctx_parts = []
            for sn in batch_names:
                ctx = _build_conflict_context(sn, resolved_conflicts)
                if ctx != "没有":
                    ctx_parts.append(ctx)
            conflict_ctx = "\n".join(ctx_parts) if ctx_parts else "没有"

            label = " + ".join(batch_names)
            logger.info("Batch [%s]: %d sections, %d chars", label, len(batch), len(batch_json))
            entries = _analyze_content(batch_json, conflict_ctx, llm, dry_run=dry_run)
            ir_output.extend(entries)
            time.sleep(RATE_LIMIT_DELAY)

    # ---- save ----------------------------------------------------------------
    ir_path = os.path.join(output_dir, f"{basename}_ir.json")
    with open(ir_path, "w", encoding="utf-8") as f:
        json.dump(ir_output, f, ensure_ascii=False, indent=2)
    logger.info("Saved: %s (%d entries)", ir_path, len(ir_output))

    # ---- summary -------------------------------------------------------------
    usg = llm.usage
    logger.info("Tokens: %d prompt + %d completion = %d total",
                usg["prompt_tokens"], usg["completion_tokens"], usg["total_tokens"])
    logger.info("Output: %s", ir_path)
    return {"ir": ir_output, "path": ir_path}
# ---------------------------------------------------------------------------
# CLI
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    # Command-line entry point: parse the input path, the optional output
    # directory and the --dry-run flag, then run generate_ir once.
    parser = argparse.ArgumentParser(
        description="Generate JSON intermediate representation from parsed/updated JSON.",
    )
    parser.add_argument("input", metavar="parsed.json",
                        help="Path to _parsed.json or _updated.json")
    # Optional positional argument; falls back to "output" when omitted.
    parser.add_argument("output_dir", nargs="?", default="output", metavar="output_dir",
                        help="Directory for output files (default: output/)")
    parser.add_argument("--dry-run", action="store_true",
                        help="Print token estimates without calling the API.")
    args = parser.parse_args()
    generate_ir(args.input, args.output_dir, dry_run=args.dry_run)
@@ -0,0 +1,717 @@
"""
Stage 1: Ensemble Semantic Index Generation.
Generates N parallel LLM calls with different temperatures (e.g., 0.0, 0.3, 0.7),
then deterministically merges the results via ensemble_merge (pure Python, no LLM).
The merged output includes confidence scores for each concept and function_unit.
Outputs:
- output/semantic_index_r1.json (T=0.0 raw)
- output/semantic_index_r2.json (T=0.3 raw)
- output/semantic_index_r3.json (T=0.7 raw)
- output/semantic_index.json (ensemble-merged final)
"""
import concurrent.futures
import json
import re
import sys
import time
from pathlib import Path
import config
from ensemble_merge import ensemble_merge
# ---- Path Enumeration (for prompt embedding) ----
def _traverse_nested(node: dict, image_id: str, path_nodes: list,
                     branch_taken: str | None) -> list[dict]:
    """Walk a ``logic_tree_nested`` node depth-first and collect leaf paths.

    *path_nodes* is the chain leading to *node* and is never mutated; each
    call extends a copy. *branch_taken* is the label of the edge that led
    here. A node of type ``end``, or any node without children, closes the
    path and yields exactly one record.
    """
    kind = node.get("type", "?")
    trail = path_nodes + [{
        "id": node.get("id", "?"),
        "type": kind,
        "label": node.get("name", ""),
        "branch_taken": branch_taken,
    }]

    # "end" nodes are leaves regardless of whether they carry children.
    successors = [] if kind == "end" else node.get("children", [])
    if not successors:
        return [_make_path_record(trail, image_id)]

    is_decision = kind == "decision"
    collected = []
    for entry in successors:
        if is_decision:
            # Decision children are {condition, node} wrappers.
            edge_label = entry.get("condition", "")
            target = entry.get("node", entry)
        else:
            # Every other node type lists its child nodes directly.
            edge_label = "(implicit)"
            target = entry
        collected.extend(_traverse_nested(target, image_id, trail, edge_label))
    return collected
def _make_path_record(path_nodes: list, image_id: str) -> dict:
    """Assemble the record describing one finished root-to-leaf chain.

    The record carries the full node chain, its textual description, the
    owning image id, the ``action`` and ``decision`` subsets of the chain,
    the ordered node ids, and a ``path_id`` derived from image id and ids.
    """
    actions = []
    decisions = []
    ids = []
    for entry in path_nodes:
        kind = entry["type"]
        if kind == "action":
            actions.append(entry)
        elif kind == "decision":
            decisions.append(entry)
        ids.append(entry["id"])

    joined_ids = "-".join(ids)
    return {
        "path_id": f"PATH-{image_id}-{joined_ids}",
        "nodes": path_nodes,
        "meaning": _describe_path(path_nodes),
        "image_id": image_id,
        "action_nodes": actions,
        "decision_nodes": decisions,
        "node_ids": ids,
    }
def enumerate_logic_tree_paths(nested_tree: dict, image_id: str = "") -> list[dict]:
    """Return every root-to-leaf path of a ``logic_tree_nested`` structure.

    The nested tree is walked directly, without building a flat adjacency
    list. An empty or missing tree yields an empty list.
    """
    return _traverse_nested(nested_tree, image_id, [], None) if nested_tree else []
def _describe_path(path_nodes: list[dict]) -> str:
"""Generate a human-readable description of a logic tree path."""
parts = []
for n in path_nodes:
label = n["label"]
if n["branch_taken"] and n["branch_taken"] != "(implicit)":
label = f"{label}{n['branch_taken']}"
parts.append(label)
return "".join(parts)
def enumerate_all_paths(doc: dict) -> dict[str, list[dict]]:
    """Enumerate paths for all logic trees in the document.

    Uses logic_tree_nested when available (proper tree), falling back to
    flat logic_tree. Returns {image_id: [path, ...]}.

    Images without a ``rid`` are skipped. An image whose flat
    ``logic_tree`` is present but has no nodes maps to an empty list. The
    input document is not modified: the image id is attached to a shallow
    copy of the flat tree rather than written into *doc*.
    """
    result: dict[str, list[dict]] = {}
    # "or []" tolerates an explicit null for image_analysis.
    for img in doc.get("image_analysis") or []:
        rid = img.get("rid", "")
        if not rid:
            continue

        nested = img.get("logic_tree_nested")
        if nested:
            result[rid] = enumerate_logic_tree_paths(nested, image_id=rid)
            continue

        lt = img.get("logic_tree")
        if not lt:
            continue
        if lt.get("nodes"):
            # Copy instead of assigning lt["image_id"], which used to leak
            # an extra key into the caller's document.
            result[rid] = _enumerate_flat_tree({**lt, "image_id": rid})
        else:
            result[rid] = []
    return result
def _enumerate_flat_tree(tree: dict) -> list[dict]:
"""Fallback: enumerate paths from flat logic_tree using adjacency.
Handles start/process/action/state nodes as implicit chain links.
"""
nodes = tree.get("nodes", [])
if not nodes:
return []
node_map = {n["id"]: n for n in nodes}
image_id = tree.get("image_id", "")
# Find root: first start/state node, or first process node, or first node
root = None
for n in nodes:
if n["type"] in ("start", "state"):
root = n
break
if root is None:
for n in nodes:
if n["type"] == "process":
root = n
break
if root is None:
root = nodes[0]
adj = _build_adjacency(nodes, node_map)
paths = []
def dfs(current_id, visited, path_nodes, branch_taken):
if current_id in visited:
return
new_visited = visited | {current_id}
node = node_map.get(current_id)
if node is None:
return
path_nodes = path_nodes + [{
"id": current_id,
"type": node["type"],
"label": node.get("description") or node.get("condition", ""),
"branch_taken": branch_taken,
}]
outgoing = adj.get(current_id, [])
if not outgoing:
action_nodes = [n for n in path_nodes if n["type"] == "action"]
decision_nodes = [n for n in path_nodes if n["type"] == "decision"]
node_ids = [n["id"] for n in path_nodes]
paths.append({
"path_id": f"PATH-{image_id}-{'-'.join(node_ids)}",
"nodes": path_nodes,
"meaning": _describe_path(path_nodes),
"image_id": image_id,
"action_nodes": action_nodes,
"decision_nodes": decision_nodes,
"node_ids": node_ids,
})
else:
for branch_val, target_id in outgoing:
dfs(target_id, new_visited, path_nodes, branch_val)
dfs(root["id"], set(), [], None)
return paths
def _build_adjacency(nodes, node_map):
    """Build {node_id: [(branch_value, target_id)]} adjacency for flat trees.

    Handles: decision branches (explicit), non-branching nodes (implicit sequential).

    Explicit edges come from each node's ``branches`` list. A node of a
    non-branching type that has no ``branches`` is linked, with the label
    "(implicit)", to nodes that follow it in list order. The scan stops at
    the first following node that already has an incoming edge, or at the
    first decision node.

    ``node_map`` is accepted but not used inside this function.
    """
    NON_BRANCHING = {"start", "process", "state", "action"}
    adj = {}
    # Ids that are the target of some edge; seeded from explicit branches and
    # extended below whenever an implicit edge is created.
    has_explicit_incoming = set()
    for n in nodes:
        for br in n.get("branches", []):
            has_explicit_incoming.add(br["target"])
    for i, node in enumerate(nodes):
        nid = node["id"]
        adj.setdefault(nid, [])
        # Explicit edges from decision nodes
        for br in node.get("branches", []):
            adj[nid].append((br["value"], br["target"]))
        # Implicit edges for non-branching nodes (start/process/state/action)
        if node["type"] in NON_BRANCHING and not node.get("branches"):
            j = i + 1
            targets = []
            while j < len(nodes):
                next_node = nodes[j]
                next_nid = next_node["id"]
                # A node that already has an incoming edge ends the scan.
                if next_nid in has_explicit_incoming:
                    break
                if next_node["type"] in NON_BRANCHING | {"end"}:
                    targets.append(next_nid)
                    # Claim the node so later scans stop before it.
                    has_explicit_incoming.add(next_nid)
                    j += 1
                    continue
                elif next_node["type"] == "decision":
                    # A decision is linked only when nothing was collected yet.
                    if not targets:
                        targets.append(next_nid)
                    break
                # Any other node type is skipped over.
                j += 1
            for t in targets:
                adj[nid].append(("(implicit)", t))
    return adj
def format_paths_for_prompt(all_paths: dict[str, list[dict]]) -> str:
    """Render enumerated paths as a text listing for the LLM prompt.

    Produces one heading per image followed by one entry per path showing
    its id, meaning, node ids, decision node ids and action node ids.
    Returns a fixed placeholder when *all_paths* is empty.
    """
    if not all_paths:
        return "(无逻辑树路径)"

    out: list[str] = []
    for image_id, paths in all_paths.items():
        out.append(f"\n### {image_id} 的全部决策路径(共 {len(paths)} 条):")
        for ordinal, record in enumerate(paths, start=1):
            out += [
                f"\n**路径 {ordinal}** (ID: {record['path_id']})",
                f" 含义: {record['meaning']}",
                f" 节点: {record['node_ids']}",
                f" 决策节点: {[n['id'] for n in record['decision_nodes']]}",
                f" 动作节点: {[n['id'] for n in record['action_nodes']]}",
            ]
    return "\n".join(out)
# ---- Document Formatting ----
def format_document_for_prompt(doc: dict) -> str:
    """Render the full parsed document as a readable string for the LLM prompt.

    The output has up to three parts: the sections (paragraphs, tables and
    image references), the image analysis with flat logic-tree nodes and
    their branches, and the resolved conflicts when any exist.

    Missing or null fields are rendered with a placeholder instead of
    raising, matching the ``.get(..., '?')`` style used for the other
    fields.
    """
    lines = ["=== SECTIONS ==="]
    for i, section in enumerate(doc.get("sections") or []):
        source = section.get("source", f"(无标题-章节{i})")
        lines.append(f"\n--- Section: {source} ---")
        for block in section.get("blocks") or []:
            block_type = block.get("type")
            if block_type == "para":
                lines.append(f"[段落 {block.get('index', '?')}] {block.get('text', '')}")
            elif block_type == "table":
                lines.append(f"[表格 {block.get('table', '?')}]")
                headers = block.get("headers") or []
                lines.append(f" 表头: {' | '.join(map(str, headers))}")
                for row in block.get("rows") or []:
                    cell_texts = [
                        f"[行{c.get('row','?')}]{c.get('name','')}: {c.get('text','')}"
                        for c in row.get("columns") or []
                    ]
                    lines.append(f" {'; '.join(cell_texts)}")
        images = section.get("images") or []
        if images:
            lines.append(f" 图片引用: {', '.join(map(str, images))}")

    lines.append("\n\n=== IMAGE_ANALYSIS (流程图逻辑树) ===")
    for img in doc.get("image_analysis") or []:
        rid = img.get("rid", "?")
        img_type = img.get("type", "?")
        lines.append(f"\n--- Image: {rid} (type={img_type}) ---")
        # "or ''" guards against an explicit null description before slicing.
        lines.append(f" 描述: {(img.get('description') or '')[:300]}")
        lt = img.get("logic_tree")
        if lt:
            lines.append(f" 逻辑树根节点: {lt.get('root', '?')}")
            lines.append(" 节点详情:")
            for node in lt.get("nodes") or []:
                nid = node.get("id", "?")
                ntype = node.get("type", "?")
                desc = node.get("description", "") or node.get("condition", "")
                lines.append(f" [{ntype}] {nid}: {desc}")
                # NOTE(review): value and target are rendered back to back,
                # as in the reviewed source -- confirm no separator was lost.
                for br in node.get("branches") or []:
                    lines.append(f"{br.get('value', '?')}{br.get('target', '?')}")

    conflicts = doc.get("resolved_conflicts") or []
    if conflicts:
        lines.append("\n\n=== RESOLVED_CONFLICTS (图文冲突仲裁) ===")
        for c in conflicts:
            lines.append(
                f" [{c.get('conflict_type','?')}] {c.get('section','?')}: "
                f"{c.get('source','?')}为准 — {c.get('correction','')}"
            )
    return "\n".join(lines)
# ---- Prompt Building ----
def build_prompt(doc: dict, feedback: str = "", all_paths: dict | None = None) -> str:
    """Fill the stage-1 prompt template with the document, paths and feedback.

    The template ``step1_semantic_index.txt`` is read from
    ``config.PROMPTS_DIR`` and its three placeholders are substituted in
    order: ``{document_json}``, ``{logic_tree_paths}``, ``{feedback}``.

    Args:
        doc: Parsed input document.
        feedback: Text for the ``{feedback}`` placeholder; a falsy value
            leaves the placeholder empty.
        all_paths: Pre-computed logic tree paths. When ``None`` they are
            enumerated from ``doc``.

    Returns:
        The fully substituted prompt text.
    """
    template_file = Path(config.PROMPTS_DIR) / "step1_semantic_index.txt"
    text = template_file.read_text(encoding="utf-8")

    text = text.replace("{document_json}", format_document_for_prompt(doc))

    paths = enumerate_all_paths(doc) if all_paths is None else all_paths
    text = text.replace("{logic_tree_paths}", format_paths_for_prompt(paths))

    return text.replace("{feedback}", feedback if feedback else "")
# ---- Validation ----
def _quick_validate(
semantic_index: dict, doc: dict, all_paths: dict | None = None
) -> tuple[bool, dict]:
"""Validate semantic index and return (passed, gaps).
Uses a single COVERAGE_TARGET threshold (default 0.95).
"""
gaps = {
"missing_paths": [],
"missing_concepts": [],
"format_issues": [],
"parent_issues": [],
}
units = semantic_index.get("function_units", [])
concepts = semantic_index.get("concepts", [])
# --- Check function_units non-empty ---
if not units:
gaps["format_issues"].append("function_units 为空")
return False, gaps
# --- Check each function_unit has path ---
for fu in units:
uid = fu.get("unit_id", "?")
if not fu.get("path"):
gaps["format_issues"].append(f"{uid}: 缺少 path 字段")
if not fu.get("sources"):
gaps["format_issues"].append(f"{uid}: 缺少 sources")
# --- Logic tree node coverage ---
all_nodes = _collect_logic_tree_nodes(doc)
referenced = _collect_referenced_nodes(units)
threshold = config.COVERAGE_TARGET
for image_id, node_set in all_nodes.items():
ref_set = referenced.get(image_id, set())
checkable = {
nid for nid, ntype in node_set.items()
if ntype in ("decision", "action")
}
if not checkable:
continue
covered = checkable & ref_set
coverage = len(covered) / len(checkable) if checkable else 1.0
if coverage < threshold:
missing = checkable - ref_set
gaps["missing_paths"].append(
f"{image_id}: 覆盖率 {coverage:.0%} < {threshold:.0%}, "
f"未覆盖节点: {sorted(missing)}"
)
# --- Check logic tree path consistency ---
# A unit's logic_tree_nodes must form a valid (connected) path in the tree.
if all_paths is not None:
for fu in units:
uid = fu.get("unit_id", "?")
for src in fu.get("sources", []):
if src.get("type") != "logic_tree":
continue
image_id = src.get("image_id", "")
unit_nodes = set(src.get("logic_tree_nodes", []))
if not unit_nodes:
continue
# Check if there exists a path containing all these nodes
valid = False
for path in all_paths.get(image_id, []):
path_nodes = set(path.get("node_ids", []))
if unit_nodes.issubset(path_nodes):
valid = True
break
if not valid:
gaps["format_issues"].append(
f"{uid}: logic_tree_nodes 不构成有效路径 "
f"(image={image_id}, nodes={sorted(unit_nodes)})"
)
# --- Check for trivial units (only state/switch nodes, no actions) ---
if all_paths is not None:
for fu in units:
uid = fu.get("unit_id", "?")
has_logic_ref = False
has_action = False
has_non_trivial_decision = False
for src in fu.get("sources", []):
if src.get("type") != "logic_tree":
continue
has_logic_ref = True
node_ids = src.get("logic_tree_nodes", [])
node_types = {}
for image_id, nset in all_nodes.items():
for nid in node_ids:
if nid in nset:
node_types[nid] = nset[nid]
for nid in node_ids:
ntype = node_types.get(nid, "")
if ntype == "action":
has_action = True
# Count decisions beyond first level (e.g., n1/n2 are just root+switch)
decisions = [nid for nid in node_ids
if node_types.get(nid, "") == "decision"]
if len(decisions) > 1:
has_non_trivial_decision = True
if has_logic_ref and not has_action and not has_non_trivial_decision:
gaps["format_issues"].append(
f"{uid}: 可能为空壳单元(仅有state/开关节点,无action或深层decision"
)
# --- Concept parent validity ---
concept_names = {c["name"] for c in concepts}
for c in concepts:
name = c.get("name", "?")
parent = c.get("parent") # can be None for scope-level
if parent is not None and parent not in concept_names:
gaps["parent_issues"].append(
f"concept '{name}' 的 parent '{parent}' 不存在"
)
# Warn about scope-level concepts without parent=null
for c in concepts:
if c.get("parent") is not None:
continue
name = c.get("name", "")
# Scope-level concepts (国内/海外) should have parent=null
if name not in ("国内", "海外", ""):
gaps["parent_issues"].append(
f"concept '{name}' 的 parent 为 null,但它可能不是 scope 概念"
)
# --- Check for missing scope concepts ---
if "国内" not in concept_names:
gaps["missing_concepts"].append("缺少 scope 概念: 国内")
if "海外" not in concept_names and any(
"海外" in s.get("source", "") for s in doc.get("sections", [])
):
gaps["missing_concepts"].append("缺少 scope 概念: 海外")
passed = (
not gaps["missing_paths"]
and not gaps["format_issues"]
and not gaps["parent_issues"]
)
return passed, gaps
def _collect_logic_tree_nodes(doc: dict) -> dict[str, dict[str, str]]:
"""Return {image_id: {node_id: node_type}} for all logic trees."""
result = {}
for img in doc.get("image_analysis", []):
lt = img.get("logic_tree")
rid = img.get("rid", "")
if lt and rid:
result[rid] = {n["id"]: n["type"] for n in lt.get("nodes", [])}
return result
def _collect_referenced_nodes(units: list[dict]) -> dict[str, set[str]]:
"""Return {image_id: {referenced node_ids}} across all function_units."""
refs = {}
for fu in units:
for src in fu.get("sources", []):
if src.get("type") == "logic_tree":
image_id = src.get("image_id", "")
if image_id not in refs:
refs[image_id] = set()
refs[image_id].update(src.get("logic_tree_nodes", []))
return refs
# ---- LLM Calls ----
def extract_json_from_response(text: str) -> str:
    """Robustly extract a JSON object from an LLM response.

    Strategy:

    1. If the response contains a fenced code block (three backticks,
       optionally tagged ``json``), return the block's stripped content.
    2. Otherwise return the first balanced ``{ ... }`` span. Braces inside
       JSON string literals (including after escaped quotes) are ignored, so
       a value such as ``"}"`` does not end the object early.

    Args:
        text: Raw LLM response text.

    Returns:
        The extracted JSON text (not parsed).

    Raises:
        ValueError: No ``{`` is present, or the object is never closed.
    """
    fenced = re.search(r"```(?:json)?\s*([\s\S]*?)```", text)
    if fenced:
        return fenced.group(1).strip()

    start = text.find("{")
    if start == -1:
        raise ValueError("No JSON object found in LLM response")

    depth = 0
    in_string = False
    escaped = False
    for i in range(start, len(text)):
        ch = text[i]
        if in_string:
            # Inside a string only the closing quote matters; a backslash
            # protects the character that follows it.
            if escaped:
                escaped = False
            elif ch == "\\":
                escaped = True
            elif ch == '"':
                in_string = False
            continue
        if ch == '"':
            in_string = True
        elif ch == "{":
            depth += 1
        elif ch == "}":
            depth -= 1
            if depth == 0:
                return text[start : i + 1]
    raise ValueError("Unclosed JSON object in LLM response")
def call_llm(prompt: str, max_retries: int = 2,
             temperature: float | None = None) -> dict:
    """Send prompt to LLM, return parsed JSON dict.

    The request is repeated up to ``max_retries`` extra times when the
    response is empty, contains no parseable JSON, or parses to something
    other than a JSON object. Other exceptions raised by the client
    propagate unchanged.

    Args:
        prompt: User message sent to the model.
        max_retries: Number of retries after the first attempt.
        temperature: Override config.TEMPERATURE. If None, uses config default.

    Returns:
        The parsed JSON object.

    Raises:
        RuntimeError: Every attempt failed to yield a JSON object; the last
            parse error is attached as ``__cause__``.
    """
    client = config.llm_client()
    temp = temperature if temperature is not None else config.TEMPERATURE
    last_error: Exception | None = None
    for attempt in range(max_retries + 1):
        print(f" LLM 调用 T={temp} (尝试 {attempt + 1}/{max_retries + 1})...", flush=True)
        try:
            resp = client.chat.completions.create(
                model=config.MODEL_NAME,
                messages=[
                    {
                        "role": "system",
                        "content": "你是一个精确的 JSON 输出引擎。只输出合法的 JSON。",
                    },
                    {"role": "user", "content": prompt},
                ],
                temperature=temp,
                max_tokens=config.MAX_TOKENS,
            )
            content = resp.choices[0].message.content
            if content is None:
                # Raised as ValueError so an empty completion is retried
                # like any other unusable response.
                raise ValueError("LLM returned empty response")
            parsed = json.loads(extract_json_from_response(content))
            if not isinstance(parsed, dict):
                raise ValueError(
                    f"Expected JSON object, got {type(parsed).__name__}"
                )
            return parsed
        except (json.JSONDecodeError, ValueError) as e:
            last_error = e
            print(f" JSON 解析失败: {e}")
            if attempt < max_retries:
                time.sleep(2)
    raise RuntimeError("无法从 LLM 响应中解析 JSON") from last_error
# ---- Ensemble Orchestration ----
def run_ensemble_semantic_index(doc: dict) -> dict:
    """Run N parallel LLM calls at different temperatures, then ensemble-merge.

    1. Enumerate all logic tree paths (once).
    2. Build the prompt (once — no iterative feedback needed).
    3. Launch len(ENSEMBLE_TEMPERATURES) parallel LLM calls via ThreadPoolExecutor.
    4. Collect all results; a failed call contributes an empty placeholder.
    5. Call ensemble_merge() for deterministic merge.
    6. Validate final output with _quick_validate().
    7. Save individual version outputs (first three, ordered by temperature).

    Args:
        doc: Parsed input document.

    Returns:
        The merged semantic index, extended with ``ensemble_temperatures``,
        ``validation_passed`` and ``validation_gaps``.

    Raises:
        RuntimeError: Every LLM call failed.
    """
    all_paths = enumerate_all_paths(doc)
    print(f" 已枚举逻辑树路径: {sum(len(v) for v in all_paths.values())} 条")
    prompt = build_prompt(doc, "", all_paths)
    print(f" Prompt 长度: {len(prompt)} 字符")
    temperatures = config.ENSEMBLE_TEMPERATURES
    print(f" 集成温度: {temperatures}")

    # Parallel LLM calls
    raw_results: list[tuple[int, float, dict]] = []
    n_success = 0
    with concurrent.futures.ThreadPoolExecutor(
        max_workers=len(temperatures)
    ) as executor:
        future_to_meta = {
            executor.submit(call_llm, prompt, 2, temp): (i, temp)
            for i, temp in enumerate(temperatures)
        }
        for future in concurrent.futures.as_completed(future_to_meta):
            idx, temp = future_to_meta[future]
            try:
                si = future.result()
                n_units = len(si.get("function_units", []))
                n_concepts = len(si.get("concepts", []))
                print(f" T={temp}: {n_concepts} 概念, {n_units} 功能单元")
                raw_results.append((idx, temp, si))
                n_success += 1
            except Exception as e:
                # Keep a placeholder so the version slots stay aligned with
                # the configured temperatures.
                print(f" T={temp}: FAIL — {e}")
                raw_results.append((idx, temp, {
                    "feature_name": "", "concepts": [], "function_units": []
                }))

    # Placeholders make raw_results non-empty even when nothing succeeded,
    # so total failure has to be detected by counting successes.
    if n_success == 0:
        raise RuntimeError("所有集成的 LLM 调用均失败")

    # Sort by temperature (submission index breaks ties) for determinism
    raw_results.sort(key=lambda x: (x[1], x[0]))
    semantic_indices = [r[2] for r in raw_results]

    # Save individual version outputs
    version_paths = {
        0: config.SEMANTIC_INDEX_R1_JSON,
        1: config.SEMANTIC_INDEX_R2_JSON,
        2: config.SEMANTIC_INDEX_R3_JSON,
    }
    for i, (_, temp, si) in enumerate(raw_results):
        out_path = version_paths.get(i)
        if out_path:
            config.save_json(si, out_path)
            # Report the temperature that produced this version; after the
            # sort, position i need not match the configured order.
            print(f" 保存版本 {i} (T={temp}): {out_path}")

    # Ensemble merge
    print(f"\n 集成合并 {len(semantic_indices)} 个版本...")
    merged = ensemble_merge(semantic_indices)
    merged["ensemble_temperatures"] = list(temperatures)

    # Validate
    passed, gaps = _quick_validate(merged, doc, all_paths)
    merged["validation_passed"] = passed
    merged["validation_gaps"] = {
        k: v for k, v in gaps.items() if v
    }

    # Print summary
    cs = merged.get("confidence_summary", {})
    print(f" 合并后: {cs.get('total_concepts', 0)} 概念, "
          f"{cs.get('total_units', 0)} 功能单元")
    print(f" 置信度: high={cs.get('high', 0)}, medium={cs.get('medium', 0)}, "
          f"low={cs.get('low', 0)}")
    print(f" 验证: {'PASS' if passed else 'GAPS FOUND'}")
    if not passed:
        for k, v in gaps.items():
            if v:
                print(f" {k}: {len(v)} 个问题")
    return merged
# ---- Main ----
def main():
    """Run stage 1 end to end: load the document, build the index, save outputs.

    Writes the merged semantic index to ``config.SEMANTIC_INDEX_JSON`` and
    the logic tree path enumeration to ``config.PATH_ENUM_JSON``, printing
    progress along the way.
    """
    banner = "=" * 60
    print(banner)
    print("阶段一:集成语义索引 (Ensemble Semantic Index)")
    print(banner)

    # 1. Load input
    print(f"\n[1/3] 加载输入文档: {config.INPUT_JSON}")
    doc = config.load_input_document()
    section_count = len(doc.get('sections', []))
    image_count = len(doc.get('image_analysis', []))
    print(f" 已加载 {section_count} 个 section, "
          f"{image_count} 张图片分析")

    # 2. Run ensemble generation + merge
    version_count = len(config.ENSEMBLE_TEMPERATURES)
    print(f"\n[2/3] 运行集成语义索引 ({version_count} 个温度版本)...")
    merged_index = run_ensemble_semantic_index(doc)

    # 3. Save outputs
    print(f"\n[3/3] 保存最终语义索引: {config.SEMANTIC_INDEX_JSON}")
    config.save_json(merged_index, config.SEMANTIC_INDEX_JSON)

    # Also save path enumeration for downstream use
    path_payload = {"logic_tree_paths": dict(enumerate_all_paths(doc).items())}
    config.save_json(path_payload, config.PATH_ENUM_JSON)
    print(f" 路径枚举: {config.PATH_ENUM_JSON}")

    # Prefer the merge summary; fall back to counting the merged lists.
    summary = merged_index.get("confidence_summary", {})
    fallback_concepts = len(merged_index.get("concepts", []))
    fallback_units = len(merged_index.get("function_units", []))
    n_concepts = summary.get("total_concepts", fallback_concepts)
    n_units = summary.get("total_units", fallback_units)
    n_versions = merged_index.get(
        "ensemble_versions", len(config.ENSEMBLE_TEMPERATURES)
    )
    print(f"\n完成! {n_versions} 版本集成, {n_concepts} 个概念, {n_units} 个功能单元.")
    print(f"输出: {config.SEMANTIC_INDEX_JSON}")
# Script entry point: run stage 1 only when this file is executed directly.
if __name__ == "__main__":
    main()
@@ -0,0 +1,399 @@
"""
Stage 2.5: Branch Coverage Auto-Completion.
1. Enumerates all root-to-leaf paths in every logic tree
2. Compares paths against existing IR rules to find uncovered paths
3. Generates synthetic function_units for uncovered paths
4. Calls LLM (same extract_rules_for_unit) to produce rules for synthetic units
5. Iterates up to MAX_RETRIES_PER_STAGE rounds to reach COVERAGE_TARGET
Outputs:
- output/path_enumeration.json
- output/ir_autocomplete_fragments.json
"""
import concurrent.futures
import json
import time
from pathlib import Path
import config
# ---- Path Enumeration (thin wrapper that delegates to step1_semantic_index) ----
def enumerate_all_paths(doc: dict) -> dict[str, list[dict]]:
    """Enumerate all root-to-leaf paths for every logic tree.

    Delegates to ``step1_semantic_index.enumerate_all_paths``. The import is
    done inside the function, so step1 is loaded only when paths are needed.
    """
    import step1_semantic_index

    return step1_semantic_index.enumerate_all_paths(doc)
# ---- Coverage Analysis ----
def find_referenced_path_ids(rules: list[dict]) -> dict[str, set[str]]:
    """Map each rule to the logic tree nodes named by its sources.

    Only sources of type ``"logic_tree"`` are considered; each of their
    ``node_ids`` is recorded as an ``"image_id:node_id"`` string.

    Returns:
        ``{rule_id: set of "image_id:node_id" pairs}``. Rules without a
        ``rule_id`` are stored under ``"?"``; a later rule with the same id
        replaces an earlier one.
    """
    mapping: dict[str, set[str]] = {}
    for rule in rules:
        mapping[rule.get("rule_id", "?")] = {
            f"{source.get('image_id', '')}:{node_id}"
            for source in rule.get("sources", [])
            if source.get("type") == "logic_tree"
            for node_id in source.get("node_ids", [])
        }
    return mapping
def compute_path_coverage(
    all_paths: dict[str, list[dict]], rules: list[dict]
) -> tuple[list[dict], list[dict], dict]:
    """Compute coverage of enumerated paths by existing rules.

    A path is "covered" if at least one rule's node_ids form a superset
    of the path's decision+action nodes for that image. Paths that contain
    no decision/action node are counted as covered.

    Each rule is evaluated on its own. Rules are deliberately not keyed by
    ``rule_id``: ids may be missing or repeated across fragments, and a
    keyed lookup would let one rule silently replace another.

    Args:
        all_paths: ``{image_id: [path, ...]}``; every path has a ``nodes``
            list whose items carry ``id`` and ``type``.
        rules: IR rules whose ``logic_tree`` sources list ``node_ids``.

    Returns:
        ``(covered_paths, uncovered_paths, stats)`` where ``stats`` holds
        ``total_paths``, ``covered_paths``, ``uncovered_paths`` and
        ``coverage_pct`` (one decimal; 100.0 when there are no paths).
    """
    # One {image_id: node-id set} mapping per rule that references a tree.
    rule_node_sets: list[dict[str, set[str]]] = []
    for rule in rules:
        per_image: dict[str, set[str]] = {}
        for src in rule.get("sources", []):
            if src.get("type") == "logic_tree":
                per_image.setdefault(src.get("image_id", ""), set()).update(
                    src.get("node_ids", [])
                )
        if per_image:
            rule_node_sets.append(per_image)

    covered = []
    uncovered = []
    for image_id, paths in all_paths.items():
        # Node sets of the rules that mention this image at all.
        image_rule_sets = [
            sets[image_id] for sets in rule_node_sets if image_id in sets
        ]
        for path in paths:
            # Get checkable nodes for this path (decision + action)
            checkable = {
                n["id"] for n in path["nodes"]
                if n["type"] in ("decision", "action")
            }
            # A path with no decision/action nodes is trivially covered.
            if not checkable or any(
                checkable.issubset(nodes) for nodes in image_rule_sets
            ):
                covered.append(path)
            else:
                uncovered.append(path)

    total = len(covered) + len(uncovered)
    stats = {
        "total_paths": total,
        "covered_paths": len(covered),
        "uncovered_paths": len(uncovered),
        "coverage_pct": round(len(covered) / total * 100, 1) if total > 0 else 100.0,
    }
    return covered, uncovered, stats
# ---- Synthetic Function Unit Generation ----
def generate_synthetic_unit(path: dict, unit_seq: int) -> dict:
    """Create a synthetic function_unit from an uncovered logic tree path.

    The unit's ``path`` labels are assembled, in order, from the inferred
    scope, switch state, app type and app state, followed by the label of
    the last action node (truncated to 20 characters) when it is non-empty.

    Args:
        path: Enumerated path. ``image_id`` is required; ``meaning``,
            ``action_nodes`` and ``node_ids`` are read when present.
        unit_seq: Sequence number embedded in the unit id, zero-padded to
            three digits.

    Returns:
        A function_unit dict flagged ``auto_generated`` whose single source
        points at the path's logic tree nodes.
    """
    switch = _infer_switch_state(path)
    app_type = _infer_app_type(path)
    app_state = _infer_app_state(path)
    scope = _infer_scope(path)

    # Build description from path meaning
    meaning = path.get("meaning") or ""
    description = f"自动补全: {meaning}"
    if switch:
        description = f"开关{switch}, {description}"

    # Build path list, keeping only the parts that could be inferred.
    path_labels = []
    if scope:
        path_labels.append(scope)
    if switch:
        path_labels.append(f"开关{switch}")
    if app_type:
        path_labels.append(app_type)
    if app_state:
        path_labels.append(app_state)

    # Add behavior from terminal action; an empty label adds nothing useful.
    action_nodes = path.get("action_nodes", [])
    if action_nodes:
        last_action = action_nodes[-1].get("label", "")
        if last_action:
            path_labels.append(last_action[:20])

    return {
        "unit_id": f"FU-AUTO-{path['image_id']}-{unit_seq:03d}",
        "name": f"自动补全-{meaning[:60]}",
        "description": description,
        "path": path_labels,
        "auto_generated": True,
        "sources": [
            {
                "section": "",
                "type": "logic_tree",
                "image_id": path["image_id"],
                "logic_tree_nodes": path.get("node_ids", []),
            }
        ],
    }
def _infer_switch_state(path: dict) -> str:
"""Infer switch state from decision nodes in path."""
for n in path["nodes"]:
label = n.get("label", "")
branch = n.get("branch_taken", "")
if "开关" in label and n["type"] == "decision":
if branch == "开启":
return "开启"
elif branch == "关闭":
return "关闭"
return ""
def _infer_app_type(path: dict) -> str:
"""Infer app type from state nodes in path."""
type_map = {
"其他应用": "其他应用",
"SDK限制": "SDK限制",
"通过接入SDK限制的应用": "SDK限制",
"系统限制": "系统限制",
"通过系统限制应用": "系统限制",
}
for n in path["nodes"]:
if n["type"] == "state":
for key, val in type_map.items():
if key in n.get("label", ""):
return val
return ""
def _infer_app_state(path: dict) -> str:
"""Infer app state (前台/后台) from decision nodes."""
for n in path["nodes"]:
label = n.get("label", "")
branch = n.get("branch_taken", "")
if "前台" in label:
if branch == "":
return "前台"
elif branch == "":
return "后台"
return ""
def _infer_scope(path: dict) -> str:
    """Infer geographic scope. Defaults to 国内.

    ``path`` is currently not inspected: the domestic scope ("国内") is
    returned for every path.
    """
    return "国内"
# ---- LLM Extraction for Synthetic Units ----
def extract_rules_for_synthetic_units(
    synthetic_units: list[dict], doc: dict, max_retries: int | None = None
) -> list[dict]:
    """Extract IR rules for synthetic function_units using step2's LLM logic.

    Units are processed one after another. A unit whose extraction raises
    still yields a fragment: its ``rules`` list is empty and the exception
    text is stored under ``error`` (the same shape step 2 uses for failed
    units), so the failure stays visible in the saved output.

    Args:
        synthetic_units: Units produced by ``generate_synthetic_unit``.
        doc: Parsed input document.
        max_retries: Retries per unit; ``None`` uses
            ``config.MAX_RETRIES_PER_STAGE``.

    Returns:
        One fragment per unit with ``unit_id``, ``unit_name``, ``rules`` and
        ``auto_generated`` (plus ``error`` on failure).
    """
    from step2_ir_extraction import (
        build_document_lookup,
        extract_context_package,
        extract_rules_for_unit,
    )

    if max_retries is None:
        max_retries = config.MAX_RETRIES_PER_STAGE

    sections_by_source, image_by_rid, conflicts_by_section = build_document_lookup(doc)
    fragments = []
    for unit in synthetic_units:
        pkg = extract_context_package(
            unit, doc, sections_by_source, image_by_rid, conflicts_by_section
        )
        # Enrich pkg with unit's own path and description
        pkg["unit_path"] = unit.get("path", [])
        pkg["unit_description"] = unit.get("description", pkg["unit_description"])

        fragment = {
            "unit_id": unit["unit_id"],
            "unit_name": unit.get("name", ""),
            "rules": [],
            "auto_generated": True,
        }
        try:
            fragment["rules"] = extract_rules_for_unit(pkg, max_retries)
        except Exception as e:
            # Best effort: one failing unit must not abort the whole round,
            # but the reason is reported and recorded rather than dropped.
            print(f" [FAIL] {unit['unit_id']}: 失败 — {e}")
            fragment["error"] = str(e)
        fragments.append(fragment)
        print(f" {unit['unit_id']}: {len(fragment['rules'])} 条规则")
    return fragments
# ---- Iterative Auto-Completion ----
def run_autocomplete(
    all_paths: dict[str, list[dict]],
    existing_rules: list[dict],
    doc: dict,
) -> tuple[list[dict], dict]:
    """Run iterative auto-completion. Returns (autocomplete_fragments, final_stats).

    Each round turns the currently uncovered paths into synthetic function
    units, extracts rules for them via the LLM, and recomputes coverage
    against the existing rules plus every rule generated so far. Rounds stop
    when nothing is uncovered, ``config.COVERAGE_TARGET`` is reached,
    ``config.MAX_RETRIES_PER_STAGE`` rounds have run, or the remaining
    uncovered paths contain no decision nodes.

    Args:
        all_paths: ``{image_id: [path, ...]}`` enumeration.
        existing_rules: Rules already extracted in stage 2 (not modified).
        doc: Parsed input document.

    Returns:
        ``(fragments, stats)``: all fragments generated across rounds and
        the best coverage statistics observed.
    """
    print(f"\n 初始路径覆盖率分析...")
    covered, uncovered, stats = compute_path_coverage(all_paths, existing_rules)
    print(f" 覆盖: {stats['covered_paths']}/{stats['total_paths']} "
          f"({stats['coverage_pct']}%)")
    if not uncovered:
        print(f" 所有路径已覆盖,无需自动补全")
        return [], stats
    print(f" 未覆盖路径: {len(uncovered)} 条")

    all_fragments = []
    best_stats = stats
    # Running counter so unit ids stay unique across rounds even though the
    # number of uncovered paths changes from round to round.
    next_seq = 1
    for round_n in range(1, config.MAX_RETRIES_PER_STAGE + 1):
        if not uncovered:
            break
        print(f"\n--- 自动补全 第 {round_n} 轮 ---")
        print(f" 为 {len(uncovered)} 条未覆盖路径生成合成单元...")

        # Generate synthetic units
        synthetic_units = [
            generate_synthetic_unit(path, next_seq + i)
            for i, path in enumerate(uncovered)
        ]
        next_seq += len(synthetic_units)

        # Extract rules via LLM (sequential to avoid flooding the API)
        fragments = extract_rules_for_synthetic_units(synthetic_units, doc)
        all_fragments.extend(fragments)

        # Re-compute coverage with the rules of ALL rounds; using only this
        # round's fragments would un-cover paths completed earlier.
        all_rules = existing_rules + [
            rule for f in all_fragments for rule in f.get("rules", [])
        ]
        covered, uncovered, stats = compute_path_coverage(all_paths, all_rules)
        print(f" 第 {round_n} 轮后覆盖: {stats['covered_paths']}/{stats['total_paths']} "
              f"({stats['coverage_pct']}%)")
        if stats["coverage_pct"] > best_stats["coverage_pct"]:
            best_stats = stats
        if stats["coverage_pct"] >= config.COVERAGE_TARGET * 100:
            print(f" 达到目标覆盖率 {config.COVERAGE_TARGET:.0%},停止")
            break

        # Stop when the remaining uncovered paths have no decision nodes.
        uncovered_decision_nodes = {
            n.get("label", "")
            for p in uncovered
            for n in p.get("decision_nodes", [])
        }
        if not uncovered_decision_nodes:
            print(f" 无更多可补全路径,停止")
            break
    return all_fragments, best_stats
# ---- Main ----
def main():
    """Run stage 2.5: measure path coverage and auto-complete missing rules.

    Loads the input document and the stage-2 fragments, enumerates logic
    tree paths (saved to ``config.PATH_ENUM_JSON`` without per-node detail),
    runs auto-completion, and saves the generated fragments to
    ``config.IR_AUTOCOMPLETE_FRAGMENTS_JSON``.
    """
    banner = "=" * 60
    print(banner)
    print("阶段 2.5:分支覆盖自动补全")
    print(banner)

    # 1. Load inputs
    print(f"\n[1/5] 加载输入...")
    doc = config.load_input_document()
    fragments = config.load_json(config.IR_FRAGMENTS_JSON)
    all_rules = []
    for fragment in fragments:
        all_rules += list(fragment.get("rules", []))
    print(f" 已有规则: {len(all_rules)} 条")

    # 2. Enumerate paths
    print(f"\n[2/5] 枚举逻辑树路径...")
    all_paths = enumerate_all_paths(doc)
    total_paths = sum(len(paths) for paths in all_paths.values())
    print(f" 共 {total_paths} 条路径")

    # Save path enumeration for downstream audit, dropping the "nodes" key
    # from every path.
    slim_paths = {}
    for image_id, paths in all_paths.items():
        slim_paths[image_id] = [
            {key: value for key, value in path.items() if key != "nodes"}
            for path in paths
        ]
    config.save_json(
        {"logic_tree_paths": slim_paths, "total_paths": total_paths},
        config.PATH_ENUM_JSON,
    )

    # 3. Run auto-completion
    print(f"\n[3/5] 运行自动补全...")
    autocomplete_fragments, final_stats = run_autocomplete(
        all_paths, all_rules, doc
    )

    # 4. Save
    print(f"\n[4/5] 保存自动补全片段...")
    config.save_json(
        autocomplete_fragments, config.IR_AUTOCOMPLETE_FRAGMENTS_JSON
    )
    print(f" 输出: {config.IR_AUTOCOMPLETE_FRAGMENTS_JSON}")
    print(f" 生成 {len(autocomplete_fragments)} 个补全片段")

    # 5. Summary
    print(f"\n[5/5] 完成!")
    print(f" 最终路径覆盖: {final_stats['covered_paths']}/{final_stats['total_paths']} "
          f"({final_stats['coverage_pct']}%)")
    below_target = final_stats["coverage_pct"] < config.COVERAGE_TARGET * 100
    if below_target:
        remaining = final_stats["total_paths"] - final_stats["covered_paths"]
        print(f" WARN: {remaining} 条路径仍未覆盖,将在审计报告中列出")
# Script entry point: run stage 2.5 only when this file is executed directly.
if __name__ == "__main__":
    main()
@@ -0,0 +1,508 @@
"""
Stage 2: Per Function Unit IR Extraction.
For each function unit from the semantic index, constructs a precision context
package and calls the LLM to extract detailed IR rules.
Runs multiple LLM calls in parallel (up to MAX_CONCURRENCY).
Output: output/ir_fragments.json
"""
import concurrent.futures
import json
import re
import sys
import time
from pathlib import Path
import config
MAX_CONCURRENCY = 3 # Max parallel LLM calls
def load_semantic_index() -> dict:
    """Load the semantic index from Stage 1.

    Reads the file at ``config.SEMANTIC_INDEX_JSON`` via ``config.load_json``.
    """
    return config.load_json(config.SEMANTIC_INDEX_JSON)
def build_document_lookup(doc: dict):
    """Build lookup structures for fast context extraction from the document.

    Section titles and conflict sections are keyed by their first
    whitespace-separated token (e.g. ``"3.1.1 Title"`` -> ``"3.1.1"``).
    Empty, whitespace-only or missing titles are handled uniformly: such
    sections are left out of ``sections_by_source`` and such conflicts are
    grouped under ``""``.

    Args:
        doc: Parsed document with optional ``sections``, ``image_analysis``
            and ``resolved_conflicts`` lists.

    Returns:
        ``(sections_by_source, image_by_rid, conflicts_by_section)``:

        * ``sections_by_source``: leading token -> section dict.
        * ``image_by_rid``: image ``rid`` -> image_analysis entry.
        * ``conflicts_by_section``: leading token -> list of conflicts.
    """
    # sections_by_source: "3.1.1" -> section dict
    sections_by_source = {}
    for section in doc.get("sections", []):
        parts = (section.get("source") or "").split()
        if parts:
            sections_by_source[parts[0]] = section

    # image_by_rid: "rId16" -> image_analysis entry
    image_by_rid = {}
    for img in doc.get("image_analysis", []):
        rid = img.get("rid", "")
        if rid:
            image_by_rid[rid] = img

    # Conflicts indexed by section
    conflicts_by_section = {}
    for conflict in doc.get("resolved_conflicts", []):
        # split() of a whitespace-only title is empty, so test the token
        # list rather than the raw string before indexing.
        parts = (conflict.get("section") or "").split()
        key = parts[0] if parts else ""
        conflicts_by_section.setdefault(key, []).append(conflict)

    return sections_by_source, image_by_rid, conflicts_by_section
def _flatten_table_cells(block: dict) -> list[dict]:
    """Return every cell of a table block as a ``{row, name, text}`` dict."""
    return [
        {
            "row": col.get("row"),
            "name": col.get("name"),
            "text": col.get("text"),
        }
        for row in block.get("rows", [])
        for col in row.get("columns", [])
    ]


def _rows_matching(block: dict, row_num) -> list[dict]:
    """Return one highlighted entry per table row containing a cell with ``row_num``."""
    matches = []
    for row in block.get("rows", []):
        if any(col.get("row") == row_num for col in row.get("columns", [])):
            matches.append({
                "headers": block.get("headers", []),
                "cells": {col["name"]: col["text"] for col in row["columns"]},
                "row": row_num,
            })
    return matches


def extract_context_package(
    fu: dict, doc: dict, sections_by_source: dict, image_by_rid: dict,
    conflicts_by_section: dict
) -> dict:
    """Build a precision context package for a single function unit.

    For each source of the unit:

    * ``table`` / ``para`` sources pull in the paragraphs and tables of the
      referenced section. A table source with a ``row`` number additionally
      lists the matching rows under ``rows``. Each section is processed
      once, for the first source that references it.
    * ``logic_tree`` sources pull in the image's logic tree, once per image.

    Resolved conflicts of every section that was processed are attached.

    Args:
        fu: Function unit; ``unit_id`` is required.
        doc: Parsed document (kept for interface compatibility; the lookups
            below are what is read).
        sections_by_source: Leading section token -> section dict.
        image_by_rid: Image rid -> image_analysis entry.
        conflicts_by_section: Leading section token -> conflicts.

    Returns:
        Dict with ``unit_id``, ``unit_name``, ``unit_description``,
        ``unit_path``, ``texts``, ``tables``, ``logic_trees`` and
        ``resolved_conflicts``.
    """
    texts = []
    tables = []
    logic_trees = []
    seen_sections = set()
    seen_images = set()

    for src in fu.get("sources", []):
        src_type = src.get("type", "")
        # First token of the section title; a whitespace-only title yields
        # no token and is treated like a missing section.
        section_tokens = (src.get("section") or "").split()
        section_key = section_tokens[0] if section_tokens else ""

        # --- Text source ---
        if src_type in ("table", "para") and section_key:
            # NOTE(review): later sources naming an already-seen section are
            # skipped entirely, so only the first source's row is highlighted.
            if section_key in seen_sections:
                continue
            seen_sections.add(section_key)

            section = sections_by_source.get(section_key)
            if section is None:
                # Fuzzy match by prefix
                section = next(
                    (
                        candidate
                        for key, candidate in sections_by_source.items()
                        if key.startswith(section_key)
                    ),
                    None,
                )

            if section:
                row_num = src.get("row") if src_type == "table" else None
                for block in section.get("blocks", []):
                    block_type = block.get("type")
                    if block_type == "para":
                        texts.append({
                            "section": section_key,
                            "text": block["text"],
                        })
                    elif block_type == "table":
                        table_entry = {
                            "section": section_key,
                            "headers": block.get("headers", []),
                        }
                        if row_num is not None:
                            # Highlight only the specific row
                            table_entry["rows"] = _rows_matching(block, row_num)
                        table_entry["all_rows"] = _flatten_table_cells(block)
                        tables.append(table_entry)

        # --- Logic tree source ---
        if src_type == "logic_tree":
            image_id = src.get("image_id", "")
            if not image_id or image_id in seen_images:
                continue
            seen_images.add(image_id)
            img = image_by_rid.get(image_id)
            if img:
                lt = img.get("logic_tree")
                if lt:
                    logic_trees.append({
                        "image_id": image_id,
                        "description": img.get("description", ""),
                        "tree": lt,
                    })

    # Include relevant resolved conflicts
    relevant_conflicts = [
        conflict
        for section_key in seen_sections
        for conflict in conflicts_by_section.get(section_key, [])
    ]

    return {
        "unit_id": fu["unit_id"],
        "unit_name": fu.get("name", ""),
        "unit_description": fu.get("description", ""),
        "unit_path": fu.get("path", []),
        "texts": texts,
        "tables": tables,
        "logic_trees": logic_trees,
        "resolved_conflicts": relevant_conflicts,
    }
def format_context_package(pkg: dict) -> str:
    """Format a context package as a readable string for the prompt.

    Sections are emitted in a fixed order: text paragraphs, tables, logic
    trees, then resolved conflicts (only when present). An empty text,
    table or logic tree section is marked with "(无)". ``None`` descriptions
    and branches missing ``value`` / ``target`` are rendered with
    placeholders instead of raising.

    Args:
        pkg: Context package as built by ``extract_context_package``.

    Returns:
        The rendered text, lines joined with ``"\\n"``.
    """
    parts = []

    # Texts
    parts.append("【文字段落】")
    for t in pkg.get("texts", []):
        parts.append(f"[{t.get('section', '?')}] {t.get('text', '')}")
    if not pkg.get("texts"):
        parts.append("(无)")

    # Tables
    parts.append("\n【表格数据】")
    for i, tbl in enumerate(pkg.get("tables", [])):
        parts.append(f"表格 {i+1} (section={tbl.get('section', '?')})")
        headers = tbl.get("headers", [])
        parts.append(f" 表头: {headers}")
        parts.append(" 全部行数据:")
        for row in tbl.get("all_rows", []):
            parts.append(
                f" 行{row.get('row','?')}[{row.get('name','?')}]: {row.get('text','')}"
            )
        # Highlight matched rows if any
        matched = tbl.get("rows", [])
        if matched:
            parts.append(" <重点关注行>:")
            for mr in matched:
                parts.append(f" 行{mr.get('row','?')}: {mr.get('cells', {})}")
    if not pkg.get("tables"):
        parts.append("(无)")

    # Logic trees
    parts.append("\n【逻辑树】")
    for i, lt in enumerate(pkg.get("logic_trees", [])):
        parts.append(f"逻辑树 {i+1} (image_id={lt.get('image_id', '?')})")
        # ``description`` may be present but None; slicing None would raise.
        description = lt.get("description") or ""
        parts.append(f" 描述: {description[:200]}")
        tree = lt.get("tree", {})
        parts.append(f" 根: {tree.get('root', '?')}")
        parts.append(" 节点:")
        for node in tree.get("nodes", []):
            nid = node.get("id", "?")
            ntype = node.get("type", "?")
            desc = node.get("description", "") or node.get("condition", "")
            parts.append(f" [{ntype}] {nid}: {desc}")
            for br in node.get("branches") or []:
                parts.append(
                    f" → {br.get('value', '?')} → {br.get('target', '?')}"
                )
    if not pkg.get("logic_trees"):
        parts.append("(无)")

    # Conflicts
    conflicts = pkg.get("resolved_conflicts", [])
    if conflicts:
        parts.append("\n【图文冲突仲裁】")
        for c in conflicts:
            parts.append(
                f" [{c.get('conflict_type', '?')}] 以{c.get('source', '?')}为准: "
                f"{c.get('correction', '')}"
            )
    return "\n".join(parts)
def _escape_json_for_format(s: str) -> str:
"""Escape curly braces in a JSON string for use with str.format()."""
return s.replace("{", "{{").replace("}", "}}")
def build_prompt(pkg: dict, format_feedback: str = "") -> str:
    """Build the LLM prompt for a single function unit.

    The template ``step2_ir_extraction.txt`` is read from
    ``config.PROMPTS_DIR`` and filled with ``str.format``. Values are passed
    as they are: ``str.format`` never re-scans substituted text, so braces
    inside the JSON payloads need no escaping. Escaping them would put
    doubled ``{{`` / ``}}`` into the prompt sent to the model. Only the
    template file itself has to double its literal braces.

    Args:
        pkg: Context package; ``unit_id``, ``unit_name`` and
            ``unit_description`` are required, the list fields default to
            empty lists.
        format_feedback: Optional fix instructions from a previous attempt.

    Returns:
        The formatted prompt text.
    """
    template_path = Path(config.PROMPTS_DIR) / "step2_ir_extraction.txt"
    template = template_path.read_text(encoding="utf-8")

    def _as_json(value) -> str:
        # Human-readable JSON; non-ASCII text is kept verbatim.
        return json.dumps(value, ensure_ascii=False, indent=2)

    return template.format(
        unit_id=pkg["unit_id"],
        unit_name=pkg["unit_name"],
        unit_description=pkg["unit_description"],
        texts=_as_json(pkg.get("texts", [])),
        tables=_as_json(pkg.get("tables", [])),
        logic_trees=_as_json(pkg.get("logic_trees", [])),
        resolved_conflicts=_as_json(pkg.get("resolved_conflicts", [])),
        format_feedback=format_feedback,
    )
def extract_json_from_response(text: str) -> str:
    """Extract a JSON array from an LLM response.

    Strategy:

    1. If the response contains a fenced code block (three backticks,
       optionally tagged ``json``) that wraps a ``[...]`` array, return the
       array text.
    2. Otherwise return the first balanced ``[ ... ]`` span. Brackets inside
       JSON string literals (including after escaped quotes) are ignored, so
       a value such as ``"]"`` does not end the array early.

    Args:
        text: Raw LLM response text.

    Returns:
        The extracted JSON text (not parsed).

    Raises:
        ValueError: No ``[`` is present, or the array is never closed.
    """
    fenced = re.search(r"```(?:json)?\s*(\[[\s\S]*?\])\s*```", text)
    if fenced:
        return fenced.group(1).strip()

    # Find outermost [ ... ]
    start = text.find("[")
    if start == -1:
        raise ValueError("No JSON array found in LLM response")

    depth = 0
    in_string = False
    escaped = False
    for i in range(start, len(text)):
        ch = text[i]
        if in_string:
            # Inside a string only the closing quote matters; a backslash
            # protects the character that follows it.
            if escaped:
                escaped = False
            elif ch == "\\":
                escaped = True
            elif ch == '"':
                in_string = False
            continue
        if ch == '"':
            in_string = True
        elif ch == "[":
            depth += 1
        elif ch == "]":
            depth -= 1
            if depth == 0:
                return text[start : i + 1]
    raise ValueError("Unclosed JSON array in LLM response")
def _check_rule_fields(rules: list[dict]) -> tuple[bool, list[dict]]:
"""Validate each rule has required fields. Returns (passed, failures).
Each failure: {rule_id, field, issue}
"""
failures = []
for j, rule in enumerate(rules):
if not isinstance(rule, dict):
failures.append({"rule_id": f"rule[{j}]", "field": "-", "issue": "规则不是 dict"})
continue
rid = rule.get("rule_id") or f"rule[{j}]"
if not rule.get("path"):
failures.append({"rule_id": rid, "field": "path", "issue": "缺少 path 字段(必填)"})
precond = rule.get("precondition") or {}
if not precond.get("geographic_scope"):
failures.append({"rule_id": rid, "field": "precondition.geographic_scope", "issue": "缺少 geographic_scope(必填)"})
for k, action in enumerate(rule.get("actions") or []):
if not isinstance(action, dict):
continue
if action.get("type") == "user_interaction":
content = action.get("content") or ""
if not content:
failures.append({
"rule_id": rid, "field": f"actions[{k}].content",
"issue": "user_interaction 的 content 为空"
})
elif any(ph in content for ph in ["文案由业务定义", "待定", "自定义"]):
failures.append({
"rule_id": rid, "field": f"actions[{k}].content",
"issue": f"content 包含占位符: '{content}'"
})
trigger = rule.get("trigger") or {}
for k, cond in enumerate(trigger.get("conditions") or []):
if isinstance(cond, dict):
if not cond.get("signal"):
failures.append({
"rule_id": rid, "field": f"trigger.conditions[{k}].signal",
"issue": "缺少 signal"
})
if not cond.get("operator"):
failures.append({
"rule_id": rid, "field": f"trigger.conditions[{k}].operator",
"issue": "缺少 operator"
})
if "value" not in cond:
failures.append({
"rule_id": rid, "field": f"trigger.conditions[{k}].value",
"issue": "缺少 value"
})
return len(failures) == 0, failures
def _build_fix_prompt(failures: list[dict]) -> str:
"""Build a format-fix instruction block for the prompt."""
if not failures:
return ""
lines = [
"\n## 上一轮格式问题修正\n",
"上一轮输出的规则存在以下格式问题,请修正后重新输出:\n",
]
for f in failures:
lines.append(f"- **{f['rule_id']}.{f['field']}**: {f['issue']}")
lines.append("\n请修正以上所有问题,重新输出完整的规则数组。")
return "\n".join(lines)
def extract_rules_for_unit(pkg: dict, max_retries: int | None = None) -> list[dict]:
    """Call LLM for one function unit, return its IR rules.

    Includes format validation with auto-fix retries: when the response is
    empty, cannot be parsed as a JSON array, or contains rules that fail
    ``_check_rule_fields``, the request is repeated with the problems
    described in the prompt.

    Args:
        pkg: Context package for the unit.
        max_retries: Retries after the first attempt; ``None`` uses
            ``config.MAX_RETRIES_PER_STAGE``.

    Returns:
        The validated rules, or ``[]`` when every attempt failed.
    """
    if max_retries is None:
        max_retries = config.MAX_RETRIES_PER_STAGE
    client = config.llm_client()
    prompt = build_prompt(pkg)
    last_failures = []
    for attempt in range(max_retries + 1):
        # Append format feedback on retry
        if attempt > 0 and last_failures:
            fix_text = _build_fix_prompt(last_failures)
            prompt = build_prompt(pkg, format_feedback=fix_text)
        try:
            resp = client.chat.completions.create(
                model=config.MODEL_NAME,
                messages=[
                    {
                        "role": "system",
                        "content": "你是一个精确的 JSON 输出引擎。只输出合法的 JSON 数组。",
                    },
                    {"role": "user", "content": prompt},
                ],
                temperature=config.TEMPERATURE,
                max_tokens=config.MAX_TOKENS,
            )
            content = resp.choices[0].message.content
            if content is None:
                # Raised as ValueError so an empty completion goes through
                # the retry path like any other unusable response.
                raise ValueError("LLM returned empty response")
            json_str = extract_json_from_response(content)
            rules = json.loads(json_str)
            if not isinstance(rules, list):
                raise ValueError(f"Expected JSON array, got {type(rules).__name__}")

            # Format validation
            passed, failures = _check_rule_fields(rules)
            if passed:
                return rules

            # Format issues found — retry with fix instructions
            print(f" 格式问题 ({len(failures)} 个): {[f['field'] for f in failures[:5]]}")
            last_failures = failures
            if attempt < max_retries:
                time.sleep(1)
        except (json.JSONDecodeError, ValueError) as e:
            print(f" JSON 解析失败 (尝试 {attempt + 1}): {e}")
            last_failures = [{"rule_id": "?", "field": "json", "issue": str(e)}]
            if attempt < max_retries:
                time.sleep(2)

    # Exhausted retries: nothing is returned for this unit.
    # NOTE(review): rules that parsed but failed validation are discarded
    # here; confirm whether returning them would be preferable.
    print(f" WARN: {pkg['unit_id']} 格式修复耗尽了 {max_retries} 次重试")
    return []
def extract_all_rules(
    semantic_index: dict, doc: dict
) -> list[dict]:
    """Extract IR rules for every function unit using a thread pool.

    Context packages are built serially, then ``extract_rules_for_unit`` is
    submitted once per package with at most ``MAX_CONCURRENCY`` workers.

    Args:
        semantic_index: Read for its ``function_units`` list.
        doc: Parsed input document, passed to the lookup/context helpers.

    Returns:
        One fragment dict per unit (``unit_id``, ``unit_name``, ``rules``,
        plus ``error`` when extraction raised), sorted by ``unit_id``.
    """
    sections_by_source, image_by_rid, conflicts_by_section = build_document_lookup(doc)
    function_units = semantic_index.get("function_units", [])
    print(f"{len(function_units)} 个功能单元待处理")
    print(f" 最大并发: {MAX_CONCURRENCY}")

    # Context packages are cheap to build, so this stays serial.
    packages = [
        extract_context_package(
            unit, doc, sections_by_source, image_by_rid, conflicts_by_section
        )
        for unit in function_units
    ]

    # The LLM calls are the slow part; fan them out across the pool.
    fragments = []
    with concurrent.futures.ThreadPoolExecutor(max_workers=MAX_CONCURRENCY) as pool:
        pending = {
            pool.submit(extract_rules_for_unit, package): (
                package["unit_id"],
                package["unit_name"],
            )
            for package in packages
        }
        for done in concurrent.futures.as_completed(pending):
            uid, uname = pending[done]
            try:
                rules = done.result()
                fragments.append({"unit_id": uid, "unit_name": uname, "rules": rules})
                print(f" [OK] {uid} ({uname}): {len(rules)} 条规则")
            except Exception as exc:
                print(f" [FAIL] {uid} ({uname}): 失败 — {exc}")
                fragments.append(
                    {
                        "unit_id": uid,
                        "unit_name": uname,
                        "rules": [],
                        "error": str(exc),
                    }
                )

    # Completion order is arbitrary; sort so the output ordering is stable.
    fragments.sort(key=lambda fragment: fragment["unit_id"])
    return fragments
def main():
    """Run stage two: load inputs, extract rules per unit, save the fragments."""
    banner = "=" * 60
    print(banner)
    print("阶段二:逐功能单元 IR 提取")
    print(banner)

    # 1. Load inputs
    print("\n[1/3] 加载输入...")
    semantic_index = load_semantic_index()
    doc = config.load_input_document()
    n_units = len(semantic_index.get("function_units", []))
    print(f" 语义索引: {n_units} 个功能单元")

    # 2. Extract rules
    print("\n[2/3] 逐单元提取 IR 规则...")
    fragments = extract_all_rules(semantic_index, doc)

    # 3. Save
    print("\n[3/3] 保存 IR 片段...")
    config.save_json(fragments, config.IR_FRAGMENTS_JSON)

    total_rules = sum(len(fragment["rules"]) for fragment in fragments)
    failed_ids = [fragment["unit_id"] for fragment in fragments if fragment.get("error")]
    print(f"\n完成! {len(fragments)} 个功能单元, 共 {total_rules} 条规则")
    if failed_ids:
        print(f" [WARN] {len(failed_ids)} 个单元提取失败: {failed_ids}")
    print(f"输出: {config.IR_FRAGMENTS_JSON}")
if __name__ == "__main__":
main()
File diff suppressed because it is too large Load Diff
@@ -0,0 +1,472 @@
"""
Tests for ensemble_merge.py — all pure Python, no LLM calls, no file I/O.
Each test uses hardcoded mock data to verify one piece of the merge logic.
"""
import sys
from pathlib import Path
sys.path.insert(0, str(Path(__file__).parent.parent))
from ensemble_merge import (
concept_name_similarity,
cluster_concepts,
merge_concept_cluster,
unit_node_jaccard,
path_similarity,
unit_similarity,
cluster_function_units,
pick_best_representative,
compute_confidence_versions,
ensemble_merge_concepts,
ensemble_merge_function_units,
ensemble_merge,
_collect_logic_tree_nodes,
)
PASS = "[PASS]"
FAIL = "[FAIL]"
# ---- Mock helpers ----
def _mk_unit(unit_id, name, path, logic_tree_nodes, description="", sources=None):
"""Create a minimal function_unit dict for testing."""
if sources is None:
srcs = []
if logic_tree_nodes:
srcs.append({
"image_id": "rId16",
"type": "logic_tree",
"logic_tree_nodes": logic_tree_nodes,
})
if not srcs:
srcs.append({
"section": "3.1",
"type": "table",
"text_snippet": "test",
})
else:
srcs = sources
return {
"unit_id": unit_id,
"name": name,
"description": description or f"desc for {name}",
"path": path,
"sources": srcs,
}
def _mk_concept(name, parent=None, aliases=None, defined_in=None):
"""Create a minimal concept dict for testing."""
return {
"name": name,
"aliases": aliases or [],
"defined_in": defined_in or ["3.1"],
"parent": parent,
}
# =============================================================================
# Test 1: concept_name_similarity
# =============================================================================
def test_concept_name_similarity_exact():
    """Identical names score exactly 1.0."""
    for label in ("国内", "行车娱乐限制"):
        assert concept_name_similarity(label, label) == 1.0
def test_concept_name_similarity_substring():
    """A name that contains the other scores at least 0.85."""
    longer, shorter = "国内行车娱乐限制", "行车娱乐限制"
    score = concept_name_similarity(longer, shorter)
    assert score >= 0.85, f"expected >= 0.85, got {score}"
def test_concept_name_similarity_different():
    """Unrelated names score below 0.7."""
    score = concept_name_similarity("国内", "海外")
    assert score < 0.7, f"expected < 0.7, got {score}"
def test_concept_name_similarity_seq_matcher():
    """Names differing by an inserted word score strictly between 0.6 and 0.95."""
    score = concept_name_similarity("前台打断", "前台应用打断")
    assert 0.6 < score < 0.95, f"expected 0.6-0.95, got {score}"
# =============================================================================
# Test 2: _collect_logic_tree_nodes
# =============================================================================
def test_collect_logic_tree_nodes():
    """Node IDs of a logic_tree source are returned as a set."""
    node_ids = ["n1", "n2", "n3"]
    collected = _collect_logic_tree_nodes(_mk_unit("U1", "test", ["A"], node_ids))
    assert collected == {"n1", "n2", "n3"}
def test_collect_logic_tree_nodes_empty():
    """A unit with only a table source yields an empty node set."""
    table_only = [{"section": "3.1", "type": "table"}]
    unit = _mk_unit("U2", "test", ["A"], [], sources=table_only)
    assert _collect_logic_tree_nodes(unit) == set()
# =============================================================================
# Test 3: unit_node_jaccard
# =============================================================================
def test_unit_node_jaccard_identical():
    """Units referencing the same nodes have Jaccard 1.0."""
    left = _mk_unit("U1", "a", ["A"], ["n1", "n2", "n3"])
    right = _mk_unit("U2", "b", ["A"], ["n1", "n2", "n3"])
    assert unit_node_jaccard(left, right) == 1.0
def test_unit_node_jaccard_partial():
    """Three shared nodes out of a union of four gives about 0.75."""
    left = _mk_unit("U1", "a", ["A"], ["n1", "n2", "n3", "n4"])
    right = _mk_unit("U2", "b", ["A"], ["n1", "n2", "n3"])
    # intersection=3, union=4
    deviation = abs(unit_node_jaccard(left, right) - 0.75)
    assert deviation < 0.01
def test_unit_node_jaccard_disjoint():
    """Units with no shared nodes have Jaccard 0.0."""
    left = _mk_unit("U1", "a", ["A"], ["n1", "n2"])
    right = _mk_unit("U2", "b", ["B"], ["n3", "n4"])
    assert unit_node_jaccard(left, right) == 0.0
def test_unit_node_jaccard_both_empty():
    """Two units without any logic-tree nodes have Jaccard 0.0."""
    left = _mk_unit("U1", "a", ["A"], [], sources=[{"section": "3.1", "type": "table"}])
    right = _mk_unit("U2", "b", ["B"], [], sources=[{"section": "3.1", "type": "table"}])
    assert unit_node_jaccard(left, right) == 0.0
# =============================================================================
# Test 4: path_similarity
# =============================================================================
def test_path_similarity_identical():
    """Equal paths score exactly 1.0."""
    first = ["国内", "系统限制", "前台打断"]
    second = ["国内", "系统限制", "前台打断"]
    assert path_similarity(first, second) == 1.0
def test_path_similarity_partial():
    """Paths sharing two of three segments score strictly between 0.4 and 0.9."""
    first = ["国内", "系统限制", "前台打断"]
    second = ["国内", "系统限制", "后台限制启动"]
    score = path_similarity(first, second)
    # 2/3 set overlap, sequential 3/5 ≈ 0.6
    assert 0.4 < score < 0.9, f"expected 0.4-0.9, got {score}"
def test_path_similarity_different():
    """Single-segment paths with different names score below 0.7."""
    score = path_similarity(["国内"], ["海外"])
    assert score < 0.7, f"expected < 0.7, got {score}"
# =============================================================================
# Test 5: unit_similarity
# =============================================================================
def test_unit_similarity_identical():
    """A unit compared with itself scores above 0.99."""
    unit = _mk_unit(
        "U1",
        "国内-系统限制-前台打断",
        ["国内", "系统限制", "前台打断"],
        ["n1", "n2", "n3", "n8", "n19"],
    )
    assert unit_similarity(unit, unit) > 0.99
def test_unit_similarity_different():
    """Units with disjoint paths and nodes score below 0.3."""
    domestic = _mk_unit("U1", "a", ["国内", "系统限制", "前台打断"], ["n1", "n2", "n3"])
    overseas = _mk_unit("U2", "b", ["海外", "SDK限制"], ["n10", "n11", "n12"])
    assert unit_similarity(domestic, overseas) < 0.3
# =============================================================================
# Test 6: cluster_concepts
# =============================================================================
def test_cluster_concepts_identical():
    """Three identical versions give three clusters with three members each."""

    def one_version():
        # Fresh dicts per version so no objects are shared between versions.
        return [
            _mk_concept("国内"),
            _mk_concept("海外"),
            _mk_concept("系统限制", parent="国内"),
        ]

    clusters = cluster_concepts([one_version(), one_version(), one_version()])
    # Should have exactly 3 clusters (国内, 海外, 系统限制)
    assert len(clusters) == 3, f"expected 3 clusters, got {len(clusters)}"
    for c in clusters:
        assert len(c) == 3, f"expected each cluster to have 3 members, got {len(c)}"
def test_cluster_concepts_name_variation():
    """Name variants of one concept across versions end up in a single cluster."""
    names = ("国内行车娱乐限制", "行车娱乐限制", "国内行车娱乐限制")
    versions = [[_mk_concept(name, parent="国内")] for name in names]
    clusters = cluster_concepts(versions)
    assert len(clusters) == 1, f"expected 1 cluster, got {len(clusters)}"
    assert len(clusters[0]) == 3, f"expected 3 members, got {len(clusters[0])}"
# =============================================================================
# Test 7: merge_concept_cluster
# =============================================================================
def test_merge_concept_cluster():
    """Merging keeps the shared name and parent, unions aliases, and rates confidence."""
    members = [
        ("国内行车娱乐限制", ["限制"]),
        ("行车娱乐限制", ["行车限制"]),
        ("行车娱乐限制", ["限制"]),
    ]
    cluster = [
        (version, _mk_concept(name, parent="国内", aliases=aliases))
        for version, (name, aliases) in enumerate(members)
    ]
    merged, conf = merge_concept_cluster(cluster, 3)
    assert "行车娱乐限制" in merged["name"]
    assert merged["parent"] == "国内"
    assert set(merged["aliases"]) == {"限制", "行车限制"}
    assert conf in ("high", "medium")
# =============================================================================
# Test 8: cluster_function_units
# =============================================================================
def test_cluster_function_units_all_agree():
    """Three versions of the same unit form one cluster of three."""

    def version(description):
        # Fresh path/node lists per unit so units never share list objects.
        return _mk_unit(
            "U-001",
            "国内-系统限制-前台打断",
            ["国内", "系统限制", "前台打断"],
            ["n1", "n2", "n3", "n8", "n19", "n21", "n23", "n25", "n26"],
            description,
        )

    detailed = "switch ON, system app, foreground, speed>=15, non-P, interrupt + toast"
    versions = [
        [version(detailed)],
        [version(detailed)],
        [version("switch ON, system app, foreground, interrupt")],
    ]
    clusters = cluster_function_units(versions)
    assert len(clusters) == 1, f"expected 1 cluster, got {len(clusters)}"
    assert len(clusters[0]) == 3
def test_cluster_function_units_partial_agree():
    """Two matching units cluster together; the differing unit stands alone."""

    def interrupt_unit():
        return _mk_unit(
            "U-001", "打断", ["国内", "系统限制", "前台打断"],
            ["n1", "n2", "n3", "n8", "n19"],
        )

    odd_one = _mk_unit(
        "U-002", "禁止", ["国内", "系统限制", "后台限制启动"], ["n5", "n6"]
    )
    clusters = cluster_function_units([[interrupt_unit()], [interrupt_unit()], [odd_one]])
    # the two interrupt units in one cluster, the odd one in another
    assert len(clusters) == 2, f"expected 2 clusters, got {len(clusters)}"
    cluster_sizes = sorted(map(len, clusters))
    assert cluster_sizes == [1, 2], f"expected cluster sizes [1,2], got {cluster_sizes}"
def test_cluster_function_units_all_disagree():
    """Three unrelated units give three singleton clusters."""
    versions = [
        [_mk_unit("U-001", "打断", ["国内", "系统限制", "前台打断"], ["n1", "n2", "n3"])],
        [_mk_unit("U-002", "禁止", ["国内", "系统限制", "后台限制启动"], ["n5", "n6"])],
        [_mk_unit("U-003", "SDK", ["国内", "SDK限制"], ["n10", "n11"])],
    ]
    clusters = cluster_function_units(versions)
    assert len(clusters) == 3, f"expected 3 clusters, got {len(clusters)}"
# =============================================================================
# Test 9: pick_best_representative
# =============================================================================
def test_pick_best_representative_prefers_rich():
    """The unit with more nodes and a longer description is picked."""
    sparse = _mk_unit(
        "U-001", "short", ["国内", "系统限制"],
        ["n1", "n2", "n3"],
        description="short desc",
    )
    rich = _mk_unit(
        "U-001", "detailed", ["国内", "系统限制", "前台打断"],
        ["n1", "n2", "n3", "n8", "n19", "n21", "n23", "n25", "n26"],
        description="very detailed description of the full rule behavior " * 5,
    )
    best = pick_best_representative([(0, sparse), (1, rich)])
    # the rich unit should win: more nodes, longer description,
    # though the sparse one has lower temp
    assert best["name"] == "detailed"
# =============================================================================
# Test 10: compute_confidence_versions
# =============================================================================
def test_confidence_high_unanimous():
    """3 of 3 versions, flag True, is rated high."""
    rating = compute_confidence_versions(3, 3, True)
    assert rating == "high"
def test_confidence_high_two_of_three_with_t0():
    """2 of 3 versions, flag True, is rated high."""
    rating = compute_confidence_versions(2, 3, True)
    assert rating == "high"
def test_confidence_medium_two_of_three_without_t0():
    """2 of 3 versions, flag False, is rated medium."""
    rating = compute_confidence_versions(2, 3, False)
    assert rating == "medium"
def test_confidence_low_one_of_three():
    """1 of 3 versions, flag False, is rated low."""
    rating = compute_confidence_versions(1, 3, False)
    assert rating == "low"
def test_confidence_high_all_two_versions():
    """2 of 2 versions, flag True, is rated high."""
    rating = compute_confidence_versions(2, 2, True)
    assert rating == "high"
# =============================================================================
# Test 11: ensemble_merge_concepts
# =============================================================================
def test_ensemble_merge_concepts():
    """Three versions with a renamed concept merge into three fully supported concepts."""

    def scopes():
        return [_mk_concept("国内"), _mk_concept("海外")]

    versions = [
        scopes() + [_mk_concept("国内行车娱乐限制", parent="国内")],
        scopes() + [
            _mk_concept(
                "行车娱乐限制", parent="国内",
                aliases=["限制"], defined_in=["3.1", "3.1.1"],
            )
        ],
        scopes() + [_mk_concept("行车娱乐限制", parent="国内")],
    ]
    merged = ensemble_merge_concepts(versions)
    # Should merge the 3 concepts across 3 versions into 3 clusters
    assert len(merged) == 3, f"expected 3 merged concepts, got {len(merged)}"
    for concept in merged:
        assert "confidence" in concept
        assert "ensemble_support" in concept
        assert concept["ensemble_support"] == "3/3"
# =============================================================================
# Test 12: ensemble_merge_function_units
# =============================================================================
def test_ensemble_merge_function_units():
    """Three near-identical units merge into one high-confidence unit."""
    path_names = ("国内", "系统限制", "前台打断")
    all_nodes = ("n1", "n2", "n3", "n8", "n19", "n21", "n23", "n25", "n26")
    variants = [
        (all_nodes, "full description A"),
        (all_nodes, "full description B (more detail)"),
        (all_nodes[:-1], "partial description"),
    ]
    versions = [
        [_mk_unit("U-001", "打断", list(path_names), list(nodes), description=text)]
        for nodes, text in variants
    ]
    merged = ensemble_merge_function_units(versions)
    assert len(merged) == 1, f"expected 1 unit, got {len(merged)}"
    unit = merged[0]
    assert unit["confidence"] == "high"
    assert unit["ensemble_support"] == "3/3"
    assert unit["source_versions"] == 3
    assert unit["unit_id"].startswith("FU-ENS-")
    # Should have picked the second variant (more detail)
    assert "more detail" in unit["description"]
# =============================================================================
# Test 13: ensemble_merge full integration
# =============================================================================
def test_ensemble_merge_full():
    """End-to-end merge of three versions: one shared unit plus two singletons."""

    def version(*extra_units):
        # Every version has the same concepts and the shared interrupt unit;
        # all objects are built fresh so versions never share dicts or lists.
        shared = _mk_unit(
            "U-001", "打断", ["国内", "系统限制", "前台打断"],
            ["n1", "n2", "n3", "n8", "n19", "n25", "n26"],
        )
        return {
            "feature_name": "行车娱乐限制",
            "concepts": [_mk_concept("国内"), _mk_concept("系统限制", parent="国内")],
            "function_units": [shared, *extra_units],
        }

    v0 = version(
        _mk_unit("U-002", "后台禁止", ["国内", "系统限制", "后台限制启动"], ["n5", "n6"])
    )
    v1 = version(
        _mk_unit("U-003", "SDK自定义", ["国内", "SDK限制", "自定义限制"], ["n10", "n11"])
    )
    v2 = version()

    result = ensemble_merge([v0, v1, v2])
    assert result["feature_name"] == "行车娱乐限制"
    assert result["ensemble_versions"] == 3
    units = result["function_units"]
    concepts = result["concepts"]

    # Concepts: 国内 + 系统限制
    assert len(concepts) == 2

    # Units: 打断 (3 versions → high), 后台禁止 (1 version → low), SDK (1 version → low)
    assert len(units) == 3
    by_level = {
        level: [u for u in units if u["confidence"] == level]
        for level in ("high", "low")
    }
    assert len(by_level["high"]) == 1
    assert len(by_level["low"]) == 2

    # All units should have ensemble fields
    for u in units:
        assert "confidence" in u
        assert "ensemble_support" in u
        assert "source_versions" in u

    # Confidence summary
    cs = result["confidence_summary"]
    assert cs["total_units"] == 3
    assert cs["high"] == 1
    assert cs["low"] == 2
# =============================================================================
# Runner
# =============================================================================
def run_all_tests():
    """Run every ensemble-merge test in order and print a verdict for each.

    Returns:
        True when no test failed, False otherwise.
    """
    divider = "=" * 60
    print(divider)
    print("Ensemble Merge 测试 (纯 Python, 无 LLM)")
    print(divider)
    tests = [
        ("concept_name_similarity exact", test_concept_name_similarity_exact),
        ("concept_name_similarity substring", test_concept_name_similarity_substring),
        ("concept_name_similarity different", test_concept_name_similarity_different),
        ("concept_name_similarity seq_matcher", test_concept_name_similarity_seq_matcher),
        ("collect_logic_tree_nodes", test_collect_logic_tree_nodes),
        ("collect_logic_tree_nodes empty", test_collect_logic_tree_nodes_empty),
        ("unit_node_jaccard identical", test_unit_node_jaccard_identical),
        ("unit_node_jaccard partial", test_unit_node_jaccard_partial),
        ("unit_node_jaccard disjoint", test_unit_node_jaccard_disjoint),
        ("unit_node_jaccard both_empty", test_unit_node_jaccard_both_empty),
        ("path_similarity identical", test_path_similarity_identical),
        ("path_similarity partial", test_path_similarity_partial),
        ("path_similarity different", test_path_similarity_different),
        ("unit_similarity identical", test_unit_similarity_identical),
        ("unit_similarity different", test_unit_similarity_different),
        ("cluster_concepts identical", test_cluster_concepts_identical),
        ("cluster_concepts name variation", test_cluster_concepts_name_variation),
        ("merge_concept_cluster", test_merge_concept_cluster),
        ("cluster_function_units all_agree", test_cluster_function_units_all_agree),
        ("cluster_function_units partial_agree", test_cluster_function_units_partial_agree),
        ("cluster_function_units all_disagree", test_cluster_function_units_all_disagree),
        ("pick_best_representative", test_pick_best_representative_prefers_rich),
        ("confidence high unanimous", test_confidence_high_unanimous),
        ("confidence high 2/3 with t0", test_confidence_high_two_of_three_with_t0),
        ("confidence medium 2/3 no t0", test_confidence_medium_two_of_three_without_t0),
        ("confidence low 1/3", test_confidence_low_one_of_three),
        ("confidence high 2/2", test_confidence_high_all_two_versions),
        ("ensemble_merge_concepts", test_ensemble_merge_concepts),
        ("ensemble_merge_function_units", test_ensemble_merge_function_units),
        ("ensemble_merge full", test_ensemble_merge_full),
    ]

    passed = failed = 0
    for name, test_fn in tests:
        # ``problem`` stays None on success, else holds the failure text.
        try:
            test_fn()
            problem = None
        except AssertionError as err:
            problem = f"{err}"
        except Exception as err:
            problem = f"unexpected {type(err).__name__}: {err}"
        if problem is None:
            print(f" {PASS} {name}")
            passed += 1
        else:
            print(f" {FAIL} {name}: {problem}")
            failed += 1

    print(f"\n{divider}")
    if failed:
        print(f"{FAIL} {failed}/{passed + failed} 个测试失败")
    else:
        print(f"{PASS} 所有 {passed} 个测试通过!")
    print(divider)
    return failed == 0
if __name__ == "__main__":
success = run_all_tests()
sys.exit(0 if success else 1)
@@ -0,0 +1,370 @@
"""
Tests for Stage 1 (Semantic Index).
Validates that the generated semantic_index.json meets all completeness
and structural requirements, including the new iterative features:
- function_units have path fields
- concepts have parent references
- logic tree node coverage meets thresholds
"""
import json
import sys
from pathlib import Path
sys.path.insert(0, str(Path(__file__).parent.parent))
import config
PASS = "[PASS]"
FAIL = "[FAIL]"
WARN = "[WARN]"
def load_inputs():
    """Load the semantic index and the parsed input document.

    Returns:
        A ``(semantic_index, document)`` tuple.

    Exits the process with status 1 when the semantic index file is missing.
    """
    try:
        semantic_index = config.load_json(config.SEMANTIC_INDEX_JSON)
    except FileNotFoundError:
        print(f"{FAIL} semantic_index.json 未找到: {config.SEMANTIC_INDEX_JSON}")
        print(" 请先运行 step1_semantic_index.py")
        sys.exit(1)
    return semantic_index, config.load_input_document()
def build_image_index(doc: dict) -> dict[str, dict]:
    """Map each non-empty image ``rid`` to its ``image_analysis`` entry.

    Entries without a ``rid`` are skipped; for a repeated ``rid`` the last
    entry wins.
    """
    return {
        entry["rid"]: entry
        for entry in doc.get("image_analysis", [])
        if entry.get("rid", "")
    }
def build_logic_tree_node_index(doc: dict) -> dict[str, set[str]]:
    """Map each image ``rid`` to the set of node IDs in its logic tree.

    Images lacking a ``rid`` or a truthy ``logic_tree`` are left out.
    """
    index = {}
    for entry in doc.get("image_analysis", []):
        rid = entry.get("rid", "")
        tree = entry.get("logic_tree")
        if not (tree and rid):
            continue
        index[rid] = {node["id"] for node in tree.get("nodes", [])}
    return index
def check_unit_ids(units: list[dict]) -> list[str]:
    """Report function units whose unit_id is empty or repeated, or whose name is empty."""
    problems = []
    known_ids = set()
    for index, unit in enumerate(units):
        unit_id = unit.get("unit_id", "")
        if unit_id and unit_id in known_ids:
            problems.append(f"function_unit[{index}]: unit_id '{unit_id}' 重复")
        elif not unit_id:
            problems.append(f"function_unit[{index}]: unit_id 为空")
        known_ids.add(unit_id)
        if not unit.get("name", ""):
            problems.append(f"function_unit[{index}] ({unit_id}): name 为空")
    return problems
def check_unit_paths(units: list[dict]) -> list[str]:
    """Report function units whose ``path`` is missing, empty, or not a list."""
    problems = []
    for unit in units:
        unit_id = unit.get("unit_id", "?")
        path = unit.get("path", [])
        if path and isinstance(path, list):
            continue
        if not path:
            problems.append(f"{unit_id}: path 字段为空或缺失")
        else:
            problems.append(f"{unit_id}: path 必须是数组")
    return problems
def check_concept_parents(concepts: list[dict]) -> list[str]:
    """Validate concept parent references.

    The scope concepts "国内" and "海外" must have no parent; every other
    concept must name a parent that is itself among the concept names.
    """
    scope_names = {"国内", "海外"}
    known_names = {concept.get("name", "") for concept in concepts}
    problems = []
    for concept in concepts:
        name = concept.get("name", "?")
        parent = concept.get("parent", "")
        if name in scope_names:
            # Scope concepts are roots and should carry no parent.
            if parent:
                problems.append(f"scope 概念 '{name}' 不应有 parent (当前: '{parent}')")
            continue
        # Everything else needs a parent that resolves to a known concept.
        if not parent:
            problems.append(f"概念 '{name}' 缺少 parent 字段")
        elif parent not in known_names:
            problems.append(f"概念 '{name}' 的 parent '{parent}' 不存在于 concepts 中")
    return problems
def check_sources_exist(
    units: list[dict], image_index: dict[str, dict], node_index: dict[str, set[str]]
) -> list[str]:
    """Check that every source reference of every unit points to real content.

    Args:
        units: Function units; each is read for ``unit_id`` and ``sources``.
        image_index: Image rId -> image_analysis entry.
        node_index: Image rId -> set of node IDs in that image's logic tree.

    Returns:
        Error messages for: units with no sources, table/para sources
        without a section, logic_tree sources with a missing or unknown
        image_id, unknown node IDs, logic_tree sources without nodes, and
        units whose sources contain neither a text nor a logic_tree source.
    """
    errors = []
    for fu in units:
        uid = fu.get("unit_id", "?")
        sources = fu.get("sources", [])
        if not sources:
            errors.append(f"{uid}: sources 为空,必须至少引用一张图片或一段文字")
            continue
        has_text = False
        has_image = False
        for j, src in enumerate(sources):
            src_type = src.get("type", "")
            if src_type in ("table", "para"):
                has_text = True
                if not src.get("section", ""):
                    errors.append(f"{uid}.sources[{j}]: 缺少 section")
            elif src_type == "logic_tree":
                has_image = True
                image_id = src.get("image_id", "")
                if not image_id:
                    errors.append(f"{uid}.sources[{j}]: logic_tree 缺少 image_id")
                    continue
                if image_id not in image_index:
                    errors.append(
                        f"{uid}.sources[{j}]: image_id '{image_id}' "
                        f"在 image_analysis 中不存在"
                    )
                    continue
                node_ids = src.get("logic_tree_nodes", [])
                if node_ids and image_id in node_index:
                    valid_nodes = node_index[image_id]
                    for nid in node_ids:
                        if nid not in valid_nodes:
                            # The two literals are concatenated, so the first
                            # must end with the separator before the image id.
                            errors.append(
                                f"{uid}.sources[{j}]: 节点 '{nid}' 在 "
                                f"{image_id} 的逻辑树中不存在"
                            )
                elif not node_ids:
                    errors.append(
                        f"{uid}.sources[{j}]: logic_tree 类型但未提供 logic_tree_nodes"
                    )
        if not has_text and not has_image:
            errors.append(f"{uid}: 必须至少引用一个文本或图片来源")
    return errors
def check_logic_tree_coverage(
units: list[dict], node_index: dict[str, set[str]]
) -> list[str]:
"""Check that decision and action nodes in logic trees are covered."""
warnings = []
for image_id, all_nodes in node_index.items():
referenced = set()
for fu in units:
for src in fu.get("sources", []):
if src.get("image_id") == image_id:
for nid in src.get("logic_tree_nodes", []):
referenced.add(nid)
uncovered = all_nodes - referenced
if uncovered:
doc = config.load_input_document()
node_types = {}
for img in doc.get("image_analysis", []):
if img.get("rid") == image_id:
lt = img.get("logic_tree", {})
for n in lt.get("nodes", []):
node_types[n["id"]] = n.get("type", "?")
break
decision_action_uncovered = [
n for n in uncovered if node_types.get(n) in ("decision", "action")
]
if decision_action_uncovered:
warnings.append(
f"{image_id}: {len(decision_action_uncovered)}"
f"decision/action 节点未被引用: {decision_action_uncovered}"
)
return warnings
def check_ensemble_confidence(units: list[dict]) -> list[str]:
    """Report units lacking a valid confidence, ensemble_support, or source_versions."""
    allowed_levels = {"high", "medium", "low"}
    problems = []
    for unit in units:
        unit_id = unit.get("unit_id", "?")
        level = unit.get("confidence", "")
        if level and level not in allowed_levels:
            problems.append(f"{unit_id}: confidence='{level}' 无效 (期望 high/medium/low)")
        elif not level:
            problems.append(f"{unit_id}: 缺少 confidence 字段")
        if not unit.get("ensemble_support", ""):
            problems.append(f"{unit_id}: 缺少 ensemble_support 字段")
        if "source_versions" not in unit:
            problems.append(f"{unit_id}: 缺少 source_versions 字段")
    return problems
def check_confidence_summary(si: dict) -> list[str]:
    """Compare ``confidence_summary`` counters with the actual unit/concept data.

    Returns a single "missing" error when the summary is absent or empty;
    otherwise one error per counter that differs from the recomputed value
    (a counter absent from the summary is compared as 0).
    """
    cs = si.get("confidence_summary", {})
    if not cs:
        return ["缺少 confidence_summary 字段"]

    units = si.get("function_units", [])
    concepts = si.get("concepts", [])

    def tally(items, level):
        """Count the items whose confidence equals *level*."""
        return sum(1 for item in items if item.get("confidence") == level)

    # (summary key, actual value), in the order errors are reported.
    expectations = [
        ("total_units", len(units)),
        ("high", tally(units, "high")),
        ("medium", tally(units, "medium")),
        ("low", tally(units, "low")),
        ("total_concepts", len(concepts)),
        ("concept_high", tally(concepts, "high")),
        ("concept_medium", tally(concepts, "medium")),
        ("concept_low", tally(concepts, "low")),
    ]
    return [
        f"confidence_summary.{key}={cs.get(key)} != 实际 {actual}"
        for key, actual in expectations
        if cs.get(key, 0) != actual
    ]
def run_all_tests():
    """Run the seven Stage 1 checks against the saved semantic index.

    Prints a verdict per check, an overall summary and basic statistics.

    Returns:
        True when no check reported an error; warnings do not count.
    """
    bar = "=" * 60
    print(bar)
    print("Step 1 自检测试")
    print(bar)

    si, doc = load_inputs()
    units = si.get("function_units", [])
    concepts = si.get("concepts", [])
    image_index = build_image_index(doc)
    node_index = build_logic_tree_node_index(doc)

    all_errors = []
    all_warnings = []

    def report(title, findings, ok_text, tag=FAIL, noun="错误", sink=all_errors):
        """Print one check's verdict and collect its findings into *sink*."""
        if not findings:
            print(f"\n{PASS} {title}: {ok_text}")
            return
        print(f"\n{tag} {title}: {len(findings)} 个{noun}")
        for finding in findings:
            print(f" - {finding}")
        sink.extend(findings)

    # Test 1: unit_id and name validity
    report(
        "unit_id/name 检查",
        check_unit_ids(units),
        f"全部通过 ({len(units)} 个功能单元)",
    )
    # Test 2: path fields
    report("path 字段检查", check_unit_paths(units), "全部通过")
    # Test 3: concept parent references
    report(
        "concept parent 检查",
        check_concept_parents(concepts),
        f"全部通过 ({len(concepts)} 个概念)",
    )
    # Test 4: source references exist
    report(
        "来源引用检查",
        check_sources_exist(units, image_index, node_index),
        "全部通过",
    )
    # Test 5: logic tree coverage (findings are warnings, not errors)
    report(
        "逻辑树节点覆盖率",
        check_logic_tree_coverage(units, node_index),
        "全部通过",
        tag=WARN,
        noun="警告",
        sink=all_warnings,
    )
    # Test 6: ensemble confidence fields on function_units
    report("集成置信度字段", check_ensemble_confidence(units), "全部通过")
    # Test 7: confidence summary consistency
    cs = si.get("confidence_summary", {})
    report(
        "confidence_summary 一致性",
        check_confidence_summary(si),
        f"high={cs.get('high',0)}, medium={cs.get('medium',0)}, "
        f"low={cs.get('low',0)}",
    )

    # Summary
    print(f"\n{bar}")
    total_failures = len(all_errors)
    total_warnings = len(all_warnings)
    if total_failures:
        print(f"{FAIL} 测试失败: {total_failures} 个错误, {total_warnings} 个警告")
        print("\n请检查 LLM 输出质量,可能需要调整 Prompt 并重新运行 step1_semantic_index.py")
    elif total_warnings:
        print(f"{WARN} 全部通过但有 {total_warnings} 个警告")
    else:
        print(f"{PASS} 所有测试通过!")

    print("\n统计:")
    print(f" 功能单元数: {len(units)}")
    print(f" 概念数: {len(concepts)}")
    print(f" 逻辑树图片数: {len(node_index)}")
    return total_failures == 0
if __name__ == "__main__":
success = run_all_tests()
sys.exit(0 if success else 1)
@@ -0,0 +1,322 @@
"""
Tests for Stage 2 (IR Extraction).
Validates that ir_fragments.json meets quality and structural requirements:
- All fragments have non-empty rules
- All rules have path arrays
- All rules have precondition.geographic_scope and precondition.screen_type
- All trigger conditions have signal/operator/value
- user_interaction content is non-empty and not a placeholder
- No duplicate rule_ids (across all fragments)
"""
import json
import sys
from pathlib import Path
from collections import Counter
sys.path.insert(0, str(Path(__file__).parent.parent))
import config
PASS = "[PASS]"
FAIL = "[FAIL]"
WARN = "[WARN]"
# Forbidden placeholder phrases in user_interaction content
FORBIDDEN_PLACEHOLDERS = [
"文案由业务定义", "待定", "自定义", "TBD", "todo", "TODO"
]
def load_fragments():
    """Return the parsed ir_fragments.json; exit with status 1 if the file is missing."""
    try:
        fragments = config.load_json(config.IR_FRAGMENTS_JSON)
    except FileNotFoundError:
        print(f"{FAIL} ir_fragments.json 未找到: {config.IR_FRAGMENTS_JSON}")
        print(" 请先运行 step2_ir_extraction.py")
        sys.exit(1)
    return fragments
def check_non_empty_rules(fragments: list[dict]) -> list[str]:
    """Report fragments without rules, quoting the extraction error when one was recorded."""
    problems = []
    for fragment in fragments:
        if fragment.get("rules", []):
            continue
        unit_id = fragment.get("unit_id", "?")
        if fragment.get("error"):
            problems.append(f"{unit_id}: 提取失败 — {fragment['error']}")
        else:
            problems.append(f"{unit_id}: rules 为空")
    return problems
def check_rule_paths(fragments: list[dict]) -> list[str]:
    """Report rules whose ``path`` is missing, empty, or not a list."""
    problems = []
    for fragment in fragments:
        for index, rule in enumerate(fragment.get("rules", [])):
            path = rule.get("path", [])
            if path and isinstance(path, list):
                continue
            label = rule.get("rule_id", f"rule[{index}]")
            if not path:
                problems.append(f"{label}: path 字段为空或缺失")
            else:
                problems.append(f"{label}: path 必须是数组")
    return problems
def check_precondition_fields(fragments: list[dict]) -> list[str]:
    """Report rules lacking a precondition, a truthy geographic_scope, or a screen_type key."""
    problems = []
    for fragment in fragments:
        for index, rule in enumerate(fragment.get("rules", [])):
            label = rule.get("rule_id", f"rule[{index}]")
            precondition = rule.get("precondition", {})
            if not precondition:
                problems.append(f"{label}: precondition 缺失")
                continue
            if not precondition.get("geographic_scope"):
                problems.append(f"{label}: precondition.geographic_scope 缺失")
            # Only the key's presence is checked, not its value.
            if "screen_type" not in precondition:
                problems.append(f"{label}: precondition.screen_type 缺失")
    return problems
def check_user_interaction_content(fragments: list[dict]) -> list[str]:
    """Report user_interaction actions with empty content or a forbidden placeholder.

    Actions of any other type are ignored. A placeholder is any entry of
    ``FORBIDDEN_PLACEHOLDERS`` occurring as a substring of the content.
    """
    problems = []
    for fragment in fragments:
        for rule_index, rule in enumerate(fragment.get("rules", [])):
            label = rule.get("rule_id", f"rule[{rule_index}]")
            for action_index, action in enumerate(rule.get("actions", [])):
                if action.get("type") != "user_interaction":
                    continue
                content = action.get("content", "")
                where = f"{label}.actions[{action_index}]"
                if not content:
                    problems.append(f"{where}: user_interaction 的 content 为空")
                    continue
                for placeholder in FORBIDDEN_PLACEHOLDERS:
                    if placeholder in content:
                        problems.append(f"{where}: content 包含占位符: '{content}'")
                        break
    return problems
def check_sources_have_logic_tree_nodes(fragments: list[dict]) -> list[str]:
    """Verify that every rule cites where it came from.

    A rule is accepted when its ``sources`` contain either a ``logic_tree``
    entry with non-empty ``node_ids`` or, failing that, any ``table`` or
    ``para`` entry.

    Args:
        fragments: Fragment dicts whose rules' ``sources`` are inspected.

    Returns:
        One message per rule that has neither kind of reference.
    """
    text_kinds = ("table", "para")
    problems = []
    for fragment in fragments:
        for index, rule in enumerate(fragment.get("rules", [])):
            label = rule.get("rule_id", f"rule[{index}]")
            sources = rule.get("sources", [])
            # Logic-tree references are looked for first; text references
            # are only a fallback.
            if any(
                entry.get("type") == "logic_tree" and entry.get("node_ids")
                for entry in sources
            ):
                continue
            if any(entry.get("type") in text_kinds for entry in sources):
                continue
            problems.append(f"{label}: sources 中既无逻辑树引用也无文字引用")
    return problems
def check_trigger_conditions(fragments: list[dict]) -> list[str]:
    """Verify that trigger conditions are fully specified.

    Rules whose trigger has a non-``None`` ``event`` are skipped entirely.
    For the remaining rules each condition must have a truthy ``signal``,
    a truthy ``operator`` and a ``value`` key (the value itself may be
    anything, including falsy values).

    Args:
        fragments: Fragment dicts whose rules' ``trigger`` is inspected.

    Returns:
        One message per missing part, addressed as
        ``<rule>.condition[<index>]``.
    """
    problems = []
    for fragment in fragments:
        for rule_idx, rule in enumerate(fragment.get("rules", [])):
            label = rule.get("rule_id", f"rule[{rule_idx}]")
            trigger = rule.get("trigger", {})
            conditions = trigger.get("conditions", [])
            if trigger.get("event") is not None:
                continue  # event-driven trigger: conditions are not checked
            for cond_idx, condition in enumerate(conditions):
                if not condition.get("signal", ""):
                    problems.append(f"{label}.condition[{cond_idx}]: 缺少 signal")
                if not condition.get("operator", ""):
                    problems.append(f"{label}.condition[{cond_idx}]: 缺少 operator")
                if "value" not in condition:
                    problems.append(f"{label}.condition[{cond_idx}]: 缺少 value")
    return problems
def check_duplicate_rule_ids(fragments: list[dict]) -> list[str]:
    """Detect ``rule_id`` values used by more than one rule.

    Rules with a missing or empty ``rule_id`` are left out of the count.

    Args:
        fragments: Fragment dicts whose rules are pooled together.

    Returns:
        An empty list when all ids are unique, otherwise a single message
        listing the repeated ids in order of first appearance.
    """
    tally = Counter(
        rule_id
        for fragment in fragments
        for rule in fragment.get("rules", [])
        if (rule_id := rule.get("rule_id", ""))
    )
    repeated = [rule_id for rule_id, seen in tally.items() if seen > 1]
    if not repeated:
        return []
    return [f"重复 rule_id: {repeated}"]
def check_action_types(fragments: list[dict]) -> list[str]:
    """Verify that every action has a recognised ``type``.

    The accepted types are ``system`` and ``user_interaction``.  A
    ``user_interaction`` action must additionally contain a ``content`` key.

    Args:
        fragments: Fragment dicts whose rules' ``actions`` are inspected.

    Returns:
        One message per problem, addressed as ``<rule>.action[<index>]``.
    """
    valid_types = {"system", "user_interaction"}
    # Render the allowed types in sorted order.  Interpolating the set
    # directly produced a message whose element order could differ from one
    # interpreter run to the next (string hashing is randomized), which makes
    # reports hard to diff.
    expected = "{" + ", ".join(repr(t) for t in sorted(valid_types)) + "}"
    errors = []
    for f in fragments:
        for j, rule in enumerate(f.get("rules", [])):
            rid = rule.get("rule_id", f"rule[{j}]")
            for k, action in enumerate(rule.get("actions", [])):
                atype = action.get("type", "")
                if atype not in valid_types:
                    errors.append(
                        f"{rid}.action[{k}]: type='{atype}' 无效, "
                        f"应为 {expected}"
                    )
                if atype == "user_interaction" and "content" not in action:
                    errors.append(
                        f"{rid}.action[{k}]: user_interaction 类型缺少 content 字段"
                    )
    return errors
def _print_errors(errors: list[str], limit: int | None) -> None:
    """Print one bullet per error, showing at most ``limit`` of them.

    When entries are left out, a trailing line states how many.  Passing
    ``limit=None`` prints every entry.
    """
    shown = errors if limit is None else errors[:limit]
    for message in shown:
        print(f" - {message}")
    if limit is not None and len(errors) > limit:
        print(f" ... 还有 {len(errors) - limit}")


def run_all_tests():
    """Run every Stage 2 self-check against the loaded fragments.

    Prints a PASS/FAIL line per check (with the individual errors on
    failure), then a summary and basic statistics.

    Returns:
        True when no check reported an error, False otherwise.
    """
    print("=" * 60)
    print("Step 2 自检测试")
    print("=" * 60)
    fragments = load_fragments()
    all_errors = []
    total_units = len(fragments)
    total_rules = sum(len(f.get("rules", [])) for f in fragments)

    # Each entry: (check function, title, failure summary template,
    # pass summary, max errors to list or None for all).  The checks run in
    # this order.
    checks = [
        (check_non_empty_rules, "非空规则检查", "{n} 个错误",
         f"全部通过 ({total_units} 个片段)", None),
        (check_rule_paths, "规则 path 字段", "{n} 个错误", "全部通过", 10),
        (check_precondition_fields, "precondition 字段", "{n} 个错误",
         "全部通过", 10),
        (check_user_interaction_content, "user_interaction content",
         "{n} 个错误", "全部通过", 10),
        (check_sources_have_logic_tree_nodes, "来源节点引用",
         "{n} 个规则缺少来源引用", "全部通过", 10),
        (check_trigger_conditions, "触发条件完整性", "{n} 个条件不完整",
         "全部通过", 10),
        (check_duplicate_rule_ids, "rule_id 唯一性", "发现重复",
         "全部通过", None),
        # The action-type check previously cut its listing at 10 entries
        # without saying so; it now prints the same "remaining" notice as
        # the other capped checks.
        (check_action_types, "动作类型检查", "{n} 个问题", "全部通过", 10),
    ]
    for check, title, failed, passed, limit in checks:
        errors = check(fragments)
        if errors:
            print(f"\n{FAIL} {title}: {failed.format(n=len(errors))}")
            _print_errors(errors, limit)
            all_errors.extend(errors)
        else:
            print(f"\n{PASS} {title}: {passed}")

    # Summary
    print(f"\n{'='*60}")
    total_failures = len(all_errors)
    if total_failures == 0:
        print(f"{PASS} 所有测试通过!")
    else:
        print(f"{FAIL} 测试失败: {total_failures} 个错误")
        print("\n建议:")
        print(" 1. 检查 ir_fragments.json 中出错的规则")
        print(" 2. 如果某些功能单元的规则为空,检查上下文包是否丢失了关键信息")
        print(" 3. 调整 Prompt (prompts/step2_ir_extraction.txt) 后重新运行")
    print("\n统计:")
    print(f" 功能单元数: {total_units}")
    print(f" 规则总数: {total_rules}")
    error_units = sum(1 for f in fragments if f.get("error"))
    if error_units:
        print(f" 提取失败的单元: {error_units}")
    return total_failures == 0
if __name__ == "__main__":
    # Exit status mirrors the overall result: 0 when every check passed,
    # 1 otherwise.
    success = run_all_tests()
    sys.exit(int(not success))
@@ -0,0 +1,152 @@
"""
Tests for Stage 2.5 (Branch Coverage Auto-Completion).
Validates:
- Path enumeration exists and is non-empty
- Auto-complete fragments have valid structure
- No duplicate unit_ids in autocomplete fragments
- Path coverage improved after autocomplete (if applicable)
"""
import sys
from pathlib import Path
sys.path.insert(0, str(Path(__file__).parent.parent))
import config
PASS = "[PASS]"
FAIL = "[FAIL]"
WARN = "[WARN]"
def load_path_enumeration():
    """Load the path enumeration JSON located at ``config.PATH_ENUM_JSON``.

    Returns:
        Whatever ``config.load_json`` returns for that file.

    Raises:
        SystemExit: With status 1, after printing a hint, when the file
            does not exist.
    """
    try:
        data = config.load_json(config.PATH_ENUM_JSON)
    except FileNotFoundError:
        print(f"{FAIL} path_enumeration.json 未找到: {config.PATH_ENUM_JSON}")
        print(" 请先运行 step2_5_branch_coverage.py")
        sys.exit(1)
    return data
def load_autocomplete_fragments() -> list[dict] | None:
    """Load ir_autocomplete_fragments.json, or return None if it is absent.

    Callers distinguish "file not generated" (``None``) from a generated
    file, so ``None`` rather than an empty list signals the absent case.

    Returns:
        The parsed file content as returned by ``config.load_json``, or
        ``None`` when ``config.IR_AUTOCOMPLETE_FRAGMENTS_JSON`` does not
        exist.
    """
    path = config.IR_AUTOCOMPLETE_FRAGMENTS_JSON
    if not Path(path).exists():
        return None
    return config.load_json(path)
def check_path_enumeration(data: dict) -> list[str]:
    """Validate the structure of the path enumeration data.

    Checks that ``logic_tree_paths`` is non-empty, that ``total_paths`` is
    positive, and that every listed path has truthy ``path_id``,
    ``image_id`` and ``node_ids`` values.

    Args:
        data: Parsed path enumeration content.

    Returns:
        One message per structural problem found.
    """
    problems = []
    tree_paths = data.get("logic_tree_paths", {})
    if not tree_paths:
        problems.append("logic_tree_paths 为空")
    declared_total = data.get("total_paths", 0)
    if declared_total <= 0:
        problems.append(f"total_paths = {declared_total}, 期望 > 0")
    for image_id, entries in tree_paths.items():
        if entries:
            for position, entry in enumerate(entries):
                if not entry.get("path_id"):
                    problems.append(f"{image_id}[{position}]: 缺少 path_id")
                if not entry.get("image_id"):
                    problems.append(f"{image_id}[{position}]: 缺少 image_id")
                if not entry.get("node_ids"):
                    problems.append(f"{image_id}[{position}]: 缺少 node_ids")
        else:
            problems.append(f"{image_id}: 路径列表为空")
    return problems
def check_autocomplete_fragments(fragments: list[dict] | None) -> list[str]:
"""Check auto-complete fragments have valid structure."""
if fragments is None:
return ["ir_autocomplete_fragments.json 未生成 (可能无需补全)"]
errors = []
seen_unit_ids = set()
for frag in fragments:
uid = frag.get("unit_id", "")
if not uid:
errors.append("fragment 缺少 unit_id")
continue
if uid in seen_unit_ids:
errors.append(f"unit_id '{uid}' 重复")
seen_unit_ids.add(uid)
if not frag.get("auto_generated"):
errors.append(f"{uid}: auto_generated 应为 true")
rules = frag.get("rules", [])
for j, rule in enumerate(rules):
rid = rule.get("rule_id", f"rule[{j}]")
if not rule.get("path"):
errors.append(f"{rid}: path 字段缺失")
precond = rule.get("precondition", {})
if not precond.get("geographic_scope"):
errors.append(f"{rid}: precondition.geographic_scope 缺失")
return errors
def run_all_tests():
    """Run the Stage 2.5 self-checks and print a report.

    The path enumeration is validated first; if it cannot be loaded the
    function returns False immediately.  The auto-complete fragments are
    validated next; their absence is reported as a warning and does not
    count as a failure.

    Returns:
        True when no check contributed an error, False otherwise.
    """
    rule_line = "=" * 60
    print(rule_line)
    print("Step 2.5 自检测试")
    print(rule_line)
    collected = []

    # Test 1: Path enumeration exists
    try:
        path_data = load_path_enumeration()
    except SystemExit:
        # The loader already printed why it gave up.
        return False
    enum_errors = check_path_enumeration(path_data)
    if not enum_errors:
        total = path_data.get("total_paths", 0)
        n_images = len(path_data.get("logic_tree_paths", {}))
        print(f"\n{PASS} 路径枚举检查: {total} 条路径, {n_images} 个逻辑树")
    else:
        print(f"\n{FAIL} 路径枚举检查: {len(enum_errors)} 个错误")
        for message in enum_errors:
            print(f" - {message}")
        collected.extend(enum_errors)

    # Test 2: Auto-complete fragments
    fragments = load_autocomplete_fragments()
    fragment_errors = check_autocomplete_fragments(fragments)
    if fragments is None:
        # Absence is tolerated: the checker's message is not counted.
        print(f"\n{WARN} 自动补全片段: 未生成 (可能所有路径已覆盖)")
    elif fragment_errors:
        print(f"\n{FAIL} 自动补全片段检查: {len(fragment_errors)} 个错误")
        for message in fragment_errors[:10]:
            print(f" - {message}")
        collected.extend(fragment_errors)
    else:
        auto_rules = sum(len(fragment.get("rules", [])) for fragment in fragments)
        print(
            f"\n{PASS} 自动补全片段检查: "
            f"{len(fragments)} 个片段, {auto_rules} 条规则"
        )

    # Summary
    print(f"\n{'='*60}")
    failure_count = len(collected)
    if failure_count:
        print(f"{FAIL} 测试失败: {failure_count} 个错误")
    else:
        print(f"{PASS} 所有测试通过!")
    return failure_count == 0
if __name__ == "__main__":
    # Exit status mirrors the overall result: 0 when every check passed,
    # 1 otherwise.
    success = run_all_tests()
    sys.exit(int(not success))
@@ -0,0 +1,232 @@
"""
Tests for Stage 3 (Merge & Audit).
Validates:
- ir_final.json exists and is well-formed
- No duplicate rule_ids
- All rule_ids follow new hierarchical naming convention
- All rules have path arrays
- ir_audit_report.md exists and contains all required sections
"""
import re
import sys
from pathlib import Path
from collections import Counter
sys.path.insert(0, str(Path(__file__).parent.parent))
import config
PASS = "[PASS]"
FAIL = "[FAIL]"
WARN = "[WARN]"
def load_ir_final():
    """Load the final IR JSON located at ``config.IR_FINAL_JSON``.

    Returns:
        Whatever ``config.load_json`` returns for that file.

    Raises:
        SystemExit: With status 1, after printing a hint, when the file
            does not exist.
    """
    try:
        ir = config.load_json(config.IR_FINAL_JSON)
    except FileNotFoundError:
        print(f"{FAIL} ir_final.json 未找到: {config.IR_FINAL_JSON}")
        print(" 请先运行 step3_merge_and_audit.py")
        sys.exit(1)
    return ir
def load_audit_report():
    """Read the audit report at ``config.IR_AUDIT_REPORT_MD`` as UTF-8 text.

    Returns:
        The full text of the report.

    Raises:
        SystemExit: With status 1, after printing a hint, when the file
            does not exist.
    """
    try:
        with open(config.IR_AUDIT_REPORT_MD, "r", encoding="utf-8") as handle:
            text = handle.read()
    except FileNotFoundError:
        print(f"{FAIL} ir_audit_report.md 未找到: {config.IR_AUDIT_REPORT_MD}")
        print(" 请先运行 step3_merge_and_audit.py")
        sys.exit(1)
    return text
def check_rule_ids(ir: dict) -> list[str]:
    """Check rule_ids for duplicates and for the hierarchical naming scheme.

    Expected format, by example: ``DRL-001-DOMESTIC-SYS-FG-INTERRUPT-01``.

    Rules with a missing or empty ``rule_id`` are ignored by both checks
    here, so several id-less rules are not reported as a duplicate of the
    empty string.  A ``rules`` value that is not a list is treated as
    containing no rules.

    Args:
        ir: The parsed final IR document.

    Returns:
        At most one message listing duplicated ids, followed by one message
        per id that does not match the naming scheme.
    """
    errors = []
    rules = ir.get("rules", [])
    if not isinstance(rules, list):
        rules = []
    # Drop empty ids up front; they are neither duplicates nor nameable.
    rule_ids = [rid for rid in (r.get("rule_id", "") for r in rules) if rid]

    # No duplicates
    duplicates = [rid for rid, count in Counter(rule_ids).items() if count > 1]
    if duplicates:
        errors.append(f"重复 rule_id: {duplicates}")

    # New hierarchical naming convention
    pattern = re.compile(
        r"^[A-Z]+-\d{3}-(DOMESTIC|OVERSEAS)-"
        r"(SYS|SDK|OTHER)-"
        r"(FG-INTERRUPT|BG-BLOCK|BG-PAUSE|NO-RESTRICT|SWITCH-OFF)-\d{2}$"
    )
    for rid in rule_ids:
        # fullmatch rather than match: "$" alone also accepts an id that
        # ends with a newline.
        if not pattern.fullmatch(rid):
            errors.append(
                f"rule_id 命名不规范: '{rid}' "
                f"(期望: FEATURE-SCOPE-METHOD-BEHAVIOR-NN)"
            )
    return errors
def check_top_level_structure(ir: dict) -> list[str]:
    """Validate the top-level shape of the final IR document.

    Requires the ``feature``, ``feature_id`` and ``rules`` keys, and
    requires ``rules`` to be a non-empty list.

    Args:
        ir: The parsed final IR document.

    Returns:
        One message per missing key, plus one message when ``rules`` is
        not a list or is an empty list.
    """
    problems = [
        f"ir_final 缺少顶层字段: {name}"
        for name in ("feature", "feature_id", "rules")
        if name not in ir
    ]
    rules = ir.get("rules")
    if not isinstance(rules, list):
        problems.append("ir_final.rules 必须是数组")
    elif not rules:
        problems.append("ir_final.rules 为空")
    return problems
def check_rule_paths(rules: list[dict]) -> list[str]:
    """Verify that each rule has a truthy ``path`` value.

    Args:
        rules: Rule dicts from the final IR.

    Returns:
        One message per rule whose ``path`` is absent or empty; rules
        without a ``rule_id`` are labelled ``?``.
    """
    return [
        f"{rule.get('rule_id', '?')}: path 字段为空或缺失"
        for rule in rules
        if not rule.get("path", [])
    ]
def check_rule_completeness(rules: list[dict]) -> list[str]:
    """Verify that each rule carries all required fields.

    Beyond key presence, ``sources`` and ``actions`` must be truthy, and
    ``precondition`` must have a truthy ``geographic_scope`` and contain
    the ``screen_type`` key.

    Args:
        rules: Rule dicts from the final IR.

    Returns:
        One message per problem; rules without a ``rule_id`` are labelled
        by their position in ``rules``.
    """
    required = (
        "rule_id", "description", "priority", "sources",
        "precondition", "trigger", "actions",
    )
    problems = []
    for index, rule in enumerate(rules):
        label = rule.get("rule_id", f"rule[{index}]")
        problems.extend(
            f"{label}: 缺少字段 '{name}'" for name in required if name not in rule
        )
        if not rule.get("sources"):
            problems.append(f"{label}: sources 为空")
        if not rule.get("actions"):
            problems.append(f"{label}: actions 为空")
        # Precondition details
        precondition = rule.get("precondition", {})
        if not precondition.get("geographic_scope"):
            problems.append(f"{label}: precondition.geographic_scope 缺失")
        if "screen_type" not in precondition:
            problems.append(f"{label}: precondition.screen_type 缺失")
    return problems
def check_audit_report(report: str) -> list[str]:
    """Verify that the audit report mentions every required section.

    The check is a plain substring search for each section title and for
    the human-review notice.

    Args:
        report: Full text of the audit report.

    Returns:
        One message per missing section title, followed by one message if
        the human-review notice is missing.
    """
    section_titles = (
        "逻辑树路径覆盖率",
        "表格枚举覆盖",
        "开关状态",
        "一致性扫描报告",
        "自动补全摘要",
        "规则清单",
    )
    problems = [
        f"审计报告缺少章节: {title}"
        for title in section_titles
        if title not in report
    ]
    # The report must also carry the human review notice.
    if "人工审查" not in report:
        problems.append("审计报告缺少人工审查提示")
    return problems
def run_all_tests():
    """Run every Stage 3 self-check and print a report.

    Loads the final IR and the audit report, validates the top-level
    structure, the rule ids, the rule paths, rule completeness and the
    audit report content, then prints a summary.

    When the IR's ``rules`` value is not a list, the top-level check
    reports it and the rule-level checks are skipped with a warning rather
    than crashing while iterating over it.

    Returns:
        True when no check reported an error, False otherwise.
    """
    print("=" * 60)
    print("Step 3 自检测试")
    print("=" * 60)
    ir = load_ir_final()
    report = load_audit_report()
    raw_rules = ir.get("rules", [])
    rules_is_list = isinstance(raw_rules, list)
    rules = raw_rules if rules_is_list else []
    all_errors = []

    # Test 1: Top-level structure
    errors = check_top_level_structure(ir)
    if errors:
        print(f"\n{FAIL} 顶层结构检查: {len(errors)} 个错误")
        for e in errors:
            print(f" - {e}")
        all_errors.extend(errors)
    else:
        print(f"\n{PASS} 顶层结构检查: 通过 "
              f"(feature={ir.get('feature')}, feature_id={ir.get('feature_id')})")

    if rules_is_list:
        # Test 2: rule_id uniqueness and naming
        errors = check_rule_ids(ir)
        if errors:
            print(f"\n{FAIL} rule_id 检查: {len(errors)} 个错误")
            for e in errors:
                print(f" - {e}")
            all_errors.extend(errors)
        else:
            print(f"\n{PASS} rule_id 检查: 全部通过 ({len(rules)} 个唯一 ID, 层次化格式)")

        # Test 3: Rule path fields
        errors = check_rule_paths(rules)
        if errors:
            print(f"\n{FAIL} 规则 path 字段: {len(errors)} 个错误")
            for e in errors[:10]:
                print(f" - {e}")
            # Say how many entries were cut, as the completeness check does.
            if len(errors) > 10:
                print(f" ... 还有 {len(errors) - 10}")
            all_errors.extend(errors)
        else:
            print(f"\n{PASS} 规则 path 字段: 全部通过")

        # Test 4: Rule field completeness
        errors = check_rule_completeness(rules)
        if errors:
            print(f"\n{FAIL} 规则字段完整性: {len(errors)} 个错误")
            for e in errors[:10]:
                print(f" - {e}")
            if len(errors) > 10:
                print(f" ... 还有 {len(errors) - 10}")
            all_errors.extend(errors)
        else:
            print(f"\n{PASS} 规则字段完整性: 全部通过")
    else:
        # Already counted as a failure by the top-level structure check.
        print(f"\n{WARN} 规则级检查已跳过: ir_final.rules 不是数组")

    # Test 5: Audit report content
    errors = check_audit_report(report)
    if errors:
        print(f"\n{FAIL} 审计报告检查: {len(errors)} 个错误")
        for e in errors:
            print(f" - {e}")
        all_errors.extend(errors)
    else:
        print(f"\n{PASS} 审计报告检查: 全部通过 (6 个章节)")

    # Summary
    print(f"\n{'='*60}")
    total_failures = len(all_errors)
    if total_failures == 0:
        print(f"{PASS} 所有测试通过!")
        print("\n最终交付物:")
        print(f" - {config.IR_FINAL_JSON} ({len(rules)} 条规则)")
        print(f" - {config.IR_AUDIT_REPORT_MD}")
    else:
        print(f"{FAIL} 测试失败: {total_failures} 个错误")
        print("\n建议: 检查 ir_fragments.json 和合并逻辑,修复问题后重新运行 step3_merge_and_audit.py")
    return total_failures == 0
if __name__ == "__main__":
    # Exit status mirrors the overall result: 0 when every check passed,
    # 1 otherwise.
    success = run_all_tests()
    sys.exit(int(not success))