sync: update all skills from latest workspace code

doc_parser_skill: - New: verify_flowchart.py (flowchart validation) - Updated: LLM.py (multi-provider: DeepSeek + DashScope) - Updated: image_parser.py (logic tree support, external prompts) - Updated: SKILL.md, prompts/image_prompt.md conflict_detection_skill: - Updated: LLM.py (multi-provider sync) - Updated: detect_conflicts.py (logic tree text conversion) ir_generation_skill: - Replaced old scripts/LLM.py + ir_generator.py with standalone project - New: main.py, config.py, step1-3_*.py, ensemble_merge.py - New: prompts/, tests/ subdirectories tests: - New: acceptance/ test suite with schema validation - Fixed: conftest no longer globally skips non-acceptance tests - Updated: test_sample.py for new ir_generation structure Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-05-30 22:45:08 +08:00
parent db64df2da1
commit fec4c09ee0
35 changed files with 8021 additions and 530 deletions
@@ -0,0 +1,137 @@
+"""
+Shared configuration for the IR Generation pipeline.
+Reads API keys from environment variables, falling back to a secrets.yaml file.
+"""
+
+import os
+import json
+import yaml
+
+# ---- Paths ----
+# Directory that contains this config module.
+BASE_DIR = os.path.dirname(os.path.abspath(__file__))
+# Parent of BASE_DIR; the sibling doc_parser_skill folder is resolved from here.
+WORKSPACE_DIR = os.path.dirname(BASE_DIR)
+DOC_PARSER_OUTPUT = os.path.join(WORKSPACE_DIR, "doc_parser_skill", "output")
+PROMPTS_DIR = os.path.join(BASE_DIR, "prompts")
+TESTS_DIR = os.path.join(BASE_DIR, "tests")
+OUTPUT_DIR = os.path.join(BASE_DIR, "output")
+
+# Input file (the parsed PRD JSON)
+# The default lives in the doc_parser output folder; the IR_INPUT_JSON
+# environment variable replaces it when set.
+_DEFAULT_INPUT = os.path.join(
+    DOC_PARSER_OUTPUT,
+    "车机娱乐系统禁止功能文档_脱敏 v0.9_v2_updated.json",
+)
+INPUT_JSON = os.environ.get("IR_INPUT_JSON", _DEFAULT_INPUT)
+
+
+def set_input_file(path: "str | os.PathLike[str]") -> None:
+    """Override the default input JSON path.
+
+    Rebinds the module-level ``INPUT_JSON``. Code that copied the value
+    earlier (``from config import INPUT_JSON``) keeps the old path; read it
+    through the module attribute to observe the override.
+
+    Args:
+        path: New input location, as a string or any ``os.PathLike`` object.
+
+    Raises:
+        TypeError: If *path* is not a path-like value.
+    """
+    global INPUT_JSON
+    # Normalize to a plain string so INPUT_JSON keeps the same type it has
+    # when it comes from the environment or the built-in default.
+    INPUT_JSON = os.fspath(path)
+
+# Secrets file (shared with workspace-document-analyzer)
+# .openclaw/workspace/skills/ir_generation_new_skill -> .openclaw/workspace-document-analyzer
+# OPENCLAW_HOME is two directory levels above WORKSPACE_DIR.
+OPENCLAW_HOME = os.path.dirname(os.path.dirname(WORKSPACE_DIR))
+SECRETS_YAML = os.path.join(
+    OPENCLAW_HOME, "workspace-document-analyzer", "config", "secrets.yaml",
+)
+
+# Intermediate outputs
+# All of these are located under OUTPUT_DIR.
+# NOTE(review): the r1/r2/r3 files presumably hold the individual ensemble
+# runs that are merged into semantic_index.json — confirm against the pipeline.
+SEMANTIC_INDEX_R1_JSON = os.path.join(OUTPUT_DIR, "semantic_index_r1.json")
+SEMANTIC_INDEX_R2_JSON = os.path.join(OUTPUT_DIR, "semantic_index_r2.json")
+SEMANTIC_INDEX_R3_JSON = os.path.join(OUTPUT_DIR, "semantic_index_r3.json")
+SEMANTIC_INDEX_JSON = os.path.join(OUTPUT_DIR, "semantic_index.json")   # merged final
+IR_FRAGMENTS_JSON = os.path.join(OUTPUT_DIR, "ir_fragments.json")
+PATH_ENUM_JSON = os.path.join(OUTPUT_DIR, "path_enumeration.json")
+IR_AUTOCOMPLETE_FRAGMENTS_JSON = os.path.join(OUTPUT_DIR, "ir_autocomplete_fragments.json")
+
+# Final deliverables (placed in doc_parser output per spec)
+IR_FINAL_JSON = os.path.join(DOC_PARSER_OUTPUT, "ir_final.json")
+IR_AUDIT_REPORT_MD = os.path.join(DOC_PARSER_OUTPUT, "ir_audit_report.md")
+
+# ---- LLM API ----
+# Choose provider: "deepseek" | "dashscope"
+# Taken from the IR_PROVIDER environment variable; defaults to "deepseek".
+LLM_PROVIDER = os.environ.get("IR_PROVIDER", "deepseek")
+
+# Model names per provider
+# NOTE: when IR_MODEL is set it overrides the model for both providers alike;
+# the per-provider names below are only the defaults.
+PROVIDER_MODELS = {
+    "deepseek": os.environ.get("IR_MODEL", "deepseek-v4-flash"),
+    "dashscope": os.environ.get("IR_MODEL", "qwen-max"),
+}
+# A provider name missing from PROVIDER_MODELS falls back to the deepseek entry.
+MODEL_NAME = PROVIDER_MODELS.get(LLM_PROVIDER, PROVIDER_MODELS["deepseek"])
+
+# Maximum tokens for LLM responses
+MAX_TOKENS = int(os.environ.get("IR_MAX_TOKENS", "16000"))
+# Default temperature value; overridable through IR_TEMPERATURE.
+TEMPERATURE = float(os.environ.get("IR_TEMPERATURE", "0.1"))
+
+# ---- Iteration & Quality ----
+# Both values can be overridden from the environment
+# (IR_MAX_RETRIES, IR_COVERAGE_TARGET).
+MAX_RETRIES_PER_STAGE = int(os.environ.get("IR_MAX_RETRIES", "3"))
+COVERAGE_TARGET = float(os.environ.get("IR_COVERAGE_TARGET", "0.95"))
+
+# Stage 1 ensemble temperatures (parallel multi-temperature generation).
+# One entry per (environment variable, default) pair, in declaration order.
+ENSEMBLE_TEMPERATURES = [
+    float(os.environ.get(env_name, fallback))
+    for env_name, fallback in (
+        ("IR_ENSEMBLE_T1", "0.0"),
+        ("IR_ENSEMBLE_T2", "0.3"),
+        ("IR_ENSEMBLE_T3", "0.7"),
+    )
+]
+
+
+def _load_secrets() -> dict[str, dict[str, str]]:
+    """Load provider credentials from the file at ``SECRETS_YAML``.
+
+    Returns:
+        A mapping like ``{"deepseek": {"apiKey": "...", "baseUrl": "..."}, ...}``.
+        An empty dict is returned when the file does not exist, is empty, or
+        its top-level YAML value is not a mapping.
+    """
+    if not os.path.isfile(SECRETS_YAML):
+        return {}
+    with open(SECRETS_YAML, "r", encoding="utf-8") as f:
+        loaded = yaml.safe_load(f)
+    # A scalar or list at the top level cannot hold per-provider entries;
+    # treat it as "no secrets" so callers can rely on getting a dict.
+    return loaded if isinstance(loaded, dict) else {}
+
+
+def _get_provider_config(provider: str) -> dict[str, str]:
+    """Resolve the API key and base URL for *provider*.
+
+    Environment variables take precedence: ``<PROVIDER>_API_KEY`` and
+    ``<PROVIDER>_BASE_URL`` (provider name upper-cased) are consulted first,
+    then the provider's entry in secrets.yaml. When neither supplies a base
+    URL, the provider's own OpenAI-compatible endpoint is used; providers
+    without a known endpoint keep the DeepSeek URL as before.
+
+    Args:
+        provider: Provider key, e.g. ``"deepseek"`` or ``"dashscope"``.
+
+    Returns:
+        ``{"apiKey": ..., "baseUrl": ...}``.
+
+    Raises:
+        RuntimeError: If no API key is found in either source.
+    """
+    default_base_urls = {
+        "deepseek": "https://api.deepseek.com/v1",
+        "dashscope": "https://dashscope.aliyuncs.com/compatible-mode/v1",
+    }
+
+    secrets = _load_secrets()
+    entry = secrets.get(provider) if isinstance(secrets, dict) else None
+    # A provider key with a null or non-mapping value counts as "no entry".
+    if not isinstance(entry, dict):
+        entry = {}
+
+    env_prefix = provider.upper()
+    api_key = (
+        os.environ.get(f"{env_prefix}_API_KEY")
+        or entry.get("apiKey")
+        or ""
+    )
+    if not api_key:
+        raise RuntimeError(
+            f"No API key found for provider '{provider}'. "
+            f"Check {SECRETS_YAML} or set {env_prefix}_API_KEY."
+        )
+
+    # Empty values are skipped at every level so a blank baseUrl in the
+    # secrets file does not produce a client with no endpoint.
+    base_url = (
+        os.environ.get(f"{env_prefix}_BASE_URL")
+        or entry.get("baseUrl")
+        or default_base_urls.get(provider, default_base_urls["deepseek"])
+    )
+    return {"apiKey": api_key, "baseUrl": base_url}
+
+
+def llm_client():
+    """Build an OpenAI-compatible client for the provider in ``LLM_PROVIDER``.
+
+    The ``openai`` package is imported when this function is called, not when
+    the module is imported. Credentials and endpoint come from
+    ``_get_provider_config``.
+    """
+    import openai
+
+    settings = _get_provider_config(LLM_PROVIDER)
+    endpoint = settings["baseUrl"]
+    key = settings["apiKey"]
+    return openai.OpenAI(base_url=endpoint, api_key=key)
+
+
+def load_input_document(path: str | None = None) -> dict:
+    """Read the parsed PRD JSON document and return the decoded content.
+
+    Args:
+        path: File to read. When omitted (or otherwise falsy), the
+            module-level ``INPUT_JSON`` is used.
+    """
+    source = path if path else INPUT_JSON
+    with open(source, encoding="utf-8") as handle:
+        document = json.load(handle)
+    return document
+
+
+def save_json(data, path: str) -> None:
+    """Write *data* to *path* as indented UTF-8 JSON.
+
+    Missing parent directories are created. Non-ASCII characters are written
+    as-is rather than escaped.
+
+    Args:
+        data: Any JSON-serializable value.
+        path: Destination file; may be a bare filename relative to the
+            current working directory.
+    """
+    parent = os.path.dirname(path)
+    # dirname is "" for a bare filename, and os.makedirs("") raises
+    # FileNotFoundError, so only create directories when there is one.
+    if parent:
+        os.makedirs(parent, exist_ok=True)
+    with open(path, "w", encoding="utf-8") as f:
+        json.dump(data, f, ensure_ascii=False, indent=2)
+
+
+def load_json(path: str) -> dict:
+    """Read the UTF-8 JSON file at *path* and return the decoded value."""
+    with open(path, encoding="utf-8") as handle:
+        payload = json.load(handle)
+    return payload