Files
document_analyzer/skills/ir_generation_skill/config.py
pzhang_zywl 884848f15f
CI / test (pull_request) Successful in 7s
fix: 统一输出文件目录结构 - Closes #3
- 新增 PROJECT_OUTPUT (项目根/output/),统一所有输出文件
- IR 中间产物 → output/ir/,最终交付物 → output/final/
- agent_poller.py 新增 pr-status/merge-pr/close-issue/lifecycle 命令
- DEV_AGENT.md 同步更新完整闭环流程
- 更新 conftest/test_sample 中的默认路径

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-05-31 14:38:37 +08:00

145 lines
5.0 KiB
Python

"""
Shared configuration for the IR Generation pipeline.
Reads API keys from environment variables, falling back to a secrets.yaml file.
"""
import os
import json
import yaml
# ---- Paths ----
# Directory chain, walking upward from this module's own location.
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
WORKSPACE_DIR = os.path.dirname(BASE_DIR)
PROJECT_ROOT = os.path.dirname(WORKSPACE_DIR)

# Resources that live next to this module.
PROMPTS_DIR = os.path.join(BASE_DIR, "prompts")
TESTS_DIR = os.path.join(BASE_DIR, "tests")

# Legacy location, kept for the doc_parser integration.
DOC_PARSER_OUTPUT = os.path.join(WORKSPACE_DIR, "doc_parser_skill", "output")

# Unified output tree with "ir" and "final" subdirectories.
PROJECT_OUTPUT = os.path.join(PROJECT_ROOT, "output")
IR_OUTPUT = os.path.join(PROJECT_OUTPUT, "ir")
FINAL_OUTPUT = os.path.join(PROJECT_OUTPUT, "final")
OUTPUT_DIR = IR_OUTPUT  # backward-compatibility alias

# Parsed PRD JSON used as input; the IR_INPUT_JSON environment variable
# takes the place of the built-in default when it is set.
_DEFAULT_INPUT = os.path.join(
    PROJECT_OUTPUT,
    "车机娱乐系统禁止功能文档_脱敏 v0.9_v2_updated.json",
)
INPUT_JSON = os.getenv("IR_INPUT_JSON", _DEFAULT_INPUT)
def set_input_file(path: str) -> None:
    """Replace the module-level ``INPUT_JSON`` path with *path*."""
    # Rebinds the name in this module's namespace, exactly as a
    # ``global INPUT_JSON`` assignment would.
    globals()["INPUT_JSON"] = path
# Secrets file (shared with workspace-document-analyzer).
# OPENCLAW_HOME sits two directory levels above WORKSPACE_DIR; the secrets
# file is looked up under its "workspace-document-analyzer/config" folder.
OPENCLAW_HOME = os.path.dirname(os.path.dirname(WORKSPACE_DIR))
SECRETS_YAML = os.path.join(
    OPENCLAW_HOME,
    "workspace-document-analyzer",
    "config",
    "secrets.yaml",
)

# Final deliverables (written under FINAL_OUTPUT).
IR_FINAL_JSON = os.path.join(FINAL_OUTPUT, "ir_final.json")
IR_AUDIT_REPORT_MD = os.path.join(FINAL_OUTPUT, "ir_audit_report.md")

# Intermediate artifacts (written under IR_OUTPUT).
SEMANTIC_INDEX_JSON = os.path.join(IR_OUTPUT, "semantic_index.json")
SEMANTIC_INDEX_R1_JSON = os.path.join(IR_OUTPUT, "semantic_index_r1.json")
SEMANTIC_INDEX_R2_JSON = os.path.join(IR_OUTPUT, "semantic_index_r2.json")
SEMANTIC_INDEX_R3_JSON = os.path.join(IR_OUTPUT, "semantic_index_r3.json")
IR_FRAGMENTS_JSON = os.path.join(IR_OUTPUT, "ir_fragments.json")
IR_AUTOCOMPLETE_FRAGMENTS_JSON = os.path.join(
    IR_OUTPUT, "ir_autocomplete_fragments.json"
)
PATH_ENUM_JSON = os.path.join(IR_OUTPUT, "path_enumeration.json")
# ---- LLM API ----
# Active provider: "deepseek" | "dashscope" (IR_PROVIDER overrides).
LLM_PROVIDER = os.getenv("IR_PROVIDER", "deepseek")

# Model per provider; IR_MODEL, when set, replaces the name for every provider.
PROVIDER_MODELS = {
    "deepseek": os.getenv("IR_MODEL", "deepseek-v4-flash"),
    "dashscope": os.getenv("IR_MODEL", "qwen-max"),
}

# A provider without an entry above uses the deepseek model name.
if LLM_PROVIDER in PROVIDER_MODELS:
    MODEL_NAME = PROVIDER_MODELS[LLM_PROVIDER]
else:
    MODEL_NAME = PROVIDER_MODELS["deepseek"]

# Response token cap and sampling temperature for LLM calls.
MAX_TOKENS = int(os.getenv("IR_MAX_TOKENS", "16000"))
TEMPERATURE = float(os.getenv("IR_TEMPERATURE", "0.1"))

# ---- Iteration & Quality ----
MAX_RETRIES_PER_STAGE = int(os.getenv("IR_MAX_RETRIES", "3"))
COVERAGE_TARGET = float(os.getenv("IR_COVERAGE_TARGET", "0.95"))

# Stage 1 ensemble temperatures (parallel multi-temperature generation);
# each slot has its own environment override.
ENSEMBLE_TEMPERATURES = [
    float(os.getenv(env_name, fallback))
    for env_name, fallback in (
        ("IR_ENSEMBLE_T1", "0.0"),
        ("IR_ENSEMBLE_T2", "0.3"),
        ("IR_ENSEMBLE_T3", "0.7"),
    )
]
def _load_secrets() -> dict[str, dict[str, str]]:
    """Read provider credentials from the file at ``SECRETS_YAML``.

    Returns:
        The parsed YAML content, expected to look like
        ``{"deepseek": {"apiKey": "...", "baseUrl": "..."}, ...}``.
        An empty dict is returned when the file does not exist or when it
        parses to a falsy value (for example an empty file).
    """
    if not os.path.isfile(SECRETS_YAML):
        return {}
    with open(SECRETS_YAML, "r", encoding="utf-8") as stream:
        parsed = yaml.safe_load(stream)
    return parsed if parsed else {}
def _get_provider_config(provider: str) -> dict[str, str]:
    """Resolve the API key and base URL for *provider*.

    Each value is looked up in this order:

    1. the ``<PROVIDER>_API_KEY`` / ``<PROVIDER>_BASE_URL`` environment
       variable (provider name upper-cased),
    2. the provider's ``apiKey`` / ``baseUrl`` entry in secrets.yaml,
    3. for the base URL only, a built-in default for that provider.

    Args:
        provider: Provider name, e.g. ``"deepseek"`` or ``"dashscope"``.

    Returns:
        A dict of the form ``{"apiKey": ..., "baseUrl": ...}``.

    Raises:
        RuntimeError: If no API key is found in either source.
    """
    # Every provider needs its own default endpoint; falling back to the
    # DeepSeek URL for "dashscope" would send that provider's key to the
    # wrong service.
    default_base_urls = {
        "deepseek": "https://api.deepseek.com/v1",
        "dashscope": "https://dashscope.aliyuncs.com/compatible-mode/v1",
    }

    secrets = _load_secrets()
    # Tolerate a secrets file whose top level or provider entry is not a
    # mapping (e.g. "deepseek:" with nothing under it parses to None).
    entry = secrets.get(provider) if isinstance(secrets, dict) else None
    if not isinstance(entry, dict):
        entry = {}

    env_prefix = provider.upper()
    api_key = os.environ.get(f"{env_prefix}_API_KEY") or entry.get("apiKey", "")
    base_url = (
        os.environ.get(f"{env_prefix}_BASE_URL")
        or entry.get("baseUrl")
        # Providers without a known default keep the previous behavior of
        # using the DeepSeek endpoint.
        or default_base_urls.get(provider, default_base_urls["deepseek"])
    )

    if not api_key:
        raise RuntimeError(
            f"No API key found for provider '{provider}'. "
            f"Check {SECRETS_YAML} or set {env_prefix}_API_KEY."
        )
    return {"apiKey": api_key, "baseUrl": base_url}
def llm_client():
    """Build an OpenAI-compatible client for the provider in ``LLM_PROVIDER``.

    The base URL and API key come from ``_get_provider_config``.
    """
    # Imported inside the function, so the openai package is only needed
    # when a client is actually requested.
    from openai import OpenAI

    provider_cfg = _get_provider_config(LLM_PROVIDER)
    endpoint = provider_cfg["baseUrl"]
    key = provider_cfg["apiKey"]
    return OpenAI(base_url=endpoint, api_key=key)
def load_input_document(path: str | None = None) -> dict:
"""Load the parsed PRD JSON document."""
path = path or INPUT_JSON
with open(path, "r", encoding="utf-8") as f:
return json.load(f)
def save_json(data, path: str) -> None:
    """Write *data* to *path* as indented, UTF-8 encoded JSON.

    Missing parent directories are created first. Non-ASCII characters are
    written as-is rather than as ``\\uXXXX`` escapes.

    Args:
        data: Any JSON-serializable object.
        path: Destination file; may be a bare filename in the current
            working directory.
    """
    parent = os.path.dirname(path)
    # A bare filename has an empty dirname, and os.makedirs("") raises
    # FileNotFoundError, so only create directories when there is one.
    if parent:
        os.makedirs(parent, exist_ok=True)
    with open(path, "w", encoding="utf-8") as f:
        json.dump(data, f, ensure_ascii=False, indent=2)
def load_json(path: str) -> dict:
    """Read the UTF-8 encoded JSON file at *path* and return its decoded content."""
    with open(path, mode="r", encoding="utf-8") as handle:
        parsed = json.load(handle)
    return parsed