40567a4fb6
CI / test (push) Successful in 30s
- 4 skill pipeline (doc_parser, conflict_detection, ir_generation, resolution_application) - CI workflow on push/PR (.gitea/workflows/ci.yml) - Auto-issue on CI failure (.gitea/workflows/auto-issue.yml) - Pytest smoke tests (tests/test_sample.py) Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
106 lines
3.6 KiB
Python
106 lines
3.6 KiB
Python
import logging
|
||
import os
|
||
import time
|
||
from typing import Optional
|
||
|
||
from openai import OpenAI
|
||
|
||
logger = logging.getLogger(__name__)
|
||
|
||
|
||
class LLMClient:
|
||
"""Low-level OpenAI-compatible LLM client with retry and token tracking.
|
||
|
||
Usage::
|
||
|
||
llm = LLMClient()
|
||
content = llm.chat("qwen3.5-flash", [{"role": "user", "content": "Hello"}])
|
||
print(llm.usage)
|
||
"""
|
||
|
||
IMAGE_MODEL = "qwen3-vl-plus"
|
||
TEXT_MODEL = "qwen3.5-flash-2026-02-23"
|
||
TIMEOUT = 120
|
||
MAX_RETRIES = 3
|
||
|
||
def __init__(
|
||
self,
|
||
*,
|
||
base_url: str = "https://dashscope.aliyuncs.com/compatible-mode/v1",
|
||
timeout: int | None = None,
|
||
):
|
||
key = os.environ.get("DASHSCOPE_API_KEY", "")
|
||
if not key:
|
||
raise ValueError("DASHSCOPE_API_KEY environment variable is not set.")
|
||
self._client = OpenAI(api_key=key, base_url=base_url)
|
||
self._timeout = timeout or self.TIMEOUT
|
||
self._prompt_tokens = 0
|
||
self._completion_tokens = 0
|
||
|
||
@property
|
||
def usage(self) -> dict:
|
||
"""Return accumulated token counts as ``{prompt, completion, total}``."""
|
||
return {
|
||
"prompt_tokens": self._prompt_tokens,
|
||
"completion_tokens": self._completion_tokens,
|
||
"total_tokens": self._prompt_tokens + self._completion_tokens,
|
||
}
|
||
|
||
@staticmethod
|
||
def estimate_tokens(text: str) -> int:
|
||
"""Quick token estimate. CJK ≈1.7/token, others ≈3.0/token."""
|
||
cjk = sum(1 for c in text if '一' <= c <= '鿿' or ' ' <= c <= '〿')
|
||
other = len(text) - cjk
|
||
return max(1, int(cjk / 1.7 + other / 3.0))
|
||
|
||
@staticmethod
|
||
def estimate_image_tokens() -> int:
|
||
"""Fixed estimate for one vision-model image (~500 tokens)."""
|
||
return 500
|
||
|
||
def chat(
|
||
self, model: str, messages: list[dict], *, timeout: int | None = None,
|
||
response_format: dict | None = None,
|
||
) -> str:
|
||
"""Send a chat completion request and return the response content.
|
||
|
||
Automatically retries on failure and accumulates token usage.
|
||
"""
|
||
label = f"chat({model})"
|
||
|
||
def _call():
|
||
t0 = time.time()
|
||
kwargs = dict(model=model, messages=messages, timeout=timeout or self._timeout)
|
||
if response_format is not None:
|
||
kwargs["response_format"] = response_format
|
||
kwargs["temperature"] = 0
|
||
resp = self._client.chat.completions.create(**kwargs)
|
||
content = resp.choices[0].message.content
|
||
usg = resp.usage
|
||
if usg:
|
||
self._prompt_tokens += usg.prompt_tokens
|
||
self._completion_tokens += usg.completion_tokens
|
||
elapsed = time.time() - t0
|
||
logger.info("%s: %d chars in %.1fs", label, len(content) if content else 0, elapsed)
|
||
if not content:
|
||
raise RuntimeError("Empty response from LLM")
|
||
return content
|
||
|
||
return self._retry(_call, label)
|
||
|
||
def _retry(self, fn, label: str) -> str:
|
||
"""Call *fn()* with exponential-backoff retry."""
|
||
last_error: Optional[Exception] = None
|
||
for attempt in range(self.MAX_RETRIES):
|
||
try:
|
||
return fn()
|
||
except Exception as e:
|
||
last_error = e
|
||
logger.warning(
|
||
"%s error (attempt %d/%d): %s",
|
||
label, attempt + 1, self.MAX_RETRIES, e,
|
||
)
|
||
if attempt < self.MAX_RETRIES - 1:
|
||
time.sleep(2 ** attempt)
|
||
raise RuntimeError(f"{label}: all retries exhausted") from last_error
|