Files
document_analyzer/skills/ir_generation_skill/scripts/LLM.py
T
pzhang_zywl 40567a4fb6
CI / test (push) Successful in 30s
Initial commit: document_analyzer with CI/CD pipeline
- 4 skill pipeline (doc_parser, conflict_detection, ir_generation, resolution_application)
- CI workflow on push/PR (.gitea/workflows/ci.yml)
- Auto-issue on CI failure (.gitea/workflows/auto-issue.yml)
- Pytest smoke tests (tests/test_sample.py)

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-05-29 20:00:26 +08:00

106 lines
3.6 KiB
Python
Raw Blame History

This file contains invisible Unicode characters
This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
import logging
import os
import time
from typing import Optional
from openai import OpenAI
logger = logging.getLogger(__name__)
class LLMClient:
"""Low-level OpenAI-compatible LLM client with retry and token tracking.
Usage::
llm = LLMClient()
content = llm.chat("qwen3.5-flash", [{"role": "user", "content": "Hello"}])
print(llm.usage)
"""
IMAGE_MODEL = "qwen3-vl-plus"
TEXT_MODEL = "qwen3.5-flash-2026-02-23"
TIMEOUT = 120
MAX_RETRIES = 3
def __init__(
self,
*,
base_url: str = "https://dashscope.aliyuncs.com/compatible-mode/v1",
timeout: int | None = None,
):
key = os.environ.get("DASHSCOPE_API_KEY", "")
if not key:
raise ValueError("DASHSCOPE_API_KEY environment variable is not set.")
self._client = OpenAI(api_key=key, base_url=base_url)
self._timeout = timeout or self.TIMEOUT
self._prompt_tokens = 0
self._completion_tokens = 0
@property
def usage(self) -> dict:
"""Return accumulated token counts as ``{prompt, completion, total}``."""
return {
"prompt_tokens": self._prompt_tokens,
"completion_tokens": self._completion_tokens,
"total_tokens": self._prompt_tokens + self._completion_tokens,
}
@staticmethod
def estimate_tokens(text: str) -> int:
"""Quick token estimate. CJK ≈1.7/token, others ≈3.0/token."""
cjk = sum(1 for c in text if '' <= c <= '鿿' or ' ' <= c <= '')
other = len(text) - cjk
return max(1, int(cjk / 1.7 + other / 3.0))
@staticmethod
def estimate_image_tokens() -> int:
"""Fixed estimate for one vision-model image (~500 tokens)."""
return 500
def chat(
self, model: str, messages: list[dict], *, timeout: int | None = None,
response_format: dict | None = None,
) -> str:
"""Send a chat completion request and return the response content.
Automatically retries on failure and accumulates token usage.
"""
label = f"chat({model})"
def _call():
t0 = time.time()
kwargs = dict(model=model, messages=messages, timeout=timeout or self._timeout)
if response_format is not None:
kwargs["response_format"] = response_format
kwargs["temperature"] = 0
resp = self._client.chat.completions.create(**kwargs)
content = resp.choices[0].message.content
usg = resp.usage
if usg:
self._prompt_tokens += usg.prompt_tokens
self._completion_tokens += usg.completion_tokens
elapsed = time.time() - t0
logger.info("%s: %d chars in %.1fs", label, len(content) if content else 0, elapsed)
if not content:
raise RuntimeError("Empty response from LLM")
return content
return self._retry(_call, label)
def _retry(self, fn, label: str) -> str:
"""Call *fn()* with exponential-backoff retry."""
last_error: Optional[Exception] = None
for attempt in range(self.MAX_RETRIES):
try:
return fn()
except Exception as e:
last_error = e
logger.warning(
"%s error (attempt %d/%d): %s",
label, attempt + 1, self.MAX_RETRIES, e,
)
if attempt < self.MAX_RETRIES - 1:
time.sleep(2 ** attempt)
raise RuntimeError(f"{label}: all retries exhausted") from last_error