import full eventflow project

This commit is contained in:
2026-03-13 17:18:19 +08:00
parent 0edc8f7477
commit bbae58e4fe
21 changed files with 1023 additions and 0 deletions

View File

@@ -0,0 +1,99 @@
import json
import os
import random
import threading
import time
from .llm_extract import extract_event
from .store import ensure_schema, get_store, insert_event_result
def _pick_pending(conn, *, batch: int, max_attempts: int, min_age_s: int, retry_after_s: int):
now = int(time.time())
# Select items that are not successfully parsed yet.
# We rely on raw_items.status/parse_attempts which are updated by insert_event_result.
rows = conn.execute(
"""
SELECT id, title, content, lang, status, parse_attempts, last_parse_at
FROM raw_items
WHERE status != 'parsed_ok'
AND COALESCE(parse_attempts, 0) < ?
AND (? - created_at) >= ?
AND (last_parse_at IS NULL OR (? - last_parse_at) >= ?)
ORDER BY id ASC
LIMIT ?
""",
(max_attempts, now, min_age_s, now, retry_after_s, batch),
).fetchall()
return [dict(r) for r in rows]
def run_retry_loop(stop: threading.Event) -> None:
if os.environ.get("PARSE_RETRY_ENABLED", "1") not in ("1", "true", "TRUE", "yes", "YES"):
return
interval_s = int(os.environ.get("PARSE_RETRY_INTERVAL_S", "120"))
batch = int(os.environ.get("PARSE_RETRY_BATCH", "5"))
max_attempts = int(os.environ.get("PARSE_RETRY_MAX_ATTEMPTS", "6"))
min_age_s = int(os.environ.get("PARSE_RETRY_MIN_AGE_S", "2"))
retry_after_s = int(os.environ.get("PARSE_RETRY_AFTER_S", "300"))
sleep_between_s = float(os.environ.get("PARSE_RETRY_SLEEP_BETWEEN_S", "0.5"))
st = get_store()
while not stop.is_set():
try:
conn = st.connect()
ensure_schema(conn)
items = _pick_pending(
conn,
batch=batch,
max_attempts=max_attempts,
min_age_s=min_age_s,
retry_after_s=retry_after_s,
)
for it in items:
if stop.is_set():
break
# Jitter to reduce gateway burstiness.
time.sleep(sleep_between_s + random.random() * 0.4)
title = (it.get("title") or "")[:500]
content = (it.get("content") or "")[:20000]
lang_hint = it.get("lang")
res = extract_event(title=title, content=content, lang_hint=lang_hint)
if res.get("ok") is True:
insert_event_result(
conn,
raw_item_id=int(it["id"]),
model=str(res.get("model") or ""),
ok=True,
event_json=json.dumps(res.get("event"), ensure_ascii=True),
error=None,
)
else:
# Preserve hint for debugging (no secrets).
err = str(res.get("error") or "unknown")
if err == "llm_failed":
exc = str(res.get("exc") or "")
if exc:
err = f"{err}:{exc}"
insert_event_result(
conn,
raw_item_id=int(it["id"]),
model=str(res.get("model") or ""),
ok=False,
event_json=None,
error=err,
)
except Exception:
# Keep the loop alive; details are in DB (last_error) or uvicorn logs.
pass
stop.wait(interval_s)