import full eventflow project
This commit is contained in:
99
backend/fastapi_app/services/retry_worker.py
Normal file
99
backend/fastapi_app/services/retry_worker.py
Normal file
@@ -0,0 +1,99 @@
|
||||
import json
|
||||
import os
|
||||
import random
|
||||
import threading
|
||||
import time
|
||||
|
||||
from .llm_extract import extract_event
|
||||
from .store import ensure_schema, get_store, insert_event_result
|
||||
|
||||
|
||||
def _pick_pending(conn, *, batch: int, max_attempts: int, min_age_s: int, retry_after_s: int):
|
||||
now = int(time.time())
|
||||
# Select items that are not successfully parsed yet.
|
||||
# We rely on raw_items.status/parse_attempts which are updated by insert_event_result.
|
||||
rows = conn.execute(
|
||||
"""
|
||||
SELECT id, title, content, lang, status, parse_attempts, last_parse_at
|
||||
FROM raw_items
|
||||
WHERE status != 'parsed_ok'
|
||||
AND COALESCE(parse_attempts, 0) < ?
|
||||
AND (? - created_at) >= ?
|
||||
AND (last_parse_at IS NULL OR (? - last_parse_at) >= ?)
|
||||
ORDER BY id ASC
|
||||
LIMIT ?
|
||||
""",
|
||||
(max_attempts, now, min_age_s, now, retry_after_s, batch),
|
||||
).fetchall()
|
||||
return [dict(r) for r in rows]
|
||||
|
||||
|
||||
def _retry_one(conn, item) -> None:
    """Re-run event extraction for one raw item and record the outcome."""
    raw_item_id = int(item["id"])
    # Cap what is handed to the extractor.
    title = (item.get("title") or "")[:500]
    content = (item.get("content") or "")[:20000]

    res = extract_event(title=title, content=content, lang_hint=item.get("lang"))
    model = str(res.get("model") or "")

    if res.get("ok") is True:
        insert_event_result(
            conn,
            raw_item_id=raw_item_id,
            model=model,
            ok=True,
            event_json=json.dumps(res.get("event"), ensure_ascii=True),
            error=None,
        )
        return

    # Preserve hint for debugging (no secrets).
    err = str(res.get("error") or "unknown")
    if err == "llm_failed":
        exc = str(res.get("exc") or "")
        if exc:
            err = f"{err}:{exc}"
    insert_event_result(
        conn,
        raw_item_id=raw_item_id,
        model=model,
        ok=False,
        event_json=None,
        error=err,
    )


def run_retry_loop(stop: threading.Event) -> None:
    """Periodically re-parse raw items that have no successful parse yet.

    Runs until ``stop`` is set. Each pass opens a connection, ensures the
    schema, picks a batch of pending items and re-runs extraction on each
    one, storing the result through ``insert_event_result``.

    Configuration is read once from the environment:

    * ``PARSE_RETRY_ENABLED`` (default ``"1"``): the loop returns immediately
      unless the value is ``1``, ``true`` or ``yes`` (case-insensitive).
    * ``PARSE_RETRY_INTERVAL_S`` (default 120): pause between passes.
    * ``PARSE_RETRY_BATCH`` (default 5): maximum items per pass.
    * ``PARSE_RETRY_MAX_ATTEMPTS`` (default 6): attempt cap per item.
    * ``PARSE_RETRY_MIN_AGE_S`` (default 2): minimum item age.
    * ``PARSE_RETRY_AFTER_S`` (default 300): minimum gap between attempts.
    * ``PARSE_RETRY_SLEEP_BETWEEN_S`` (default 0.5): base delay before each
      item; up to 0.4 s of random jitter is added.

    Args:
        stop: Event that ends the loop when set. All waits are made on this
            event, so setting it also interrupts a pause in progress.

    Raises:
        ValueError: If a numeric environment variable cannot be parsed.
    """
    # Function-scope import: keeps this block self-contained without touching
    # the module's import section.
    import logging

    enabled = os.environ.get("PARSE_RETRY_ENABLED", "1").strip().lower()
    if enabled not in ("1", "true", "yes"):
        return

    log = logging.getLogger(__name__)

    interval_s = int(os.environ.get("PARSE_RETRY_INTERVAL_S", "120"))
    batch = int(os.environ.get("PARSE_RETRY_BATCH", "5"))
    max_attempts = int(os.environ.get("PARSE_RETRY_MAX_ATTEMPTS", "6"))
    min_age_s = int(os.environ.get("PARSE_RETRY_MIN_AGE_S", "2"))
    retry_after_s = int(os.environ.get("PARSE_RETRY_AFTER_S", "300"))
    sleep_between_s = float(os.environ.get("PARSE_RETRY_SLEEP_BETWEEN_S", "0.5"))

    st = get_store()

    while not stop.is_set():
        conn = None
        try:
            conn = st.connect()
            ensure_schema(conn)

            items = _pick_pending(
                conn,
                batch=batch,
                max_attempts=max_attempts,
                min_age_s=min_age_s,
                retry_after_s=retry_after_s,
            )

            for it in items:
                # Jitter to reduce gateway burstiness. Waiting on the event
                # (instead of time.sleep) lets a stop request cut the delay
                # short; wait() returns True once the event is set.
                if stop.wait(sleep_between_s + random.random() * 0.4):
                    break

                try:
                    _retry_one(conn, it)
                except Exception:
                    # One bad item must not abort the rest of the batch.
                    log.exception("parse retry failed for raw item id=%r", it.get("id"))
        except Exception:
            # Keep the loop alive, but leave a trace instead of failing silently.
            log.exception("parse retry pass failed")
        finally:
            # NOTE(review): assumes st.connect() returns a connection owned by
            # this pass (a new one is requested every iteration) - confirm.
            if conn is not None:
                try:
                    conn.close()
                except Exception:
                    log.exception("closing parse retry connection failed")

        stop.wait(interval_s)
|
||||
Reference in New Issue
Block a user