import full eventflow project
This commit is contained in:
13
.env.example
Normal file
13
.env.example
Normal file
@@ -0,0 +1,13 @@
|
||||
# LLM (OpenAI-compatible gateway)
|
||||
LLM_BASE_URL=http://129.204.192.37:23000/v1
|
||||
LLM_API_KEY=sk-REDACTED
|
||||
LLM_MODEL=gpt-5.2
|
||||
|
||||
# DB
|
||||
DATABASE_URL=sqlite:///./eventflow.sqlite3
|
||||
|
||||
# Ingest tuning
|
||||
FETCH_UA=Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123 Safari/537.36
|
||||
FETCH_RPS_PER_DOMAIN=0.2
|
||||
FETCH_TIMEOUT_SECS=15
|
||||
FETCH_MAX_RETRIES=3
|
||||
7
.gitignore
vendored
Normal file
7
.gitignore
vendored
Normal file
@@ -0,0 +1,7 @@
|
||||
.env
|
||||
*.sqlite
|
||||
*.sqlite3
|
||||
__pycache__/
|
||||
*.pyc
|
||||
.pytest_cache/
|
||||
*.log
|
||||
0
backend/__init__.py
Normal file
0
backend/__init__.py
Normal file
0
backend/fastapi_app/__init__.py
Normal file
0
backend/fastapi_app/__init__.py
Normal file
0
backend/fastapi_app/kb/__init__.py
Normal file
0
backend/fastapi_app/kb/__init__.py
Normal file
0
backend/fastapi_app/kb/rules/__init__.py
Normal file
0
backend/fastapi_app/kb/rules/__init__.py
Normal file
1
backend/fastapi_app/kb/rules/rules.yml
Normal file
1
backend/fastapi_app/kb/rules/rules.yml
Normal file
@@ -0,0 +1 @@
|
||||
# V1 rules placeholder
|
||||
27
backend/fastapi_app/main.py
Normal file
27
backend/fastapi_app/main.py
Normal file
@@ -0,0 +1,27 @@
|
||||
import threading
|
||||
|
||||
from fastapi import FastAPI
|
||||
|
||||
from .routers import analyze, ingest, query
|
||||
from .services.retry_worker import run_retry_loop
|
||||
|
||||
app = FastAPI(title="eventflow-fastapi", version="0.1.0")
|
||||
|
||||
app.include_router(ingest.router, prefix="/ingest", tags=["ingest"])
|
||||
app.include_router(analyze.router, prefix="/analyze", tags=["analyze"])
|
||||
app.include_router(query.router, prefix="/query", tags=["query"])
|
||||
|
||||
|
||||
_stop = threading.Event()
|
||||
|
||||
|
||||
@app.on_event("startup")
|
||||
def _startup() -> None:
|
||||
# Background retry loop so transient gateway 5xx doesn't permanently stall parsing.
|
||||
t = threading.Thread(target=run_retry_loop, args=(_stop,), daemon=True)
|
||||
t.start()
|
||||
|
||||
|
||||
@app.on_event("shutdown")
|
||||
def _shutdown() -> None:
|
||||
_stop.set()
|
||||
0
backend/fastapi_app/routers/__init__.py
Normal file
0
backend/fastapi_app/routers/__init__.py
Normal file
58
backend/fastapi_app/routers/analyze.py
Normal file
58
backend/fastapi_app/routers/analyze.py
Normal file
@@ -0,0 +1,58 @@
|
||||
import json
|
||||
from fastapi import APIRouter, HTTPException
|
||||
|
||||
from ..services.llm_extract import extract_event
|
||||
from ..services.store import ensure_schema, get_store, insert_event_result, insert_raw_item
|
||||
|
||||
router = APIRouter()
|
||||
|
||||
|
||||
@router.post("/event")
|
||||
def analyze_event(payload: dict):
|
||||
"""Analyze one ad-hoc item and persist it.
|
||||
|
||||
This powers the UI while we build full ingesters.
|
||||
"""
|
||||
|
||||
title = (payload.get("title") or "").strip()
|
||||
content = (payload.get("content") or "").strip()
|
||||
if not title and not content:
|
||||
raise HTTPException(status_code=400, detail="missing title/content")
|
||||
|
||||
st = get_store()
|
||||
conn = st.connect()
|
||||
ensure_schema(conn)
|
||||
|
||||
raw_item_id = insert_raw_item(
|
||||
conn,
|
||||
source=str(payload.get("source") or "manual"),
|
||||
item_date=str(payload.get("date") or "") or "manual",
|
||||
title=title[:500],
|
||||
content=content[:20_000],
|
||||
url=payload.get("url"),
|
||||
published_at=payload.get("published_at"),
|
||||
lang=payload.get("lang"),
|
||||
)
|
||||
|
||||
res = extract_event(title=title, content=content, lang_hint=payload.get("lang"))
|
||||
|
||||
if res.get("ok") is True:
|
||||
insert_event_result(
|
||||
conn,
|
||||
raw_item_id=raw_item_id,
|
||||
model=str(res.get("model") or ""),
|
||||
ok=True,
|
||||
event_json=json.dumps(res.get("event"), ensure_ascii=True),
|
||||
error=None,
|
||||
)
|
||||
else:
|
||||
insert_event_result(
|
||||
conn,
|
||||
raw_item_id=raw_item_id,
|
||||
model=str(res.get("model") or ""),
|
||||
ok=False,
|
||||
event_json=None,
|
||||
error=str(res.get("error") or "unknown"),
|
||||
)
|
||||
|
||||
return {"raw_item_id": raw_item_id, **res}
|
||||
214
backend/fastapi_app/routers/ingest.py
Normal file
214
backend/fastapi_app/routers/ingest.py
Normal file
@@ -0,0 +1,214 @@
|
||||
import json
|
||||
from datetime import date
|
||||
|
||||
from fastapi import APIRouter, HTTPException
|
||||
|
||||
from ..services.llm_extract import extract_event
|
||||
from ..services.market_moves import fetch_moves_via_qfr
|
||||
from ..services.store import (
|
||||
ensure_schema,
|
||||
get_store,
|
||||
insert_event_result,
|
||||
insert_raw_item,
|
||||
)
|
||||
|
||||
router = APIRouter()
|
||||
|
||||
|
||||
def _today() -> str:
|
||||
return date.today().isoformat()
|
||||
|
||||
|
||||
@router.post("/rss")
|
||||
def ingest_rss(payload: dict):
|
||||
"""Ingest one or many RSS items.
|
||||
|
||||
Expected payload:
|
||||
{"items": [{"title":..., "url":..., "published_at":..., "summary":..., "lang":...}, ...]}
|
||||
"""
|
||||
items = payload.get("items")
|
||||
if not isinstance(items, list) or not items:
|
||||
raise HTTPException(status_code=400, detail="payload.items must be a non-empty list")
|
||||
|
||||
st = get_store()
|
||||
conn = st.connect()
|
||||
ensure_schema(conn)
|
||||
|
||||
n = 0
|
||||
for it in items:
|
||||
if not isinstance(it, dict):
|
||||
continue
|
||||
insert_raw_item(
|
||||
conn,
|
||||
source="rss",
|
||||
item_date=payload.get("date") or _today(),
|
||||
title=(it.get("title") or "")[:500],
|
||||
content=(it.get("summary") or it.get("content") or "")[:20_000],
|
||||
url=it.get("url"),
|
||||
published_at=it.get("published_at"),
|
||||
lang=it.get("lang"),
|
||||
)
|
||||
n += 1
|
||||
|
||||
return {"ok": True, "inserted": n}
|
||||
|
||||
|
||||
@router.post("/macro")
|
||||
def ingest_macro(payload: dict):
|
||||
items = payload.get("items")
|
||||
if not isinstance(items, list) or not items:
|
||||
raise HTTPException(status_code=400, detail="payload.items must be a non-empty list")
|
||||
|
||||
st = get_store()
|
||||
conn = st.connect()
|
||||
ensure_schema(conn)
|
||||
|
||||
n = 0
|
||||
for it in items:
|
||||
if not isinstance(it, dict):
|
||||
continue
|
||||
insert_raw_item(
|
||||
conn,
|
||||
source="macro",
|
||||
item_date=payload.get("date") or _today(),
|
||||
title=(it.get("title") or "")[:500],
|
||||
content=(it.get("content") or "")[:20_000],
|
||||
url=it.get("url"),
|
||||
published_at=it.get("published_at"),
|
||||
lang=it.get("lang"),
|
||||
)
|
||||
n += 1
|
||||
|
||||
return {"ok": True, "inserted": n}
|
||||
|
||||
|
||||
@router.post("/market_moves")
|
||||
def ingest_market_moves(payload: dict):
|
||||
items = payload.get("items")
|
||||
if not isinstance(items, list) or not items:
|
||||
raise HTTPException(status_code=400, detail="payload.items must be a non-empty list")
|
||||
|
||||
st = get_store()
|
||||
conn = st.connect()
|
||||
ensure_schema(conn)
|
||||
|
||||
n = 0
|
||||
for it in items:
|
||||
if not isinstance(it, dict):
|
||||
continue
|
||||
insert_raw_item(
|
||||
conn,
|
||||
source="market_moves",
|
||||
item_date=payload.get("date") or _today(),
|
||||
title=(it.get("title") or "")[:500],
|
||||
content=(it.get("content") or "")[:20_000],
|
||||
url=it.get("url"),
|
||||
published_at=it.get("published_at"),
|
||||
lang=it.get("lang"),
|
||||
)
|
||||
n += 1
|
||||
|
||||
return {"ok": True, "inserted": n}
|
||||
|
||||
|
||||
@router.post("/market_moves/run")
|
||||
def run_market_moves(payload: dict | None = None):
|
||||
"""Generate daily market-move items from QFR raw data and parse them into events."""
|
||||
|
||||
payload = payload or {}
|
||||
day = str(payload.get("date") or _today())
|
||||
# QFR raw data uses trade_date like YYYYMMDD.
|
||||
trade_date = day.replace("-", "")
|
||||
|
||||
st = get_store()
|
||||
conn = st.connect()
|
||||
ensure_schema(conn)
|
||||
|
||||
data = fetch_moves_via_qfr(trade_date=trade_date, symbols=payload.get("symbols"))
|
||||
if not data.get("ok"):
|
||||
raise HTTPException(status_code=500, detail=data)
|
||||
|
||||
inserted = 0
|
||||
parsed_ok = 0
|
||||
parsed_err = 0
|
||||
|
||||
for mv in data.get("moves", []):
|
||||
sym = mv.get("symbol")
|
||||
td = mv.get("trade_date")
|
||||
ret_1d = mv.get("ret_1d")
|
||||
vol_20d = mv.get("vol_20d")
|
||||
z_1d = mv.get("z_1d")
|
||||
|
||||
title = f"Market move {sym} {td}: ret_1d={ret_1d:.4f} z_1d={z_1d:.2f}" if isinstance(ret_1d, (int, float)) and isinstance(z_1d, (int, float)) else f"Market move {sym} {td}"
|
||||
content = (
|
||||
f"symbol={sym}\n"
|
||||
f"trade_date={td}\n"
|
||||
f"prev_trade_date={mv.get('prev_trade_date')}\n"
|
||||
f"close={mv.get('close')} prev_close={mv.get('prev_close')}\n"
|
||||
f"ret_1d={ret_1d} vol_20d={vol_20d} z_1d={z_1d}\n"
|
||||
"Interpretation task: explain the most likely macro/industry drivers for this move and which assets could be affected."
|
||||
)
|
||||
|
||||
raw_item_id = insert_raw_item(
|
||||
conn,
|
||||
source="market_moves",
|
||||
item_date=day,
|
||||
title=title[:500],
|
||||
content=content[:20_000],
|
||||
url=None,
|
||||
published_at=None,
|
||||
lang="en",
|
||||
)
|
||||
inserted += 1
|
||||
|
||||
try:
|
||||
res = extract_event(title=title, content=content, lang_hint="en")
|
||||
except Exception as e:
|
||||
# Network/provider errors should not abort the whole batch.
|
||||
insert_event_result(
|
||||
conn,
|
||||
raw_item_id=raw_item_id,
|
||||
model="",
|
||||
ok=False,
|
||||
event_json=None,
|
||||
error=f"llm_exception:{type(e).__name__}",
|
||||
)
|
||||
parsed_err += 1
|
||||
continue
|
||||
|
||||
if res.get("ok") is True:
|
||||
insert_event_result(
|
||||
conn,
|
||||
raw_item_id=raw_item_id,
|
||||
model=str(res.get("model") or ""),
|
||||
ok=True,
|
||||
event_json=json.dumps(res.get("event"), ensure_ascii=True),
|
||||
error=None,
|
||||
)
|
||||
parsed_ok += 1
|
||||
else:
|
||||
err = str(res.get("error") or "unknown")
|
||||
if err == "llm_failed":
|
||||
# Keep a short hint to debug gateway flakiness without dumping secrets.
|
||||
exc = str(res.get("exc") or "")
|
||||
if exc:
|
||||
err = f"{err}:{exc}"
|
||||
insert_event_result(
|
||||
conn,
|
||||
raw_item_id=raw_item_id,
|
||||
model=str(res.get("model") or ""),
|
||||
ok=False,
|
||||
event_json=None,
|
||||
error=err,
|
||||
)
|
||||
parsed_err += 1
|
||||
|
||||
return {
|
||||
"ok": True,
|
||||
"date": day,
|
||||
"inserted": inserted,
|
||||
"parsed_ok": parsed_ok,
|
||||
"parsed_err": parsed_err,
|
||||
"errors": data.get("errors", []),
|
||||
"symbols": data.get("symbols"),
|
||||
}
|
||||
215
backend/fastapi_app/routers/query.py
Normal file
215
backend/fastapi_app/routers/query.py
Normal file
@@ -0,0 +1,215 @@
|
||||
from datetime import date
|
||||
|
||||
from fastapi import APIRouter
|
||||
from fastapi.responses import HTMLResponse
|
||||
|
||||
from ..services.store import counts, get_store, list_events, list_raw_items, sources_today
|
||||
|
||||
router = APIRouter()
|
||||
|
||||
|
||||
@router.get("/health")
|
||||
def health():
|
||||
return {"ok": True}
|
||||
|
||||
|
||||
@router.get("/status")
|
||||
def status(day: str | None = None):
|
||||
st = get_store()
|
||||
conn = st.connect()
|
||||
item_date = day or date.today().isoformat()
|
||||
return {
|
||||
"date": item_date,
|
||||
"counts": counts(conn),
|
||||
"sources": sources_today(conn, item_date),
|
||||
}
|
||||
|
||||
|
||||
@router.get("/raw_items")
|
||||
def raw_items(limit: int = 20):
|
||||
st = get_store()
|
||||
conn = st.connect()
|
||||
return {"items": list_raw_items(conn, limit=limit)}
|
||||
|
||||
|
||||
@router.get("/events")
|
||||
def events(limit: int = 20):
|
||||
st = get_store()
|
||||
conn = st.connect()
|
||||
return {"items": list_events(conn, limit=limit)}
|
||||
|
||||
|
||||
@router.get("/ui", response_class=HTMLResponse)
|
||||
def ui():
|
||||
# Tiny no-build UI for early validation.
|
||||
html = """<!doctype html>
|
||||
<html>
|
||||
<head>
|
||||
<meta charset=\"utf-8\" />
|
||||
<meta name=\"viewport\" content=\"width=device-width, initial-scale=1\" />
|
||||
<title>EventFlow V1</title>
|
||||
<style>
|
||||
body { font-family: ui-sans-serif, system-ui, Arial; margin: 18px; }
|
||||
h1 { margin: 0 0 8px 0; }
|
||||
.row { display: flex; gap: 16px; flex-wrap: wrap; }
|
||||
.card { border: 1px solid #ddd; border-radius: 8px; padding: 12px; min-width: 320px; }
|
||||
table { border-collapse: collapse; width: 100%; }
|
||||
th, td { border-bottom: 1px solid #eee; padding: 6px 8px; font-size: 13px; vertical-align: top; }
|
||||
th { text-align: left; color: #444; }
|
||||
code { background: #f6f6f6; padding: 2px 4px; border-radius: 4px; }
|
||||
.muted { color: #666; }
|
||||
.ok { color: #0a7; font-weight: 600; }
|
||||
.bad { color: #b00; font-weight: 600; }
|
||||
.mono { font-family: ui-monospace, SFMono-Regular, Menlo, monospace; }
|
||||
button { padding: 6px 10px; }
|
||||
input, textarea { width: 100%; box-sizing: border-box; }
|
||||
textarea { height: 90px; }
|
||||
</style>
|
||||
</head>
|
||||
<body>
|
||||
<h1>EventFlow V1</h1>
|
||||
<div class=\"muted\">Status page: sources, dates, counts, and parsed events.</div>
|
||||
<div style=\"height:12px\"></div>
|
||||
|
||||
<div class=\"row\">
|
||||
<div class=\"card\" style=\"flex: 1\">
|
||||
<div style=\"display:flex; justify-content:space-between; align-items:center; gap:12px;\">
|
||||
<div>
|
||||
<div><b>Today</b>: <span id=\"today\"></span></div>
|
||||
<div class=\"muted\">Auto-refresh every 5s</div>
|
||||
</div>
|
||||
<button onclick=\"refreshAll()\">Refresh</button>
|
||||
</div>
|
||||
<div style=\"height:10px\"></div>
|
||||
<div id=\"counts\" class=\"mono\"></div>
|
||||
<div style=\"height:10px\"></div>
|
||||
<div><b>Sources (today)</b></div>
|
||||
<table id=\"sources\"></table>
|
||||
</div>
|
||||
|
||||
<div class=\"card\" style=\"flex: 1\">
|
||||
<div><b>Quick Analyze (manual)</b></div>
|
||||
<div class=\"muted\">This will save a raw item + parsed event into SQLite.</div>
|
||||
<div style=\"height:8px\"></div>
|
||||
<label>Title</label>
|
||||
<input id=\"m_title\" placeholder=\"e.g. Fed signals higher-for-longer rates\" />
|
||||
<div style=\"height:6px\"></div>
|
||||
<label>Content</label>
|
||||
<textarea id=\"m_content\" placeholder=\"Paste a paragraph...\"></textarea>
|
||||
<div style=\"height:6px\"></div>
|
||||
<button onclick=\"runAnalyze()\">Analyze</button>
|
||||
<div id=\"m_out\" class=\"mono\" style=\"white-space:pre-wrap; margin-top:10px;\"></div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div style=\"height:16px\"></div>
|
||||
|
||||
<div class=\"row\">
|
||||
<div class=\"card\" style=\"flex: 1\">
|
||||
<div><b>Latest Raw Items</b> <span class=\"muted\">(limit 20)</span></div>
|
||||
<table id=\"raw\"></table>
|
||||
</div>
|
||||
<div class=\"card\" style=\"flex: 1\">
|
||||
<div><b>Latest Parsed Events</b> <span class=\"muted\">(limit 20)</span></div>
|
||||
<table id=\"events\"></table>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<script>
|
||||
function esc(s) {
|
||||
return (s ?? '').toString().replaceAll('&', '&').replaceAll('<', '<').replaceAll('>', '>');
|
||||
}
|
||||
|
||||
async function jget(url) {
|
||||
const r = await fetch(url);
|
||||
if (!r.ok) throw new Error('HTTP ' + r.status);
|
||||
return await r.json();
|
||||
}
|
||||
|
||||
function setTable(el, headers, rows) {
|
||||
let h = '<tr>' + headers.map(x => `<th>${esc(x)}</th>`).join('') + '</tr>';
|
||||
let b = rows.map(r => '<tr>' + r.map(x => `<td>${x}</td>`).join('') + '</tr>').join('');
|
||||
el.innerHTML = h + b;
|
||||
}
|
||||
|
||||
async function refreshStatus() {
|
||||
const st = await jget('/query/status');
|
||||
document.getElementById('today').textContent = st.date;
|
||||
const c = st.counts;
|
||||
document.getElementById('counts').textContent = `raw_items=${c.raw_items} events=${c.events} ok=${c.events_ok} err=${c.events_err}`;
|
||||
|
||||
const srows = (st.sources || []).map(x => [esc(x.source), esc(x.count)]);
|
||||
setTable(document.getElementById('sources'), ['source', 'count'], srows);
|
||||
}
|
||||
|
||||
async function refreshRaw() {
|
||||
const data = await jget('/query/raw_items?limit=20');
|
||||
const rows = (data.items || []).map(it => {
|
||||
const u = it.url ? `<a href="${esc(it.url)}" target="_blank">link</a>` : '';
|
||||
return [
|
||||
`<span class="mono">${esc(it.id)}</span>`,
|
||||
esc(it.source),
|
||||
esc(it.item_date),
|
||||
esc(it.lang || ''),
|
||||
esc((it.title || '').slice(0, 120)),
|
||||
u,
|
||||
];
|
||||
});
|
||||
setTable(document.getElementById('raw'), ['id', 'source', 'date', 'lang', 'title', 'url'], rows);
|
||||
}
|
||||
|
||||
async function refreshEvents() {
|
||||
const data = await jget('/query/events?limit=20');
|
||||
const rows = (data.items || []).map(it => {
|
||||
const cls = it.ok ? 'ok' : 'bad';
|
||||
let ev = '';
|
||||
try {
|
||||
if (it.event_json) {
|
||||
const obj = JSON.parse(it.event_json);
|
||||
ev = `${esc(obj.event_type)} dir=${esc(obj.impact_direction)} conf=${esc(obj.confidence)}<br/><span class="muted">${esc((obj.summary_en || obj.summary_zh || '').slice(0, 140))}</span>`;
|
||||
}
|
||||
} catch (e) {
|
||||
ev = '<span class="bad">bad json</span>';
|
||||
}
|
||||
return [
|
||||
`<span class="mono">${esc(it.id)}</span>`,
|
||||
esc(it.source),
|
||||
`<span class="${cls}">${it.ok ? 'ok' : 'err'}</span>`,
|
||||
esc((it.title || '').slice(0, 90)),
|
||||
ev,
|
||||
];
|
||||
});
|
||||
setTable(document.getElementById('events'), ['id', 'source', 'ok', 'raw_title', 'event'], rows);
|
||||
}
|
||||
|
||||
async function runAnalyze() {
|
||||
const title = document.getElementById('m_title').value;
|
||||
const content = document.getElementById('m_content').value;
|
||||
const out = document.getElementById('m_out');
|
||||
out.textContent = 'running...';
|
||||
const r = await fetch('/analyze/event', {
|
||||
method: 'POST',
|
||||
headers: {'Content-Type': 'application/json'},
|
||||
body: JSON.stringify({source: 'manual', date: new Date().toISOString().slice(0,10), title, content, lang: ''}),
|
||||
});
|
||||
const j = await r.json();
|
||||
out.textContent = JSON.stringify(j, null, 2);
|
||||
await refreshAll();
|
||||
}
|
||||
|
||||
async function refreshAll() {
|
||||
try {
|
||||
await refreshStatus();
|
||||
await refreshRaw();
|
||||
await refreshEvents();
|
||||
} catch (e) {
|
||||
console.error(e);
|
||||
}
|
||||
}
|
||||
|
||||
refreshAll();
|
||||
setInterval(refreshAll, 5000);
|
||||
</script>
|
||||
</body>
|
||||
</html>"""
|
||||
return HTMLResponse(content=html)
|
||||
0
backend/fastapi_app/services/__init__.py
Normal file
0
backend/fastapi_app/services/__init__.py
Normal file
68
backend/fastapi_app/services/market_moves.py
Normal file
68
backend/fastapi_app/services/market_moves.py
Normal file
@@ -0,0 +1,68 @@
|
||||
import json
|
||||
import os
|
||||
import subprocess
|
||||
from typing import Any
|
||||
|
||||
|
||||
WATCHLIST_DEFAULT = [
|
||||
# A-share ETF proxies for indices (QFR raw data uses ETF parquets)
|
||||
# HS300
|
||||
"510300.SH",
|
||||
# ZZ500
|
||||
"510500.SH",
|
||||
# ChiNext
|
||||
"159915.SZ",
|
||||
# SSE50 proxy (may not exist in rawdir unless downloaded)
|
||||
"510050.SH",
|
||||
|
||||
# Futures placeholders (not in QFR rawdir by default; will show as errors until sourced)
|
||||
"AU.SHF",
|
||||
"CU.SHF",
|
||||
"M.DCE",
|
||||
"TA.CZCE",
|
||||
"SC.INE",
|
||||
]
|
||||
|
||||
|
||||
def fetch_moves_via_qfr(*, trade_date: str | None = None, symbols: list[str] | None = None) -> dict[str, Any]:
|
||||
"""Fetch day-level moves by shelling out to the existing qfr conda env.
|
||||
|
||||
Reason: eventflow env is kept minimal; qfr env already has pandas/pyarrow.
|
||||
"""
|
||||
|
||||
sym_list = symbols or WATCHLIST_DEFAULT
|
||||
rawdir = os.environ.get("QFR_RAWDIR", "/home/openclaw/projects/quant-factor-research/data/raw")
|
||||
|
||||
env = os.environ.copy()
|
||||
env["QFR_RAWDIR"] = rawdir
|
||||
env["QFR_SYMBOLS"] = ",".join(sym_list)
|
||||
if trade_date:
|
||||
env["QFR_TRADE_DATE"] = trade_date
|
||||
|
||||
conda = os.environ.get("CONDA_BIN", "/home/openclaw/miniconda3/bin/conda")
|
||||
script = os.path.join(os.path.dirname(__file__), "market_moves_qfr.py")
|
||||
|
||||
cmd = [conda, "run", "-n", "qfr", "python", script]
|
||||
proc = subprocess.run(cmd, env=env, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
|
||||
|
||||
if proc.returncode != 0:
|
||||
return {
|
||||
"ok": False,
|
||||
"error": "qfr_subprocess_failed",
|
||||
"returncode": proc.returncode,
|
||||
"stderr": proc.stderr[-4000:],
|
||||
}
|
||||
|
||||
try:
|
||||
data = json.loads(proc.stdout)
|
||||
except json.JSONDecodeError:
|
||||
return {
|
||||
"ok": False,
|
||||
"error": "invalid_json_from_qfr",
|
||||
"stdout": proc.stdout[-2000:],
|
||||
"stderr": proc.stderr[-2000:],
|
||||
}
|
||||
|
||||
data["ok"] = True
|
||||
data["symbols"] = sym_list
|
||||
return data
|
||||
108
backend/fastapi_app/services/market_moves_qfr.py
Normal file
108
backend/fastapi_app/services/market_moves_qfr.py
Normal file
@@ -0,0 +1,108 @@
|
||||
import json
|
||||
import os
|
||||
from dataclasses import asdict, dataclass
|
||||
from typing import Any
|
||||
|
||||
import pandas as pd
|
||||
|
||||
|
||||
@dataclass
|
||||
class Move:
|
||||
symbol: str
|
||||
trade_date: str
|
||||
prev_trade_date: str
|
||||
close: float
|
||||
prev_close: float
|
||||
ret_1d: float
|
||||
vol_20d: float
|
||||
z_1d: float
|
||||
|
||||
|
||||
def _read_one(rawdir: str, symbol: str) -> pd.DataFrame:
|
||||
# QFR stores parquet as e.g. 510300SH.parquet / 159915SZ.parquet
|
||||
fn = symbol.replace(".", "") + ".parquet"
|
||||
p = os.path.join(rawdir, fn)
|
||||
df = pd.read_parquet(p)
|
||||
# Standardize
|
||||
if "trade_date" not in df.columns or "close" not in df.columns:
|
||||
raise RuntimeError(f"unexpected parquet schema for {symbol}: {df.columns.tolist()}")
|
||||
df = df.sort_values("trade_date").reset_index(drop=True)
|
||||
return df
|
||||
|
||||
|
||||
def _calc_move(df: pd.DataFrame, symbol: str, trade_date: str | None) -> Move | None:
|
||||
if df.empty:
|
||||
return None
|
||||
# pick last available <= trade_date if given
|
||||
if trade_date:
|
||||
df2 = df[df["trade_date"] <= trade_date]
|
||||
if df2.empty:
|
||||
return None
|
||||
df = df2
|
||||
|
||||
if len(df) < 2:
|
||||
return None
|
||||
|
||||
# compute returns
|
||||
close = df["close"].astype(float)
|
||||
ret = close.pct_change()
|
||||
|
||||
i = len(df) - 1
|
||||
prev_i = i - 1
|
||||
|
||||
td = str(df.iloc[i]["trade_date"])
|
||||
ptd = str(df.iloc[prev_i]["trade_date"])
|
||||
|
||||
close_i = float(close.iloc[i])
|
||||
prev_close_i = float(close.iloc[prev_i])
|
||||
ret_1d = float(ret.iloc[i])
|
||||
|
||||
# vol over last 20 returns (excluding NaN)
|
||||
vol_20 = float(ret.iloc[max(0, i - 20 + 1) : i + 1].std(skipna=True))
|
||||
if not (vol_20 > 0):
|
||||
vol_20 = 0.0
|
||||
z = float(ret_1d / vol_20) if vol_20 > 0 else 0.0
|
||||
|
||||
return Move(
|
||||
symbol=symbol,
|
||||
trade_date=td,
|
||||
prev_trade_date=ptd,
|
||||
close=close_i,
|
||||
prev_close=prev_close_i,
|
||||
ret_1d=ret_1d,
|
||||
vol_20d=vol_20,
|
||||
z_1d=z,
|
||||
)
|
||||
|
||||
|
||||
def main() -> None:
|
||||
rawdir = os.environ.get("QFR_RAWDIR") or "/home/openclaw/projects/quant-factor-research/data/raw"
|
||||
symbols = (os.environ.get("QFR_SYMBOLS") or "").strip()
|
||||
trade_date = (os.environ.get("QFR_TRADE_DATE") or "").strip() or None
|
||||
|
||||
if not symbols:
|
||||
raise SystemExit("QFR_SYMBOLS is required")
|
||||
|
||||
out: dict[str, Any] = {
|
||||
"rawdir": rawdir,
|
||||
"trade_date": trade_date,
|
||||
"moves": [],
|
||||
"errors": [],
|
||||
}
|
||||
|
||||
for sym in [s.strip() for s in symbols.split(",") if s.strip()]:
|
||||
try:
|
||||
df = _read_one(rawdir, sym)
|
||||
mv = _calc_move(df, sym, trade_date)
|
||||
if mv is None:
|
||||
out["errors"].append({"symbol": sym, "error": "no_data"})
|
||||
continue
|
||||
out["moves"].append(asdict(mv))
|
||||
except Exception as e:
|
||||
out["errors"].append({"symbol": sym, "error": str(e)})
|
||||
|
||||
print(json.dumps(out, ensure_ascii=True))
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
99
backend/fastapi_app/services/retry_worker.py
Normal file
99
backend/fastapi_app/services/retry_worker.py
Normal file
@@ -0,0 +1,99 @@
|
||||
import json
|
||||
import os
|
||||
import random
|
||||
import threading
|
||||
import time
|
||||
|
||||
from .llm_extract import extract_event
|
||||
from .store import ensure_schema, get_store, insert_event_result
|
||||
|
||||
|
||||
def _pick_pending(conn, *, batch: int, max_attempts: int, min_age_s: int, retry_after_s: int):
|
||||
now = int(time.time())
|
||||
# Select items that are not successfully parsed yet.
|
||||
# We rely on raw_items.status/parse_attempts which are updated by insert_event_result.
|
||||
rows = conn.execute(
|
||||
"""
|
||||
SELECT id, title, content, lang, status, parse_attempts, last_parse_at
|
||||
FROM raw_items
|
||||
WHERE status != 'parsed_ok'
|
||||
AND COALESCE(parse_attempts, 0) < ?
|
||||
AND (? - created_at) >= ?
|
||||
AND (last_parse_at IS NULL OR (? - last_parse_at) >= ?)
|
||||
ORDER BY id ASC
|
||||
LIMIT ?
|
||||
""",
|
||||
(max_attempts, now, min_age_s, now, retry_after_s, batch),
|
||||
).fetchall()
|
||||
return [dict(r) for r in rows]
|
||||
|
||||
|
||||
def run_retry_loop(stop: threading.Event) -> None:
|
||||
if os.environ.get("PARSE_RETRY_ENABLED", "1") not in ("1", "true", "TRUE", "yes", "YES"):
|
||||
return
|
||||
|
||||
interval_s = int(os.environ.get("PARSE_RETRY_INTERVAL_S", "120"))
|
||||
batch = int(os.environ.get("PARSE_RETRY_BATCH", "5"))
|
||||
max_attempts = int(os.environ.get("PARSE_RETRY_MAX_ATTEMPTS", "6"))
|
||||
min_age_s = int(os.environ.get("PARSE_RETRY_MIN_AGE_S", "2"))
|
||||
retry_after_s = int(os.environ.get("PARSE_RETRY_AFTER_S", "300"))
|
||||
sleep_between_s = float(os.environ.get("PARSE_RETRY_SLEEP_BETWEEN_S", "0.5"))
|
||||
|
||||
st = get_store()
|
||||
|
||||
while not stop.is_set():
|
||||
try:
|
||||
conn = st.connect()
|
||||
ensure_schema(conn)
|
||||
|
||||
items = _pick_pending(
|
||||
conn,
|
||||
batch=batch,
|
||||
max_attempts=max_attempts,
|
||||
min_age_s=min_age_s,
|
||||
retry_after_s=retry_after_s,
|
||||
)
|
||||
|
||||
for it in items:
|
||||
if stop.is_set():
|
||||
break
|
||||
|
||||
# Jitter to reduce gateway burstiness.
|
||||
time.sleep(sleep_between_s + random.random() * 0.4)
|
||||
|
||||
title = (it.get("title") or "")[:500]
|
||||
content = (it.get("content") or "")[:20000]
|
||||
lang_hint = it.get("lang")
|
||||
|
||||
res = extract_event(title=title, content=content, lang_hint=lang_hint)
|
||||
|
||||
if res.get("ok") is True:
|
||||
insert_event_result(
|
||||
conn,
|
||||
raw_item_id=int(it["id"]),
|
||||
model=str(res.get("model") or ""),
|
||||
ok=True,
|
||||
event_json=json.dumps(res.get("event"), ensure_ascii=True),
|
||||
error=None,
|
||||
)
|
||||
else:
|
||||
# Preserve hint for debugging (no secrets).
|
||||
err = str(res.get("error") or "unknown")
|
||||
if err == "llm_failed":
|
||||
exc = str(res.get("exc") or "")
|
||||
if exc:
|
||||
err = f"{err}:{exc}"
|
||||
insert_event_result(
|
||||
conn,
|
||||
raw_item_id=int(it["id"]),
|
||||
model=str(res.get("model") or ""),
|
||||
ok=False,
|
||||
event_json=None,
|
||||
error=err,
|
||||
)
|
||||
|
||||
except Exception:
|
||||
# Keep the loop alive; details are in DB (last_error) or uvicorn logs.
|
||||
pass
|
||||
|
||||
stop.wait(interval_s)
|
||||
205
backend/fastapi_app/services/store.py
Normal file
205
backend/fastapi_app/services/store.py
Normal file
@@ -0,0 +1,205 @@
|
||||
import os
|
||||
import sqlite3
|
||||
import time
|
||||
from dataclasses import dataclass
|
||||
from typing import Any
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class Store:
|
||||
db_path: str
|
||||
|
||||
def connect(self) -> sqlite3.Connection:
|
||||
# check_same_thread=False allows use across simple dev reload threads.
|
||||
conn = sqlite3.connect(self.db_path, check_same_thread=False)
|
||||
conn.row_factory = sqlite3.Row
|
||||
return conn
|
||||
|
||||
|
||||
def _sqlite_path_from_database_url(url: str) -> str:
|
||||
# V1 supports only sqlite. Examples:
|
||||
# - sqlite:///./eventflow.sqlite3
|
||||
# - sqlite:////abs/path/eventflow.sqlite3
|
||||
if not url.startswith("sqlite:///"):
|
||||
raise ValueError("Only sqlite DATABASE_URL is supported in V1")
|
||||
return url[len("sqlite:///") :]
|
||||
|
||||
|
||||
def get_store() -> Store:
|
||||
url = os.environ.get("DATABASE_URL", "sqlite:///./eventflow.sqlite3")
|
||||
path = _sqlite_path_from_database_url(url)
|
||||
return Store(db_path=path)
|
||||
|
||||
|
||||
def _ensure_column(conn: sqlite3.Connection, *, table: str, col: str, ddl: str) -> None:
|
||||
cols = [r[1] for r in conn.execute(f"PRAGMA table_info({table})").fetchall()]
|
||||
if col not in cols:
|
||||
conn.execute(f"ALTER TABLE {table} ADD COLUMN {ddl}")
|
||||
|
||||
|
||||
def ensure_schema(conn: sqlite3.Connection) -> None:
|
||||
conn.executescript(
|
||||
"""
|
||||
PRAGMA journal_mode=WAL;
|
||||
|
||||
CREATE TABLE IF NOT EXISTS raw_items (
|
||||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
source TEXT NOT NULL,
|
||||
item_date TEXT NOT NULL,
|
||||
published_at TEXT,
|
||||
url TEXT,
|
||||
title TEXT,
|
||||
content TEXT,
|
||||
lang TEXT,
|
||||
status TEXT NOT NULL DEFAULT 'new',
|
||||
created_at INTEGER NOT NULL
|
||||
);
|
||||
|
||||
CREATE INDEX IF NOT EXISTS idx_raw_items_source_date ON raw_items(source, item_date);
|
||||
CREATE INDEX IF NOT EXISTS idx_raw_items_created_at ON raw_items(created_at);
|
||||
|
||||
CREATE TABLE IF NOT EXISTS events (
|
||||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
raw_item_id INTEGER NOT NULL,
|
||||
model TEXT,
|
||||
ok INTEGER NOT NULL,
|
||||
event_json TEXT,
|
||||
error TEXT,
|
||||
created_at INTEGER NOT NULL,
|
||||
FOREIGN KEY(raw_item_id) REFERENCES raw_items(id)
|
||||
);
|
||||
|
||||
CREATE INDEX IF NOT EXISTS idx_events_raw_item_id ON events(raw_item_id);
|
||||
CREATE INDEX IF NOT EXISTS idx_events_created_at ON events(created_at);
|
||||
"""
|
||||
)
|
||||
|
||||
# Light migrations for V1: keep raw_items parse state for retry.
|
||||
_ensure_column(conn, table="raw_items", col="parse_attempts", ddl="parse_attempts INTEGER NOT NULL DEFAULT 0")
|
||||
_ensure_column(conn, table="raw_items", col="last_parse_at", ddl="last_parse_at INTEGER")
|
||||
_ensure_column(conn, table="raw_items", col="last_error", ddl="last_error TEXT")
|
||||
|
||||
conn.commit()
|
||||
|
||||
|
||||
def insert_raw_item(
|
||||
conn: sqlite3.Connection,
|
||||
*,
|
||||
source: str,
|
||||
item_date: str,
|
||||
title: str | None,
|
||||
content: str | None,
|
||||
url: str | None = None,
|
||||
published_at: str | None = None,
|
||||
lang: str | None = None,
|
||||
) -> int:
|
||||
now = int(time.time())
|
||||
cur = conn.execute(
|
||||
"""
|
||||
INSERT INTO raw_items (source, item_date, published_at, url, title, content, lang, status, created_at)
|
||||
VALUES (?, ?, ?, ?, ?, ?, ?, 'new', ?)
|
||||
""",
|
||||
(source, item_date, published_at, url, title, content, lang, now),
|
||||
)
|
||||
conn.commit()
|
||||
return int(cur.lastrowid)
|
||||
|
||||
|
||||
def insert_event_result(
|
||||
conn: sqlite3.Connection,
|
||||
*,
|
||||
raw_item_id: int,
|
||||
model: str,
|
||||
ok: bool,
|
||||
event_json: str | None,
|
||||
error: str | None,
|
||||
) -> int:
|
||||
now = int(time.time())
|
||||
cur = conn.execute(
|
||||
"""
|
||||
INSERT INTO events (raw_item_id, model, ok, event_json, error, created_at)
|
||||
VALUES (?, ?, ?, ?, ?, ?)
|
||||
""",
|
||||
(raw_item_id, model, 1 if ok else 0, event_json, error, now),
|
||||
)
|
||||
|
||||
# Update raw item parse state (best-effort).
|
||||
status = "parsed_ok" if ok else "parsed_err"
|
||||
conn.execute(
|
||||
"""
|
||||
UPDATE raw_items
|
||||
SET status = ?,
|
||||
parse_attempts = COALESCE(parse_attempts, 0) + 1,
|
||||
last_parse_at = ?,
|
||||
last_error = ?
|
||||
WHERE id = ?
|
||||
""",
|
||||
(status, now, error, raw_item_id),
|
||||
)
|
||||
|
||||
conn.commit()
|
||||
return int(cur.lastrowid)
|
||||
|
||||
|
||||
def counts(conn: sqlite3.Connection) -> dict[str, Any]:
|
||||
ensure_schema(conn)
|
||||
row_raw = conn.execute("SELECT COUNT(1) AS n FROM raw_items").fetchone()
|
||||
row_ev = conn.execute("SELECT COUNT(1) AS n FROM events").fetchone()
|
||||
row_ok = conn.execute("SELECT COUNT(1) AS n FROM events WHERE ok=1").fetchone()
|
||||
row_bad = conn.execute("SELECT COUNT(1) AS n FROM events WHERE ok=0").fetchone()
|
||||
return {
|
||||
"raw_items": int(row_raw["n"]),
|
||||
"events": int(row_ev["n"]),
|
||||
"events_ok": int(row_ok["n"]),
|
||||
"events_err": int(row_bad["n"]),
|
||||
}
|
||||
|
||||
|
||||
def sources_today(conn: sqlite3.Connection, item_date: str) -> list[dict[str, Any]]:
|
||||
ensure_schema(conn)
|
||||
rows = conn.execute(
|
||||
"""
|
||||
SELECT source, COUNT(1) AS n
|
||||
FROM raw_items
|
||||
WHERE item_date = ?
|
||||
GROUP BY source
|
||||
ORDER BY source
|
||||
""",
|
||||
(item_date,),
|
||||
).fetchall()
|
||||
return [{"source": r["source"], "count": int(r["n"])} for r in rows]
|
||||
|
||||
|
||||
def list_raw_items(conn: sqlite3.Connection, limit: int = 20) -> list[dict[str, Any]]:
|
||||
ensure_schema(conn)
|
||||
rows = conn.execute(
|
||||
"""
|
||||
SELECT id, source, item_date, published_at, url, title, lang, status, created_at
|
||||
FROM raw_items
|
||||
ORDER BY id DESC
|
||||
LIMIT ?
|
||||
""",
|
||||
(limit,),
|
||||
).fetchall()
|
||||
return [dict(r) for r in rows]
|
||||
|
||||
|
||||
def list_events(conn: sqlite3.Connection, limit: int = 20) -> list[dict[str, Any]]:
|
||||
ensure_schema(conn)
|
||||
rows = conn.execute(
|
||||
"""
|
||||
SELECT e.id, e.raw_item_id, e.model, e.ok, e.event_json, e.error, e.created_at,
|
||||
r.source, r.item_date, r.title
|
||||
FROM events e
|
||||
JOIN raw_items r ON r.id = e.raw_item_id
|
||||
ORDER BY e.id DESC
|
||||
LIMIT ?
|
||||
""",
|
||||
(limit,),
|
||||
).fetchall()
|
||||
out: list[dict[str, Any]] = []
|
||||
for r in rows:
|
||||
d = dict(r)
|
||||
# keep event_json as string; UI can parse if needed
|
||||
out.append(d)
|
||||
return out
|
||||
BIN
eventflow.sqlite3-shm
Normal file
BIN
eventflow.sqlite3-shm
Normal file
Binary file not shown.
BIN
eventflow.sqlite3-wal
Normal file
BIN
eventflow.sqlite3-wal
Normal file
Binary file not shown.
8
requirements.txt
Normal file
8
requirements.txt
Normal file
@@ -0,0 +1,8 @@
|
||||
django>=5.0,<6.0
|
||||
fastapi>=0.110
|
||||
uvicorn[standard]>=0.27
|
||||
requests>=2.31
|
||||
feedparser>=6.0
|
||||
python-dotenv>=1.0
|
||||
openai>=1.40
|
||||
pydantic>=2.6
|
||||
Reference in New Issue
Block a user