import full eventflow project

This commit is contained in:
2026-03-13 17:18:19 +08:00
parent 0edc8f7477
commit bbae58e4fe
21 changed files with 1023 additions and 0 deletions

View File

View File

@@ -0,0 +1,58 @@
import json
from fastapi import APIRouter, HTTPException
from ..services.llm_extract import extract_event
from ..services.store import ensure_schema, get_store, insert_event_result, insert_raw_item
# Router that the ad-hoc analysis endpoint below registers itself on.
# NOTE(review): the embedded UI posts to "/analyze/event", so this is presumably
# mounted under an "/analyze" prefix elsewhere — confirm against the app setup.
router = APIRouter()
@router.post("/event")
def analyze_event(payload: dict):
    """Analyze one ad-hoc item and persist both the item and its result.

    This powers the UI while the full ingesters are being built.

    Args:
        payload: JSON body. ``title`` and ``content`` are read (at least one
            must be non-blank); ``source``, ``date``, ``url``,
            ``published_at`` and ``lang`` are optional.

    Returns:
        ``{"raw_item_id": ..., **result}`` where ``result`` is the dict
        returned by ``extract_event``. If the extractor raises, ``result`` is
        ``{"ok": False, "error": "llm_exception:<ExceptionType>"}`` so the
        failure is stored and reported instead of surfacing as a bare 500.

    Raises:
        HTTPException: 400 when both title and content are missing or blank.
    """
    # str() keeps non-string JSON values (numbers, lists) from crashing .strip().
    title = str(payload.get("title") or "").strip()
    content = str(payload.get("content") or "").strip()
    if not title and not content:
        raise HTTPException(status_code=400, detail="missing title/content")

    conn = get_store().connect()
    ensure_schema(conn)

    # Stored copies are truncated; the extractor below still sees the full text.
    raw_item_id = insert_raw_item(
        conn,
        source=str(payload.get("source") or "manual"),
        item_date=str(payload.get("date") or "") or "manual",
        title=title[:500],
        content=content[:20_000],
        url=payload.get("url"),
        published_at=payload.get("published_at"),
        lang=payload.get("lang"),
    )

    try:
        res = extract_event(title=title, content=content, lang_hint=payload.get("lang"))
    except Exception as exc:
        # Request boundary: the raw item is already saved, so record the failure
        # against it (same "llm_exception:<Type>" format the batch runner uses)
        # rather than leaving a raw item with no result row.
        res = {"ok": False, "error": f"llm_exception:{type(exc).__name__}"}

    succeeded = res.get("ok") is True
    insert_event_result(
        conn,
        raw_item_id=raw_item_id,
        model=str(res.get("model") or ""),
        ok=succeeded,
        event_json=json.dumps(res.get("event"), ensure_ascii=True) if succeeded else None,
        error=None if succeeded else str(res.get("error") or "unknown"),
    )
    return {"raw_item_id": raw_item_id, **res}

View File

@@ -0,0 +1,214 @@
import json
from datetime import date
from fastapi import APIRouter, HTTPException
from ..services.llm_extract import extract_event
from ..services.market_moves import fetch_moves_via_qfr
from ..services.store import (
ensure_schema,
get_store,
insert_event_result,
insert_raw_item,
)
# Router that the ingestion endpoints below (/rss, /macro, /market_moves,
# /market_moves/run) register themselves on.
router = APIRouter()
def _today() -> str:
    """Return the current local date formatted as ISO ``YYYY-MM-DD``."""
    current = date.today()
    return current.isoformat()
def _ingest_items(payload: dict, *, source: str, content_keys: tuple[str, ...]) -> dict:
    """Validate ``payload["items"]`` and store every dict entry as a raw item.

    Args:
        payload: Request body; ``items`` must be a non-empty list. An optional
            ``date`` is used as the item date for the whole batch, otherwise
            today's date is used.
        source: Value stored in the ``source`` column for every item.
        content_keys: Item keys tried in order; the first truthy value becomes
            the stored content.

    Returns:
        ``{"ok": True, "inserted": <number of items stored>}``.

    Raises:
        HTTPException: 400 when ``items`` is missing, not a list, or empty.
    """
    items = payload.get("items")
    if not isinstance(items, list) or not items:
        raise HTTPException(status_code=400, detail="payload.items must be a non-empty list")

    conn = get_store().connect()
    ensure_schema(conn)

    # Resolved once so a batch running across midnight keeps a single date.
    item_date = payload.get("date") or _today()

    inserted = 0
    for entry in items:
        if not isinstance(entry, dict):
            # Malformed entries are skipped, not fatal; they are simply not counted.
            continue
        content = next((entry[key] for key in content_keys if entry.get(key)), "")
        # str() so a non-string title/content cannot raise half-way through the
        # batch after earlier items were already inserted.
        insert_raw_item(
            conn,
            source=source,
            item_date=item_date,
            title=str(entry.get("title") or "")[:500],
            content=str(content)[:20_000],
            url=entry.get("url"),
            published_at=entry.get("published_at"),
            lang=entry.get("lang"),
        )
        inserted += 1
    return {"ok": True, "inserted": inserted}


@router.post("/rss")
def ingest_rss(payload: dict):
    """Ingest one or many RSS items.

    Expected payload:
    {"items": [{"title":..., "url":..., "published_at":..., "summary":..., "lang":...}, ...]}

    An item's ``summary`` is stored as its content, falling back to
    ``content`` when the summary is missing or empty.
    """
    return _ingest_items(payload, source="rss", content_keys=("summary", "content"))


@router.post("/macro")
def ingest_macro(payload: dict):
    """Ingest one or many macro items.

    Expected payload:
    {"items": [{"title":..., "content":..., "url":..., "published_at":..., "lang":...}, ...]}
    """
    return _ingest_items(payload, source="macro", content_keys=("content",))


@router.post("/market_moves")
def ingest_market_moves(payload: dict):
    """Ingest one or many pre-built market-move items.

    Expected payload:
    {"items": [{"title":..., "content":..., "url":..., "published_at":..., "lang":...}, ...]}
    """
    return _ingest_items(payload, source="market_moves", content_keys=("content",))
def _describe_move(mv: dict) -> tuple[str, str]:
    """Build the ``(title, content)`` text describing one market move."""
    sym = mv.get("symbol")
    td = mv.get("trade_date")
    ret_1d = mv.get("ret_1d")
    vol_20d = mv.get("vol_20d")
    z_1d = mv.get("z_1d")

    title = f"Market move {sym} {td}"
    # The numeric format specs would raise on None/strings, so the stats are
    # only appended when both values are real numbers.
    if isinstance(ret_1d, (int, float)) and isinstance(z_1d, (int, float)):
        title = f"Market move {sym} {td}: ret_1d={ret_1d:.4f} z_1d={z_1d:.2f}"

    content = (
        f"symbol={sym}\n"
        f"trade_date={td}\n"
        f"prev_trade_date={mv.get('prev_trade_date')}\n"
        f"close={mv.get('close')} prev_close={mv.get('prev_close')}\n"
        f"ret_1d={ret_1d} vol_20d={vol_20d} z_1d={z_1d}\n"
        "Interpretation task: explain the most likely macro/industry drivers for this move and which assets could be affected."
    )
    return title, content


@router.post("/market_moves/run")
def run_market_moves(payload: dict | None = None):
    """Generate daily market-move items from QFR raw data and parse them into events.

    Args:
        payload: Optional body. ``date`` (``YYYY-MM-DD``, defaults to today)
            selects the day; ``symbols`` is passed through to
            ``fetch_moves_via_qfr``.

    Returns:
        A summary dict with ``ok``, ``date``, the ``inserted`` / ``parsed_ok`` /
        ``parsed_err`` counters, and the fetcher's ``errors`` and ``symbols``.

    Raises:
        HTTPException: 500 (with the fetcher's response as detail) when the
            fetch reports ``ok`` as falsy.
    """
    payload = payload or {}
    day = str(payload.get("date") or _today())
    # QFR raw data uses trade_date like YYYYMMDD.
    trade_date = day.replace("-", "")

    conn = get_store().connect()
    ensure_schema(conn)

    data = fetch_moves_via_qfr(trade_date=trade_date, symbols=payload.get("symbols"))
    if not data.get("ok"):
        raise HTTPException(status_code=500, detail=data)

    inserted = 0
    parsed_ok = 0
    parsed_err = 0
    # `or []` also covers a "moves" key that is present but None.
    for mv in data.get("moves") or []:
        if not isinstance(mv, dict):
            # Skip malformed entries instead of failing the whole batch.
            continue

        title, content = _describe_move(mv)
        raw_item_id = insert_raw_item(
            conn,
            source="market_moves",
            item_date=day,
            title=title[:500],
            content=content[:20_000],
            url=None,
            published_at=None,
            lang="en",
        )
        inserted += 1

        try:
            res = extract_event(title=title, content=content, lang_hint="en")
        except Exception as e:
            # Network/provider errors should not abort the whole batch; they are
            # stored as a failed result with an empty model name.
            res = {"ok": False, "error": f"llm_exception:{type(e).__name__}"}

        succeeded = res.get("ok") is True
        if succeeded:
            event_json = json.dumps(res.get("event"), ensure_ascii=True)
            err = None
        else:
            event_json = None
            err = str(res.get("error") or "unknown")
            if err == "llm_failed":
                # Keep a short hint to debug gateway flakiness without dumping secrets.
                exc = str(res.get("exc") or "")
                if exc:
                    err = f"{err}:{exc}"

        insert_event_result(
            conn,
            raw_item_id=raw_item_id,
            model=str(res.get("model") or ""),
            ok=succeeded,
            event_json=event_json,
            error=err,
        )
        if succeeded:
            parsed_ok += 1
        else:
            parsed_err += 1

    return {
        "ok": True,
        "date": day,
        "inserted": inserted,
        "parsed_ok": parsed_ok,
        "parsed_err": parsed_err,
        "errors": data.get("errors", []),
        "symbols": data.get("symbols"),
    }

View File

@@ -0,0 +1,215 @@
from datetime import date
from fastapi import APIRouter
from fastapi.responses import HTMLResponse
from ..services.store import counts, get_store, list_events, list_raw_items, sources_today
# Router that the read-only query endpoints and the HTML status page below
# register themselves on.
# NOTE(review): the embedded UI fetches "/query/status" etc., so this is
# presumably mounted under a "/query" prefix elsewhere — confirm against the app setup.
router = APIRouter()
@router.get("/health")
def health():
    """Liveness probe: unconditionally reports ``{"ok": True}``."""
    return dict(ok=True)
@router.get("/status")
def status(day: str | None = None):
    """Report overall counts plus the per-source breakdown for one date.

    Args:
        day: Date to report on; today's ISO date is used when omitted or empty.

    Returns:
        ``{"date": ..., "counts": ..., "sources": ...}`` built from the store's
        ``counts`` and ``sources_today`` helpers.
    """
    conn = get_store().connect()
    item_date = day if day else date.today().isoformat()
    totals = counts(conn)
    per_source = sources_today(conn, item_date)
    return {"date": item_date, "counts": totals, "sources": per_source}
@router.get("/raw_items")
def raw_items(limit: int = 20):
    """Return stored raw items as ``{"items": [...]}``.

    Args:
        limit: Forwarded unchanged to ``list_raw_items`` (default 20).
    """
    conn = get_store().connect()
    rows = list_raw_items(conn, limit=limit)
    return {"items": rows}
@router.get("/events")
def events(limit: int = 20):
    """Return stored event results as ``{"items": [...]}``.

    Args:
        limit: Forwarded unchanged to ``list_events`` (default 20).
    """
    conn = get_store().connect()
    rows = list_events(conn, limit=limit)
    return {"items": rows}
@router.get("/ui", response_class=HTMLResponse)
def ui():
    """Serve the single-page status UI as one self-contained HTML document.

    The page polls the status, raw-item and event endpoints every 5 seconds and
    offers a form that posts to the manual analyze endpoint. Table cells are
    built as HTML strings in the browser, so every stored value is passed
    through the page's ``esc()`` helper first.

    Returns:
        HTMLResponse: the static page; no server-side templating is involved.
    """
    # Tiny no-build UI for early validation.
    # Security notes for the embedded script (ingested items are untrusted):
    #   * esc() also escapes quotes, because its output is placed inside
    #     double-quoted attributes (href) as well as element content.
    #   * only http(s) URLs become links, so a stored "javascript:" URL cannot
    #     run; anything else is shown as inert escaped text.
    #   * links opened in a new tab carry rel="noopener noreferrer".
    html = """<!doctype html>
<html>
<head>
<meta charset=\"utf-8\" />
<meta name=\"viewport\" content=\"width=device-width, initial-scale=1\" />
<title>EventFlow V1</title>
<style>
body { font-family: ui-sans-serif, system-ui, Arial; margin: 18px; }
h1 { margin: 0 0 8px 0; }
.row { display: flex; gap: 16px; flex-wrap: wrap; }
.card { border: 1px solid #ddd; border-radius: 8px; padding: 12px; min-width: 320px; }
table { border-collapse: collapse; width: 100%; }
th, td { border-bottom: 1px solid #eee; padding: 6px 8px; font-size: 13px; vertical-align: top; }
th { text-align: left; color: #444; }
code { background: #f6f6f6; padding: 2px 4px; border-radius: 4px; }
.muted { color: #666; }
.ok { color: #0a7; font-weight: 600; }
.bad { color: #b00; font-weight: 600; }
.mono { font-family: ui-monospace, SFMono-Regular, Menlo, monospace; }
button { padding: 6px 10px; }
input, textarea { width: 100%; box-sizing: border-box; }
textarea { height: 90px; }
</style>
</head>
<body>
<h1>EventFlow V1</h1>
<div class=\"muted\">Status page: sources, dates, counts, and parsed events.</div>
<div style=\"height:12px\"></div>
<div class=\"row\">
<div class=\"card\" style=\"flex: 1\">
<div style=\"display:flex; justify-content:space-between; align-items:center; gap:12px;\">
<div>
<div><b>Today</b>: <span id=\"today\"></span></div>
<div class=\"muted\">Auto-refresh every 5s</div>
</div>
<button onclick=\"refreshAll()\">Refresh</button>
</div>
<div style=\"height:10px\"></div>
<div id=\"counts\" class=\"mono\"></div>
<div style=\"height:10px\"></div>
<div><b>Sources (today)</b></div>
<table id=\"sources\"></table>
</div>
<div class=\"card\" style=\"flex: 1\">
<div><b>Quick Analyze (manual)</b></div>
<div class=\"muted\">This will save a raw item + parsed event into SQLite.</div>
<div style=\"height:8px\"></div>
<label>Title</label>
<input id=\"m_title\" placeholder=\"e.g. Fed signals higher-for-longer rates\" />
<div style=\"height:6px\"></div>
<label>Content</label>
<textarea id=\"m_content\" placeholder=\"Paste a paragraph...\"></textarea>
<div style=\"height:6px\"></div>
<button onclick=\"runAnalyze()\">Analyze</button>
<div id=\"m_out\" class=\"mono\" style=\"white-space:pre-wrap; margin-top:10px;\"></div>
</div>
</div>
<div style=\"height:16px\"></div>
<div class=\"row\">
<div class=\"card\" style=\"flex: 1\">
<div><b>Latest Raw Items</b> <span class=\"muted\">(limit 20)</span></div>
<table id=\"raw\"></table>
</div>
<div class=\"card\" style=\"flex: 1\">
<div><b>Latest Parsed Events</b> <span class=\"muted\">(limit 20)</span></div>
<table id=\"events\"></table>
</div>
</div>
<script>
function esc(s) {
return (s ?? '').toString().replaceAll('&', '&amp;').replaceAll('<', '&lt;').replaceAll('>', '&gt;').replaceAll('"', '&quot;').replaceAll("'", '&#39;');
}
function safeUrl(u) {
const s = (u ?? '').toString().trim();
const low = s.toLowerCase();
return (low.startsWith('http://') || low.startsWith('https://')) ? s : '';
}
async function jget(url) {
const r = await fetch(url);
if (!r.ok) throw new Error('HTTP ' + r.status);
return await r.json();
}
function setTable(el, headers, rows) {
let h = '<tr>' + headers.map(x => `<th>${esc(x)}</th>`).join('') + '</tr>';
let b = rows.map(r => '<tr>' + r.map(x => `<td>${x}</td>`).join('') + '</tr>').join('');
el.innerHTML = h + b;
}
async function refreshStatus() {
const st = await jget('/query/status');
document.getElementById('today').textContent = st.date;
const c = st.counts;
document.getElementById('counts').textContent = `raw_items=${c.raw_items} events=${c.events} ok=${c.events_ok} err=${c.events_err}`;
const srows = (st.sources || []).map(x => [esc(x.source), esc(x.count)]);
setTable(document.getElementById('sources'), ['source', 'count'], srows);
}
async function refreshRaw() {
const data = await jget('/query/raw_items?limit=20');
const rows = (data.items || []).map(it => {
const href = safeUrl(it.url);
let u = '';
if (href) {
u = `<a href="${esc(href)}" target="_blank" rel="noopener noreferrer">link</a>`;
} else if (it.url) {
u = `<span class="muted">${esc(it.url.toString().slice(0, 80))}</span>`;
}
return [
`<span class="mono">${esc(it.id)}</span>`,
esc(it.source),
esc(it.item_date),
esc(it.lang || ''),
esc((it.title || '').slice(0, 120)),
u,
];
});
setTable(document.getElementById('raw'), ['id', 'source', 'date', 'lang', 'title', 'url'], rows);
}
async function refreshEvents() {
const data = await jget('/query/events?limit=20');
const rows = (data.items || []).map(it => {
const cls = it.ok ? 'ok' : 'bad';
let ev = '';
try {
if (it.event_json) {
const obj = JSON.parse(it.event_json);
ev = `${esc(obj.event_type)} dir=${esc(obj.impact_direction)} conf=${esc(obj.confidence)}<br/><span class="muted">${esc((obj.summary_en || obj.summary_zh || '').slice(0, 140))}</span>`;
}
} catch (e) {
ev = '<span class="bad">bad json</span>';
}
return [
`<span class="mono">${esc(it.id)}</span>`,
esc(it.source),
`<span class="${cls}">${it.ok ? 'ok' : 'err'}</span>`,
esc((it.title || '').slice(0, 90)),
ev,
];
});
setTable(document.getElementById('events'), ['id', 'source', 'ok', 'raw_title', 'event'], rows);
}
async function runAnalyze() {
const title = document.getElementById('m_title').value;
const content = document.getElementById('m_content').value;
const out = document.getElementById('m_out');
out.textContent = 'running...';
try {
const r = await fetch('/analyze/event', {
method: 'POST',
headers: {'Content-Type': 'application/json'},
body: JSON.stringify({source: 'manual', date: new Date().toISOString().slice(0,10), title, content, lang: ''}),
});
const j = await r.json();
out.textContent = JSON.stringify(j, null, 2);
} catch (e) {
out.textContent = 'request failed: ' + e;
}
await refreshAll();
}
async function refreshAll() {
try {
await refreshStatus();
await refreshRaw();
await refreshEvents();
} catch (e) {
console.error(e);
}
}
refreshAll();
setInterval(refreshAll, 5000);
</script>
</body>
</html>"""
    return HTMLResponse(content=html)