109 lines
2.8 KiB
Python
109 lines
2.8 KiB
Python
|
|
import json
import math
import os
from dataclasses import asdict, dataclass
from typing import Any

import pandas as pd
|
||
|
|
|
||
|
|
|
||
|
|
@dataclass
class Move:
    """Latest one-day price move of one symbol, as produced by ``_calc_move``.

    The record is converted with ``dataclasses.asdict`` and emitted as JSON by
    ``main``, so the field names are part of the script's output format.
    """

    # --- identification ---------------------------------------------------
    symbol: str  # symbol exactly as passed in by the caller
    trade_date: str  # str() of the trade date of the latest row used
    prev_trade_date: str  # str() of the trade date of the row before it

    # --- prices -----------------------------------------------------------
    close: float  # close on ``trade_date``
    prev_close: float  # close on ``prev_trade_date``

    # --- derived statistics -------------------------------------------------
    ret_1d: float  # fractional change from ``prev_close`` to ``close``
    vol_20d: float  # std of up to the last 20 daily returns; 0.0 if not positive
    z_1d: float  # ``ret_1d / vol_20d``; 0.0 when ``vol_20d`` is 0.0
|
||
|
|
|
||
|
|
|
||
|
|
def _read_one(rawdir: str, symbol: str) -> pd.DataFrame:
    """Load the parquet file of ``symbol`` from ``rawdir``, sorted by trade date.

    Args:
        rawdir: Directory that contains one ``<symbol without dots>.parquet``
            file per symbol.
        symbol: Symbol with an optional dotted suffix, e.g. ``"510300.SH"``.

    Returns:
        The full frame read from disk, ordered by ``trade_date`` ascending with
        a fresh ``0..n-1`` index. Rows that share a ``trade_date`` keep the
        order they had in the file.

    Raises:
        RuntimeError: If the ``trade_date`` or ``close`` column is missing.
        Exception: Whatever ``pandas.read_parquet`` raises (for example when
            the file does not exist) is propagated unchanged.
    """
    # QFR stores parquet as e.g. 510300SH.parquet / 159915SZ.parquet
    path = os.path.join(rawdir, symbol.replace(".", "") + ".parquet")
    df = pd.read_parquet(path)

    # Fail early with the actual schema in the message; every downstream step
    # depends on these two columns.
    if not {"trade_date", "close"}.issubset(df.columns):
        raise RuntimeError(f"unexpected parquet schema for {symbol}: {df.columns.tolist()}")

    # The default quicksort is not stable, so rows with duplicate dates could
    # be reordered and the "latest" row picked by callers would be arbitrary.
    ordered = df.sort_values("trade_date", kind="stable")
    return ordered.reset_index(drop=True)
|
||
|
|
|
||
|
|
|
||
|
|
def _calc_move(df: pd.DataFrame, symbol: str, trade_date: str | None) -> Move | None:
|
||
|
|
if df.empty:
|
||
|
|
return None
|
||
|
|
# pick last available <= trade_date if given
|
||
|
|
if trade_date:
|
||
|
|
df2 = df[df["trade_date"] <= trade_date]
|
||
|
|
if df2.empty:
|
||
|
|
return None
|
||
|
|
df = df2
|
||
|
|
|
||
|
|
if len(df) < 2:
|
||
|
|
return None
|
||
|
|
|
||
|
|
# compute returns
|
||
|
|
close = df["close"].astype(float)
|
||
|
|
ret = close.pct_change()
|
||
|
|
|
||
|
|
i = len(df) - 1
|
||
|
|
prev_i = i - 1
|
||
|
|
|
||
|
|
td = str(df.iloc[i]["trade_date"])
|
||
|
|
ptd = str(df.iloc[prev_i]["trade_date"])
|
||
|
|
|
||
|
|
close_i = float(close.iloc[i])
|
||
|
|
prev_close_i = float(close.iloc[prev_i])
|
||
|
|
ret_1d = float(ret.iloc[i])
|
||
|
|
|
||
|
|
# vol over last 20 returns (excluding NaN)
|
||
|
|
vol_20 = float(ret.iloc[max(0, i - 20 + 1) : i + 1].std(skipna=True))
|
||
|
|
if not (vol_20 > 0):
|
||
|
|
vol_20 = 0.0
|
||
|
|
z = float(ret_1d / vol_20) if vol_20 > 0 else 0.0
|
||
|
|
|
||
|
|
return Move(
|
||
|
|
symbol=symbol,
|
||
|
|
trade_date=td,
|
||
|
|
prev_trade_date=ptd,
|
||
|
|
close=close_i,
|
||
|
|
prev_close=prev_close_i,
|
||
|
|
ret_1d=ret_1d,
|
||
|
|
vol_20d=vol_20,
|
||
|
|
z_1d=z,
|
||
|
|
)
|
||
|
|
|
||
|
|
|
||
|
|
def _finite_or_none(value: Any) -> Any:
    """Return ``None`` for NaN/inf floats and ``value`` unchanged otherwise."""
    if isinstance(value, float) and not math.isfinite(value):
        return None
    return value


def main() -> None:
    """Compute the latest move for each requested symbol and print one JSON object.

    Environment variables:
        QFR_RAWDIR: Directory with the per-symbol parquet files. Falls back to
            the hard-coded project data directory when unset or empty.
        QFR_SYMBOLS: Comma-separated symbols (required).
        QFR_TRADE_DATE: Optional upper bound for the trade date.

    The printed object has the keys ``rawdir``, ``trade_date``, ``moves`` (one
    dict per symbol that produced a move) and ``errors`` (one
    ``{"symbol", "error"}`` dict per symbol that did not).

    Raises:
        SystemExit: If ``QFR_SYMBOLS`` names no symbol at all.
    """
    rawdir = os.environ.get("QFR_RAWDIR") or "/home/openclaw/projects/quant-factor-research/data/raw"
    requested = os.environ.get("QFR_SYMBOLS") or ""
    symbols = [part.strip() for part in requested.split(",") if part.strip()]
    trade_date = (os.environ.get("QFR_TRADE_DATE") or "").strip() or None

    # Checked on the parsed list so that a value made only of commas or
    # whitespace is rejected instead of silently producing an empty report.
    if not symbols:
        raise SystemExit("QFR_SYMBOLS is required")

    moves: list[dict[str, Any]] = []
    errors: list[dict[str, str]] = []

    for sym in symbols:
        # One bad symbol must not abort the whole report, so any failure while
        # loading or computing is recorded and the loop carries on.
        try:
            mv = _calc_move(_read_one(rawdir, sym), sym, trade_date)
        except Exception as e:
            # Some exceptions have an empty message; fall back to the type name
            # so the entry is never blank.
            errors.append({"symbol": sym, "error": str(e) or type(e).__name__})
            continue

        if mv is None:
            errors.append({"symbol": sym, "error": "no_data"})
            continue

        # NaN/Infinity are not valid JSON tokens; emit null for them instead.
        moves.append({key: _finite_or_none(val) for key, val in asdict(mv).items()})

    out: dict[str, Any] = {
        "rawdir": rawdir,
        "trade_date": trade_date,
        "moves": moves,
        "errors": errors,
    }
    # allow_nan=False guarantees strictly valid JSON output.
    print(json.dumps(out, ensure_ascii=True, allow_nan=False))
|
||
|
|
|
||
|
|
|
||
|
|
# Run the report only when this file is executed as a script, so importing
# the module has no side effects.
if __name__ == "__main__":
    main()
|