from __future__ import annotations

import argparse
import json
from pathlib import Path

import pandas as pd

from qfr.data.tushare_client import fetch_fund_daily, load_tushare_config


def main() -> None:
    """Download daily bars for every configured asset and store them as parquet.

    Command-line options:
        --env     Path to a .env file, passed to ``load_tushare_config``.
        --config  JSON file whose top-level ``"assets"`` entry is iterated;
                  each item must carry a ``"ts_code"`` key.
        --start   Start date (YYYYMMDD), forwarded as ``start_date``.
        --end     End date (YYYYMMDD), forwarded as ``end_date``.
        --outdir  Output directory; created (with parents) when missing.

    For each asset one file ``<ts_code without dots>.parquet`` is written to
    the output directory. Assets for which the fetch returns ``None`` or an
    empty frame are reported on stdout and skipped.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--env", default=None, help="Path to .env")
    parser.add_argument("--config", default="configs/etf_universe.json")
    parser.add_argument("--start", dest="start_date", default=None, help="YYYYMMDD")
    parser.add_argument("--end", dest="end_date", default=None, help="YYYYMMDD")
    parser.add_argument("--outdir", default="data/raw")
    args = parser.parse_args()

    tushare_cfg = load_tushare_config(args.env)
    universe = json.loads(Path(args.config).read_text(encoding="utf-8"))
    asset_entries = universe["assets"]

    target_dir = Path(args.outdir)
    target_dir.mkdir(parents=True, exist_ok=True)

    # Columns the backtest expects. Per the original note, fund_daily provides:
    # ts_code, trade_date, open, high, low, close, vol, amount.
    wanted_columns = (
        "ts_code",
        "trade_date",
        "open",
        "high",
        "low",
        "close",
        "vol",
        "amount",
    )

    for entry in asset_entries:
        code = entry["ts_code"]
        frame = fetch_fund_daily(
            cfg=tushare_cfg,
            ts_code=code,
            start_date=args.start_date,
            end_date=args.end_date,
        ) if False else fetch_fund_daily(
            tushare_cfg,
            ts_code=code,
            start_date=args.start_date,
            end_date=args.end_date,
        )

        has_rows = frame is not None and not frame.empty
        if not has_rows:
            print(f"skip {code}: empty")
            continue

        # Keep only the expected columns that are actually present, in the
        # canonical order above, then order rows chronologically.
        # NOTE: sort_values raises KeyError if "trade_date" is not present.
        present = [name for name in wanted_columns if name in frame.columns]
        frame = frame[present].copy().sort_values("trade_date")

        stem = code.replace(".", "")
        destination = target_dir / f"{stem}.parquet"
        frame.to_parquet(destination, index=False)
        print(f"wrote {code}: {len(frame)} rows -> {destination}")


if __name__ == "__main__":
    main()