# Source file: quant-factor-research/scripts/tushare_download_universe.py (Python; listed as 48 lines, 1.6 KiB)

from __future__ import annotations
import argparse
import json
from pathlib import Path
import pandas as pd
from qfr.data.tushare_client import fetch_fund_daily, load_tushare_config
def main(argv: list[str] | None = None) -> None:
    """Download daily bars for each asset in the universe config to parquet.

    The JSON file given by ``--config`` must contain an ``"assets"`` list whose
    entries each carry a ``"ts_code"``. Every asset is fetched through
    ``fetch_fund_daily`` for the optional ``--start``/``--end`` range and
    written to ``<outdir>/<ts_code with dots removed>.parquet``, sorted by
    ``trade_date``. Assets are skipped (with a message) when the fetch yields
    ``None``/an empty frame or a frame without a ``trade_date`` column.

    Args:
        argv: Command-line arguments to parse. ``None`` (the default) makes
            argparse read ``sys.argv[1:]``, so a bare ``main()`` call behaves
            exactly as before.

    Raises:
        KeyError: If the config lacks ``"assets"`` or an asset entry lacks
            ``"ts_code"``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--env", default=None, help="Path to .env")
    parser.add_argument("--config", default="configs/etf_universe.json")
    parser.add_argument("--start", dest="start_date", default=None, help="YYYYMMDD")
    parser.add_argument("--end", dest="end_date", default=None, help="YYYYMMDD")
    parser.add_argument("--outdir", default="data/raw")
    args = parser.parse_args(argv)

    cfg = load_tushare_config(args.env)
    conf = json.loads(Path(args.config).read_text(encoding="utf-8"))
    assets = conf["assets"]

    outdir = Path(args.outdir)
    outdir.mkdir(parents=True, exist_ok=True)

    # standardize columns expected by backtest
    # fund_daily provides: ts_code, trade_date, open, high, low, close, vol, amount
    wanted = ("ts_code", "trade_date", "open", "high", "low", "close", "vol", "amount")

    for asset in assets:
        ts_code = asset["ts_code"]
        df = fetch_fund_daily(
            cfg,
            ts_code=ts_code,
            start_date=args.start_date,
            end_date=args.end_date,
        )
        if df is None or df.empty:
            print(f"skip {ts_code}: empty")
            continue

        # The column filter below tolerates missing columns, but sorting needs
        # trade_date. Skip this asset instead of letting a KeyError abort the
        # download of every remaining asset.
        if "trade_date" not in df.columns:
            print(f"skip {ts_code}: no trade_date column")
            continue

        keep = [c for c in wanted if c in df.columns]
        df = df[keep].copy()
        df = df.sort_values("trade_date")

        out = outdir / f"{ts_code.replace('.', '')}.parquet"
        df.to_parquet(out, index=False)
        print(f"wrote {ts_code}: {len(df)} rows -> {out}")
if __name__ == "__main__":
    # Run the download only when executed as a script, never on import.
    main()