initial import: etf strategy project
This commit is contained in:
148
scripts/expand_etf_universe.py
Normal file
148
scripts/expand_etf_universe.py
Normal file
@@ -0,0 +1,148 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import math
|
||||
from collections import defaultdict
|
||||
from datetime import date, timedelta
|
||||
from pathlib import Path
|
||||
|
||||
import pandas as pd
|
||||
|
||||
from qfr.data.tushare_client import load_tushare_config, pro_api
|
||||
|
||||
|
||||
def median_amount(cfg, ts_code: str, start: str, end: str, api=None) -> float:
    """Return the median of the ``amount`` column from ``fund_daily`` for one fund.

    Args:
        cfg: Configuration handed to ``pro_api`` when ``api`` is not supplied.
        ts_code: Fund code to query.
        start: Value forwarded as ``start_date``.
        end: Value forwarded as ``end_date``.
        api: Optional, already constructed client exposing ``fund_daily``.
            Callers that score many funds can pass one client here instead of
            having a fresh one built on every call.

    Returns:
        The median of the numeric ``amount`` values as a ``float``, or ``0.0``
        when the query returns nothing, has no ``amount`` column, or contains
        no value that can be parsed as a number.
    """
    if api is None:
        api = pro_api(cfg)

    df = api.fund_daily(
        ts_code=ts_code,
        start_date=start,
        end_date=end,
        fields="trade_date,amount",
    )
    if df is None or df.empty or "amount" not in df.columns:
        return 0.0

    # Non-numeric cells become NaN and are dropped rather than raising.
    amt = pd.to_numeric(df["amount"], errors="coerce").dropna()
    if amt.empty:
        return 0.0
    return float(amt.median())
|
||||
|
||||
|
||||
def classify_by_keyword(kw: str) -> str:
    """Map a search keyword to a coarse asset-class tag.

    The match is an exact membership test against fixed keyword groups, not a
    substring search. Groups are checked in priority order (rates, then
    commodities, then equity sectors), so a keyword listed in more than one
    group -- "稀土" appears under both commodities and equity -- takes the tag
    of the first group that contains it. Any keyword not listed anywhere is
    tagged ``"equity_cn_sector"``.

    Args:
        kw: The keyword to classify.

    Returns:
        One of ``"rates_cn"``, ``"commodity_cn"`` or ``"equity_cn_sector"``.
    """
    # Very rough tagging, intended for universe constraints / reporting only.
    groups_by_priority = (
        ("rates_cn", frozenset({"国债", "政金债", "债", "短债", "中债"})),
        (
            "commodity_cn",
            frozenset(
                {"黄金", "白银", "有色", "稀土", "矿业", "原油", "油", "煤", "化工", "豆粕", "农业"}
            ),
        ),
        (
            "equity_cn_sector",
            frozenset(
                {
                    "半导体",
                    "芯片",
                    "通信",
                    "5G",
                    "通信设备",
                    "军工",
                    "机器人",
                    "工业母机",
                    "智能制造",
                    "消费电子",
                    "AI",
                    "算力",
                    "软件",
                    "创新药",
                    "医药",
                    "新能源",
                    "光伏",
                    "锂电",
                    "电池",
                    "新材料",
                    "稀土",
                }
            ),
        ),
    )

    for tag, members in groups_by_priority:
        if kw in members:
            return tag

    # Unknown keywords default to the equity-sector tag.
    return "equity_cn_sector"
|
||||
|
||||
|
||||
def main() -> None:
    """Add the most actively traded matching funds to the ETF universe config.

    Steps:
      1. Load the JSON config named by ``--config`` and collect the
         ``ts_code`` values already present under ``assets``.
      2. Fetch the fund list via ``fund_basic`` and assign each fund to the
         first keyword (in ``--keywords`` order) contained in its name.
      3. For every keyword, score the candidates with ``median_amount`` over
         the last 180 days, drop those that fail or fall below
         ``--min_median_amount``, and keep the top ``--per_keyword``.
      4. Append the selected funds to ``assets`` and write the config to
         ``--out`` (or back to ``--config`` when ``--out`` is not given).
      5. Print a summary of what was added.

    Raises:
        RuntimeError: If ``fund_basic`` returns nothing.
    """
    import sys  # local import: only needed for the stderr warning below

    ap = argparse.ArgumentParser()
    ap.add_argument("--config", default="configs/etf_universe.json")
    ap.add_argument("--out", default=None)
    ap.add_argument("--per_keyword", type=int, default=2)
    ap.add_argument("--min_median_amount", type=float, default=0.0)
    ap.add_argument(
        "--keywords",
        default=(
            "半导体,芯片,通信,5G,通信设备,军工,机器人,工业母机,智能制造,消费电子,AI,算力,软件,创新药,医药,新能源,光伏,锂电,电池,"
            "矿业,有色,稀土,新材料,黄金,白银,原油,煤,化工,豆粕,农业,国债,政金债"
        ),
    )
    args = ap.parse_args()

    cfg = load_tushare_config()
    api = pro_api(cfg)

    conf_path = Path(args.config)
    conf = json.loads(conf_path.read_text(encoding="utf-8"))

    assets = conf.get("assets", [])
    have = {a["ts_code"] for a in assets}

    # dict.fromkeys removes repeated keywords while keeping their order; a
    # repeated keyword would otherwise process the same bucket twice and
    # append the same funds twice.
    kw_list = list(
        dict.fromkeys(k.strip() for k in str(args.keywords).split(",") if k.strip())
    )
    # A negative count would slice as "all but the last N"; treat it as zero.
    per_keyword = max(0, int(args.per_keyword))
    min_amount = float(args.min_median_amount)

    fb = api.fund_basic(market="E", status="L", fields="ts_code,name")
    if fb is None or fb.empty:
        raise RuntimeError("fund_basic returned empty")

    fb = fb.dropna(subset=["ts_code", "name"]).copy()

    # Read the clock once so both ends of the window derive from the same day.
    today = date.today()
    end = today.strftime("%Y%m%d")
    start = (today - timedelta(days=180)).strftime("%Y%m%d")

    # Each fund lands in at most one bucket: the first keyword found in its
    # name. Funds already in the config, or listed more than once, are skipped
    # here so they are never queried or selected.
    buckets: dict[str, list[tuple[str, str]]] = defaultdict(list)
    seen = set(have)
    for raw_code, raw_name in zip(fb["ts_code"], fb["name"]):
        ts_code = str(raw_code).strip()
        name = str(raw_name).strip()
        if ts_code in seen:
            continue
        for kw in kw_list:
            if kw in name:
                buckets[kw].append((ts_code, name))
                seen.add(ts_code)
                break

    chosen: list[tuple[str, str, str, float, str]] = []

    for kw in kw_list:
        cands = buckets.get(kw, [])
        if not cands:
            continue

        scored: list[tuple[float, str, str]] = []
        for ts_code, name in cands:
            try:
                m = median_amount(cfg, ts_code, start, end)
            except Exception as exc:
                # Best effort: one failing fund must not abort the whole run,
                # but the failure is reported instead of silently discarded.
                print(
                    f"warning: median_amount failed for {ts_code}: {exc!r}",
                    file=sys.stderr,
                )
                continue
            if not math.isfinite(m) or m <= 0:
                continue
            if m < min_amount:
                continue
            scored.append((m, ts_code, name))

        # Highest median first; ties fall back to tuple ordering on ts_code/name.
        scored.sort(reverse=True)
        cls = classify_by_keyword(kw)
        for m, ts_code, name in scored[:per_keyword]:
            chosen.append((kw, ts_code, name, m, cls))

    for kw, ts_code, name, m, cls in chosen:
        assets.append({"ts_code": ts_code, "asset_class": cls, "name": name})
        have.add(ts_code)

    conf["assets"] = assets

    out_path = Path(args.out) if args.out else conf_path
    out_path.write_text(json.dumps(conf, ensure_ascii=True, indent=2) + "\n", encoding="utf-8")

    print(f"added {len(chosen)} ETFs")
    # Only the first 80 selections are listed on stdout.
    for kw, ts_code, name, m, cls in chosen[:80]:
        print(f"{kw}\t{ts_code}\t{m:.0f}\t{cls}\t{name}")
|
||||
|
||||
|
||||
# Run the command-line entry point only when executed as a script, so that
# importing this module has no side effects.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user