initial import: etf strategy project

This commit is contained in:
2026-03-13 17:10:49 +08:00
commit 79ea983ca3
123 changed files with 6398 additions and 0 deletions

View File

@@ -0,0 +1,45 @@
from __future__ import annotations
import argparse
from pathlib import Path
import pandas as pd
def main() -> None:
    """Print the deepest drawdown points of an equity curve with nearby trades.

    Reads the parquet given by ``--equity`` (an ``equity`` column is required)
    and the parquet given by ``--trades`` (a ``trade_date`` column is
    required). For each of the ``--top`` most negative drawdown observations
    it prints the trough, the preceding peak, and the last 25 trades dated
    between the two.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--equity", required=True)
    parser.add_argument("--trades", required=True)
    parser.add_argument("--top", type=int, default=3)
    opts = parser.parse_args()

    curve = pd.read_parquet(opts.equity).copy()
    # Dates are compared as strings against trades["trade_date"] below.
    curve.index = curve.index.astype(str)
    equity = curve["equity"].astype(float)
    drawdown = equity / equity.cummax() - 1.0

    # NOTE(review): nsmallest ranks individual dates, so several of the
    # reported troughs can belong to the same drawdown episode.
    troughs = drawdown.nsmallest(opts.top)

    trades = pd.read_parquet(opts.trades).copy()
    trades["trade_date"] = trades["trade_date"].astype(str)

    report_cols = ["trade_date", "ts_code", "side", "reason", "weight_before", "weight_after", "price"]
    for trough_date, depth in troughs.items():
        # Date of the highest equity seen up to and including the trough
        # (idxmax yields the first date on which that maximum occurs).
        peak_date = equity.loc[:trough_date].idxmax()
        print("---")
        print("trough", trough_date, "dd", float(depth))
        print(
            "peak",
            peak_date,
            "peak_equity",
            float(equity.loc[peak_date]),
            "trough_equity",
            float(equity.loc[trough_date]),
        )
        on_or_after_peak = trades["trade_date"] >= peak_date
        on_or_before_trough = trades["trade_date"] <= trough_date
        window = trades[on_or_after_peak & on_or_before_trough]
        print("trades in window", len(window))
        if window.empty:
            continue
        shown = [name for name in report_cols if name in window.columns]
        print(window[shown].tail(25).to_string(index=False))


if __name__ == "__main__":
    main()

View File

@@ -0,0 +1,164 @@
from __future__ import annotations
import argparse
import itertools
import json
from dataclasses import replace
from pathlib import Path
import numpy as np
import pandas as pd
from qfr.strategy.etf_trend import Constraints, TrendParams, UniverseAsset, run_backtest
def load_universe(config_path: Path) -> tuple[list[UniverseAsset], Constraints, str, str]:
    """Parse the universe JSON config.

    Returns ``(universe, constraints, risk_proxy, rates_fallback)``. Every
    entry of ``assets`` becomes a ``UniverseAsset``; missing constraint keys
    fall back to the defaults visible below.
    """
    raw = json.loads(config_path.read_text(encoding="utf-8"))
    assets = [UniverseAsset(**spec) for spec in raw["assets"]]
    section = raw.get("constraints", {})

    def required(kind: str) -> int:
        # Minimum number of holdings of this kind; 0 when not configured.
        return int(section.get("must_include", {}).get(kind, 0))

    limits = Constraints(
        max_positions=int(section.get("max_positions", 4)),
        must_commodity=required("commodity"),
        must_rates=required("rates"),
        must_equity=required("equity"),
    )
    proxy = section.get("risk_proxy", "510300.SH")
    fallback = section.get("rates_fallback", "511010.SH")
    return assets, limits, proxy, fallback
def load_prices(raw_dir: Path, universe: list[UniverseAsset], start: str, end: str) -> dict[str, pd.DataFrame]:
    """Load one parquet price file per asset, clipped to ``[start, end]``.

    The file name is the asset's ``ts_code`` with dots removed plus
    ``.parquet``. ``trade_date`` is cast to ``str`` and compared
    lexicographically against ``start``/``end`` (both inclusive).
    """
    frames: dict[str, pd.DataFrame] = {}
    for asset in universe:
        code = asset.ts_code
        path = raw_dir / f"{code.replace('.', '')}.parquet"
        frame = pd.read_parquet(path).copy()
        frame["trade_date"] = frame["trade_date"].astype(str)
        not_too_early = frame["trade_date"] >= start
        not_too_late = frame["trade_date"] <= end
        frames[code] = frame[not_too_early & not_too_late]
    return frames
def perf_stats(equity: pd.Series) -> dict[str, float]:
    """Summarise an equity curve.

    Returns an empty dict when the curve yields no period returns; otherwise
    ``ann_return`` (geometric, 252 periods per year), ``ann_vol`` (sample
    standard deviation of period returns, annualised), ``max_drawdown``
    (most negative value of equity / running max - 1) and ``calmar``
    (``ann_return / |max_drawdown|``, NaN when there is no drawdown).
    """
    period_returns = equity.pct_change().dropna()
    if period_returns.empty:
        return {}

    n_periods = len(period_returns)
    growth = equity.iloc[-1] / equity.iloc[0]
    ann_ret = float(growth ** (252 / n_periods) - 1)
    ann_vol = float(period_returns.std(ddof=1) * (252 ** 0.5))

    underwater = equity / equity.cummax() - 1.0
    dd = float(underwater.min())
    if dd < 0:
        calmar = float(ann_ret / abs(dd))
    else:
        calmar = float("nan")
    return {"ann_return": ann_ret, "ann_vol": ann_vol, "max_drawdown": dd, "calmar": calmar}
def main() -> None:
    """Grid-search trend parameters with ``run_backtest`` and save the ranking.

    Every grid point with ``sma_fast < sma_slow`` is backtested with daily
    rebalancing. Results with ``ann_vol <= 0.18`` are kept (all results if
    none qualify), sorted by ``ann_return`` then ``calmar`` descending,
    written to ``--out`` and the first ten rows are printed.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--config", default="configs/etf_universe.json")
    parser.add_argument("--rawdir", default="data/raw")
    parser.add_argument("--start", default="20200101")
    parser.add_argument("--end", default="20251231")
    parser.add_argument("--out", default="data/tune_results.parquet")
    opts = parser.parse_args()

    universe, constraints, risk_proxy, rates_fallback = load_universe(Path(opts.config))
    prices = load_prices(Path(opts.rawdir), universe, opts.start, opts.end)
    base = TrendParams()

    # small grid to keep runtime reasonable
    grid = itertools.product(
        [5, 10],  # sma_fast
        [20, 40],  # sma_slow
        [2.5, 3.0],  # atr_mult
        [10, 20],  # vol_window
        [40, 60],  # port_vol_window
        [3, 4],  # max_positions
    )

    results = []
    for sma_fast, sma_slow, atr_mult, vol_window, port_vol_window, max_positions in grid:
        if sma_fast >= sma_slow:
            continue
        point = {
            "sma_fast": sma_fast,
            "sma_slow": sma_slow,
            "atr_mult": atr_mult,
            "vol_window": vol_window,
            "port_vol_window": port_vol_window,
            "max_positions": max_positions,
        }
        params = replace(base, rebalance_every=1, **point)
        # Keep the selection constraint in step with the parameter under test.
        limits = replace(constraints, max_positions=max_positions)
        equity, _weights = run_backtest(
            prices,
            universe,
            limits,
            params,
            rates_fallback=rates_fallback,
            risk_proxy=risk_proxy,
        )
        stats = perf_stats(equity["equity"])
        if stats:
            results.append({**point, **stats})

    table = pd.DataFrame(results)
    if table.empty:
        print("no results")
        return

    # filter by vol constraint first, then sort by ann_return
    ranked = table[table["ann_vol"] <= 0.18].copy()
    if ranked.empty:
        ranked = table.copy()
    ranked = ranked.sort_values(["ann_return", "calmar"], ascending=False)

    destination = Path(opts.out)
    destination.parent.mkdir(parents=True, exist_ok=True)
    ranked.to_parquet(destination, index=False)

    print("top10")
    shown = [
        "ann_return",
        "ann_vol",
        "max_drawdown",
        "calmar",
        "sma_fast",
        "sma_slow",
        "atr_mult",
        "vol_window",
        "port_vol_window",
        "max_positions",
    ]
    print(ranked[shown].head(10).to_string(index=False))


if __name__ == "__main__":
    main()

View File

@@ -0,0 +1,229 @@
from __future__ import annotations
import argparse
import itertools
import json
from dataclasses import replace
from pathlib import Path
import numpy as np
import pandas as pd
from qfr.strategy.etf_trend import Constraints, TrendParams, UniverseAsset, compute_features, portfolio_vol, risk_parity_weights, select_portfolio
def load_universe(config_path: Path) -> tuple[list[UniverseAsset], Constraints, str, str]:
    """Read the universe config and return its four strategy inputs.

    The result is ``(universe, constraints, risk_proxy, rates_fallback)``.
    Absent constraint settings use the defaults written out below.
    """
    document = json.loads(config_path.read_text(encoding="utf-8"))
    universe = [UniverseAsset(**entry) for entry in document["assets"]]

    settings = document.get("constraints", {})
    minimums = {
        kind: int(settings.get("must_include", {}).get(kind, 0))
        for kind in ("commodity", "rates", "equity")
    }
    constraints = Constraints(
        max_positions=int(settings.get("max_positions", 4)),
        must_commodity=minimums["commodity"],
        must_rates=minimums["rates"],
        must_equity=minimums["equity"],
    )
    return (
        universe,
        constraints,
        settings.get("risk_proxy", "510300.SH"),
        settings.get("rates_fallback", "511010.SH"),
    )
def load_prices(raw_dir: Path, universe: list[UniverseAsset], start: str, end: str) -> dict[str, pd.DataFrame]:
    """Return ``{ts_code: price frame}`` restricted to ``start..end`` inclusive.

    Each asset is read from ``<raw_dir>/<ts_code without dots>.parquet``;
    ``trade_date`` is converted to ``str`` before the string comparison.
    """

    def read_one(ts_code: str) -> pd.DataFrame:
        table = pd.read_parquet(raw_dir / f"{ts_code.replace('.', '')}.parquet").copy()
        table["trade_date"] = table["trade_date"].astype(str)
        keep = (table["trade_date"] >= start) & (table["trade_date"] <= end)
        return table[keep]

    loaded: dict[str, pd.DataFrame] = {}
    for member in universe:
        loaded[member.ts_code] = read_one(member.ts_code)
    return loaded
def perf_stats(equity: pd.Series) -> dict[str, float]:
    """Compute annualised return, volatility, max drawdown and Calmar ratio.

    An equity curve without any period return produces ``{}``. Calmar is NaN
    when the curve never falls below its running maximum.
    """
    rets = equity.pct_change().dropna()
    if rets.empty:
        return {}

    total_growth = equity.iloc[-1] / equity.iloc[0]
    stats = {
        "ann_return": float(total_growth ** (252 / len(rets)) - 1),
        "ann_vol": float(rets.std(ddof=1) * (252 ** 0.5)),
        "max_drawdown": float((equity / equity.cummax() - 1.0).min()),
    }
    worst = stats["max_drawdown"]
    stats["calmar"] = float(stats["ann_return"] / abs(worst)) if worst < 0 else float("nan")
    return stats
def run_backtest_cached(
    feats: dict[str, pd.DataFrame],
    universe: list[UniverseAsset],
    constraints: Constraints,
    params: TrendParams,
    rates_fallback: str,
    risk_proxy: str,
) -> pd.DataFrame:
    """Simulate the trend strategy from precomputed per-asset feature frames.

    Args:
        feats: ``ts_code -> feature frame``. Each frame is read for the
            columns ``trade_date``, ``close``, ``atr``, ``ma_fast``,
            ``ma_slow``, ``trend_ok``, ``score_raw`` and ``vol``.
        universe: passed through to ``select_portfolio``.
        constraints: passed through to ``select_portfolio``.
        params: supplies ``atr_mult``, ``rebalance_every``,
            ``port_vol_window`` and ``target_ann_vol``.
        rates_fallback: asset that receives whatever weight is left after
            volatility scaling (skipped when it is not one of the assets).
        risk_proxy: must be one of the assets, otherwise the run is rejected.

    Returns:
        A one-column frame ``equity`` indexed by trade date: the cumulative
        product of daily portfolio returns, using the previous day's weights.

    Raises:
        RuntimeError: if the assets share no trade date, or ``risk_proxy``
            has no price column.
    """
    # Only dates present for every asset are simulated.
    dates = None
    for f in feats.values():
        d = set(f["trade_date"].astype(str))
        dates = d if dates is None else dates.intersection(d)
    if not dates:
        raise RuntimeError("No overlapping trade_date")
    all_dates = sorted(dates)

    # Index and align every feature frame ONCE. The per-day loop below only
    # does cheap lookups instead of re-indexing each frame on each date.
    aligned = {ts: f.set_index("trade_date").reindex(all_dates) for ts, f in feats.items()}

    close_px = pd.DataFrame(index=all_dates)
    ret1 = pd.DataFrame(index=all_dates)
    for ts, g in aligned.items():
        close_px[ts] = g["close"].astype(float)
        ret1[ts] = close_px[ts].pct_change().fillna(0.0)

    if risk_proxy not in close_px.columns:
        raise RuntimeError("risk_proxy missing")

    weights = pd.DataFrame(0.0, index=all_dates, columns=close_px.columns)
    in_pos: set[str] = set()
    # Highest close since entry, per held asset (reference for the ATR stop).
    highest_close: dict[str, float] = {}

    atr_map = {ts: aligned[ts]["atr"].astype(float) for ts in close_px.columns}
    mf_map = {ts: aligned[ts]["ma_fast"].astype(float) for ts in close_px.columns}
    ms_map = {ts: aligned[ts]["ma_slow"].astype(float) for ts in close_px.columns}

    last_reb = -10**9  # guarantees a rebalance on the first date
    for i, d in enumerate(all_dates):
        if i > 0:
            # Start from yesterday's weights.
            weights.loc[d] = weights.iloc[i - 1]

        for ts in list(in_pos):
            c = float(close_px.loc[d, ts])
            if np.isfinite(c):
                highest_close[ts] = max(highest_close.get(ts, c), c)

        # exits
        for ts in list(in_pos):
            c = float(close_px.loc[d, ts])
            mf = float(mf_map[ts].loc[d])
            ms = float(ms_map[ts].loc[d])
            atr = float(atr_map[ts].loc[d])
            h = highest_close.get(ts, c)
            trend_break = np.isfinite(mf) and np.isfinite(ms) and (mf < ms)
            # Close fell more than atr_mult ATRs below the high since entry.
            chand_break = np.isfinite(atr) and c < (h - params.atr_mult * atr)
            if trend_break or chand_break:
                weights.loc[d, ts] = 0.0
                in_pos.remove(ts)
                highest_close.pop(ts, None)

        if (i - last_reb) >= params.rebalance_every:
            rows = []
            for ts in close_px.columns:
                f = aligned[ts].loc[d]
                rows.append(
                    (
                        ts,
                        bool(f["trend_ok"]) if pd.notna(f["trend_ok"]) else False,
                        float(f["score_raw"]) if pd.notna(f["score_raw"]) else float("nan"),
                        float(f["vol"]) if pd.notna(f["vol"]) else float("nan"),
                    )
                )
            snap = pd.DataFrame(rows, columns=["ts_code", "trend_ok", "score_raw", "vol"]).set_index("ts_code")
            picks = select_portfolio(snap, universe, constraints)
            vol = snap.loc[picks, "vol"].copy()
            w = risk_parity_weights(vol, max_w=0.50)

            # Scale exposure down (never up) towards the target portfolio vol.
            trailing = ret1[picks].iloc[max(0, i - params.port_vol_window + 1) : i + 1]
            pvol = portfolio_vol(trailing, w)
            scale = 1.0
            if np.isfinite(pvol) and pvol > 0:
                scale = min(1.0, params.target_ann_vol / pvol)
            w_exec = w * scale

            weights.loc[d] = 0.0
            for ts, wi in w_exec.items():
                weights.loc[d, ts] = float(wi)
            rem = 1.0 - float(w_exec.sum())
            if rem > 1e-12 and rates_fallback in weights.columns:
                weights.loc[d, rates_fallback] += rem

            in_pos = {ts for ts in close_px.columns if weights.loc[d, ts] > 1e-12}
            # An asset dropped by this rebalance must not keep its old high:
            # exits already clear it, and a stale value would be used as the
            # stop reference if the asset is bought again later.
            highest_close = {ts: h for ts, h in highest_close.items() if ts in in_pos}
            for ts in in_pos:
                c = float(close_px.loc[d, ts])
                highest_close[ts] = max(highest_close.get(ts, c), c)
            last_reb = i

    # Weights decided on day t earn the return of day t+1.
    w_lag = weights.shift(1).fillna(0.0)
    port_ret = (ret1 * w_lag).sum(axis=1)
    equity = (1.0 + port_ret).cumprod().to_frame("equity")
    return equity
def main() -> None:
    """Grid-search trend parameters using ``run_backtest_cached``.

    For each grid point the features are computed with ``compute_features``
    and backtested. Rows with ``ann_vol <= 0.18`` are sorted by
    ``ann_return`` then ``calmar`` (descending), written to ``--out`` and the
    first ten are printed.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--config", default="configs/etf_universe.json")
    parser.add_argument("--rawdir", default="data/raw")
    parser.add_argument("--start", default="20200101")
    parser.add_argument("--end", default="20251231")
    parser.add_argument("--out", default="data/tune_results_fast.parquet")
    opts = parser.parse_args()

    universe, constraints, risk_proxy, rates_fallback = load_universe(Path(opts.config))
    prices = load_prices(Path(opts.rawdir), universe, opts.start, opts.end)
    base = TrendParams(rebalance_every=1)

    # grid (keep small)
    fast_list = [3, 5, 8]
    slow_list = [15, 20, 30]
    atr_mult_list = [2.0, 2.5, 3.0]
    vol_window_list = [10, 20]
    port_vol_window_list = [40, 60]
    max_positions_list = [3, 4]

    sma_pairs = [
        (fast, slow) for fast, slow in itertools.product(fast_list, slow_list) if fast < slow
    ]

    results = []
    for sma_fast, sma_slow in sma_pairs:
        inner_grid = itertools.product(
            atr_mult_list, vol_window_list, port_vol_window_list, max_positions_list
        )
        for atr_mult, vol_window, port_vol_window, max_positions in inner_grid:
            point = {
                "sma_fast": sma_fast,
                "sma_slow": sma_slow,
                "atr_mult": atr_mult,
                "vol_window": vol_window,
                "port_vol_window": port_vol_window,
                "max_positions": max_positions,
            }
            params = replace(base, **point)
            limits = replace(constraints, max_positions=max_positions)
            feats = {code: compute_features(frame, params) for code, frame in prices.items()}
            equity = run_backtest_cached(feats, universe, limits, params, rates_fallback, risk_proxy)
            stats = perf_stats(equity["equity"])
            if stats:
                results.append({**point, **stats})

    table = pd.DataFrame(results)
    if table.empty:
        print("no results")
        return

    within_vol = table[table["ann_vol"] <= 0.18]
    ranked = within_vol.sort_values(["ann_return", "calmar"], ascending=False)

    destination = Path(opts.out)
    destination.parent.mkdir(parents=True, exist_ok=True)
    ranked.to_parquet(destination, index=False)

    shown = [
        "ann_return",
        "ann_vol",
        "max_drawdown",
        "calmar",
        "sma_fast",
        "sma_slow",
        "atr_mult",
        "vol_window",
        "port_vol_window",
        "max_positions",
    ]
    print("top10")
    print(ranked[shown].head(10).to_string(index=False))


if __name__ == "__main__":
    main()

View File

@@ -0,0 +1,95 @@
from __future__ import annotations
import argparse
import json
from dataclasses import replace
from pathlib import Path
import pandas as pd
from qfr.strategy.etf_trend import Constraints, TrendParams, UniverseAsset, run_backtest
def load_universe(config_path: Path):
    """Load assets and constraints from the universe JSON file.

    Returns a 4-tuple ``(universe, constraints, risk_proxy, rates_fallback)``;
    unset constraint keys take the defaults shown in the code.
    """
    parsed = json.loads(config_path.read_text(encoding="utf-8"))
    universe = [UniverseAsset(**item) for item in parsed["assets"]]
    cons = parsed.get("constraints", {})

    def floor_for(kind):
        # Required holdings of this kind; defaults to none.
        return int(cons.get("must_include", {}).get(kind, 0))

    constraints = Constraints(
        max_positions=int(cons.get("max_positions", 4)),
        must_commodity=floor_for("commodity"),
        must_rates=floor_for("rates"),
        must_equity=floor_for("equity"),
    )
    risk_proxy = cons.get("risk_proxy", "510300.SH")
    rates_fallback = cons.get("rates_fallback", "511010.SH")
    return universe, constraints, risk_proxy, rates_fallback
def load_prices(raw_dir: Path, universe: list[UniverseAsset], start: str, end: str):
    """Read each asset's parquet file and keep rows dated within ``[start, end]``.

    Files are named after ``ts_code`` with the dots removed. Dates are cast to
    ``str`` and compared as strings; both bounds are inclusive.
    """
    by_code = {}
    for asset in universe:
        filename = f"{asset.ts_code.replace('.', '')}.parquet"
        data = pd.read_parquet(raw_dir / filename)
        data = data.copy()
        data["trade_date"] = data["trade_date"].astype(str)
        day = data["trade_date"]
        by_code[asset.ts_code] = data[(day >= start) & (day <= end)]
    return by_code
def perf_stats(equity: pd.Series):
    """Return ``(ann_return, ann_vol, max_drawdown)`` for an equity curve.

    ``ann_return`` is geometric with 252 periods per year, ``ann_vol`` is the
    annualised sample standard deviation of period returns, and
    ``max_drawdown`` is the most negative value of equity / running max - 1.

    A curve with no period returns (fewer than two usable points) yields three
    NaNs instead of failing on a division by zero.
    """
    r = equity.pct_change().dropna()
    if r.empty:
        nan = float("nan")
        return nan, nan, nan
    ann_ret = float((equity.iloc[-1] / equity.iloc[0]) ** (252 / len(r)) - 1)
    ann_vol = float(r.std(ddof=1) * (252 ** 0.5))
    dd = float((equity / equity.cummax() - 1.0).min())
    return ann_ret, ann_vol, dd
def main() -> None:
    """Backtest a short list of (sma_fast, sma_slow, atr_mult) candidates.

    Prints one row per candidate, sorted by annualised return (descending).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--config", default="configs/etf_universe.json")
    parser.add_argument("--rawdir", default="data/raw")
    parser.add_argument("--start", default="20200101")
    parser.add_argument("--end", default="20251231")
    opts = parser.parse_args()

    universe, constraints, risk_proxy, rates_fallback = load_universe(Path(opts.config))
    prices = load_prices(Path(opts.rawdir), universe, opts.start, opts.end)
    base = TrendParams(rebalance_every=1, max_positions=4)

    # A very small candidate set (fast to run)
    candidates = [
        (5, 20, 3.0),
        (5, 20, 2.5),
        (3, 15, 2.5),
        (8, 30, 3.0),
        (10, 40, 3.0),
        (5, 30, 3.0),
    ]

    def evaluate(sma_fast, sma_slow, atr_mult):
        # Run one candidate and return its report row.
        params = replace(base, sma_fast=sma_fast, sma_slow=sma_slow, atr_mult=atr_mult)
        equity, _w = run_backtest(
            prices,
            universe,
            constraints,
            params,
            rates_fallback=rates_fallback,
            risk_proxy=risk_proxy,
        )
        ann_ret, ann_vol, dd = perf_stats(equity["equity"])
        return {
            "ann_return": ann_ret,
            "ann_vol": ann_vol,
            "max_drawdown": dd,
            "sma_fast": sma_fast,
            "sma_slow": sma_slow,
            "atr_mult": atr_mult,
        }

    report = pd.DataFrame([evaluate(*candidate) for candidate in candidates])
    report = report.sort_values(["ann_return"], ascending=False)
    print(report.to_string(index=False))


if __name__ == "__main__":
    main()

View File

@@ -0,0 +1,148 @@
from __future__ import annotations
import argparse
import json
import math
from collections import defaultdict
from datetime import date, timedelta
from pathlib import Path
import pandas as pd
from qfr.data.tushare_client import load_tushare_config, pro_api
def median_amount(cfg, ts_code: str, start: str, end: str) -> float:
    """Return the median ``amount`` reported by ``fund_daily`` for one fund.

    ``0.0`` is returned when the API gives no rows, the ``amount`` column is
    missing, or no value in it is numeric.
    """
    client = pro_api(cfg)
    daily = client.fund_daily(ts_code=ts_code, start_date=start, end_date=end, fields="trade_date,amount")
    usable = daily is not None and not daily.empty and "amount" in daily.columns
    if not usable:
        return 0.0
    # Non-numeric entries become NaN and are discarded.
    amounts = pd.to_numeric(daily["amount"], errors="coerce").dropna()
    return float(amounts.median()) if not amounts.empty else 0.0
# Keywords tagged as rates. The original list also held one blank entry
# (text apparently lost in an encoding step); it could never match a real
# keyword and is dropped. NOTE(review): restore the missing keyword if known.
_RATES_KEYWORDS = frozenset({"国债", "政金债", "短债", "中债"})

# Keywords tagged as commodities. Two blank entries sat between "原油" and
# "化工" in the original; the script's default keyword list has "煤" in that
# position, so "煤" is restored here.
# NOTE(review): the second lost entry is unknown - confirm and add it.
_COMMODITY_KEYWORDS = frozenset(
    {"黄金", "白银", "有色", "稀土", "矿业", "原油", "煤", "化工", "豆粕", "农业"}
)


def classify_by_keyword(kw: str) -> str:
    """Map a search keyword to an asset-class tag (very rough).

    Rates keywords win over commodity keywords; every other keyword,
    including unknown ones, is tagged as an equity sector.

    Returns:
        ``"rates_cn"``, ``"commodity_cn"`` or ``"equity_cn_sector"``.
    """
    if kw in _RATES_KEYWORDS:
        return "rates_cn"
    if kw in _COMMODITY_KEYWORDS:
        return "commodity_cn"
    # Listed equity keywords and unknown keywords share the same tag, so no
    # separate equity lookup is needed.
    return "equity_cn_sector"
def main() -> None:
    """Extend the universe config with the most traded ETFs per keyword.

    Listed funds whose name contains a keyword are bucketed under the first
    keyword that matches. Within each bucket, funds not already in the config
    are ranked by median ``amount`` over the last 180 calendar days and the
    top ``--per_keyword`` are appended. The config is written to ``--out``,
    or back to ``--config`` when ``--out`` is not given.
    """
    import sys

    ap = argparse.ArgumentParser()
    ap.add_argument("--config", default="configs/etf_universe.json")
    ap.add_argument("--out", default=None)
    ap.add_argument("--per_keyword", type=int, default=2)
    ap.add_argument("--min_median_amount", type=float, default=0.0)
    ap.add_argument(
        "--keywords",
        default=(
            "半导体,芯片,通信,5G,通信设备,军工,机器人,工业母机,智能制造,消费电子,AI,算力,软件,创新药,医药,新能源,光伏,锂电,电池,"
            "矿业,有色,稀土,新材料,黄金,白银,原油,煤,化工,豆粕,农业,国债,政金债"
        ),
    )
    args = ap.parse_args()

    cfg = load_tushare_config()
    api = pro_api(cfg)

    conf_path = Path(args.config)
    conf = json.loads(conf_path.read_text(encoding="utf-8"))
    assets = conf.get("assets", [])
    have = {a["ts_code"] for a in assets}

    kw_list = [k.strip() for k in str(args.keywords).split(",") if k.strip()]

    fb = api.fund_basic(market="E", status="L", fields="ts_code,name")
    if fb is None or fb.empty:
        raise RuntimeError("fund_basic returned empty")
    fb = fb.dropna(subset=["ts_code", "name"]).copy()

    end = date.today().strftime("%Y%m%d")
    start = (date.today() - timedelta(days=180)).strftime("%Y%m%d")

    # Each fund lands in the bucket of the first keyword found in its name.
    buckets: dict[str, list[tuple[str, str]]] = defaultdict(list)
    for _, r in fb.iterrows():
        ts_code = str(r["ts_code"]).strip()
        name = str(r["name"]).strip()
        for kw in kw_list:
            if kw in name:
                buckets[kw].append((ts_code, name))
                break

    chosen: list[tuple[str, str, str, float, str]] = []
    for kw in kw_list:
        cands = buckets.get(kw, [])
        if not cands:
            continue
        scored: list[tuple[float, str, str]] = []
        for ts_code, name in cands:
            if ts_code in have:
                continue
            try:
                m = median_amount(cfg, ts_code, start, end)
            except Exception as exc:
                # Best effort: one failing lookup must not abort the scan,
                # but it is reported so that systematic failures (rate
                # limits, bad token) are not mistaken for illiquid funds.
                print(f"warn: median_amount failed for {ts_code}: {exc!r}", file=sys.stderr)
                m = 0.0
            if not math.isfinite(m) or m <= 0:
                continue
            if m < float(args.min_median_amount):
                continue
            scored.append((m, ts_code, name))
        scored.sort(reverse=True)
        for m, ts_code, name in scored[: int(args.per_keyword)]:
            cls = classify_by_keyword(kw)
            chosen.append((kw, ts_code, name, m, cls))

    for kw, ts_code, name, m, cls in chosen:
        assets.append({"ts_code": ts_code, "asset_class": cls, "name": name})
        have.add(ts_code)

    conf["assets"] = assets
    out_path = Path(args.out) if args.out else conf_path
    out_path.write_text(json.dumps(conf, ensure_ascii=True, indent=2) + "\n", encoding="utf-8")

    print(f"added {len(chosen)} ETFs")
    for kw, ts_code, name, m, cls in chosen[:80]:
        print(f"{kw}\t{ts_code}\t{m:.0f}\t{cls}\t{name}")


if __name__ == "__main__":
    main()

View File

@@ -0,0 +1,128 @@
from __future__ import annotations
import argparse
import json
from pathlib import Path
import numpy as np
import pandas as pd
def load_prices(raw_dir: Path, ts_code: str) -> pd.DataFrame:
    """Load one asset's price history, ordered by ``trade_date``.

    The file is ``<raw_dir>/<ts_code without dots>.parquet``. ``trade_date``
    is cast to ``str`` before sorting and the index is reset.
    """
    source = raw_dir / f"{ts_code.replace('.', '')}.parquet"
    history = pd.read_parquet(source).copy()
    history["trade_date"] = history["trade_date"].astype(str)
    return history.sort_values("trade_date").reset_index(drop=True)
def ann_vol(ret1: pd.Series) -> float:
    """Annualised sample volatility of a return series (252 periods per year).

    Returns NaN when fewer than 50 non-missing returns are available.
    """
    observed = ret1.dropna()
    enough_history = len(observed) >= 50
    if not enough_history:
        return float("nan")
    per_period = observed.std(ddof=1)
    return float(per_period * np.sqrt(252.0))
def max_drawdown(close: pd.Series) -> float:
    """Return the deepest peak-to-trough decline of a price series.

    The result is the most negative value of ``close / running max - 1``
    (0.0 when the price never falls below its running maximum). NaN is
    returned for series shorter than 50 rows or containing no prices.

    The ratio does not depend on the price scale, so the series is not
    rebased to its first value. Rebasing made the whole result NaN whenever
    the first close was missing.
    """
    c = close.astype(float)
    if c.isna().all() or len(c) < 50:
        return float("nan")
    dd = c / c.cummax() - 1.0
    return float(dd.min())
def bias_stats(close: pd.Series, ma_n: int = 20) -> tuple[float, float]:
    """Mean and sample std of the price's relative gap to its moving average.

    The gap is ``close / SMA(ma_n) - 1``, evaluated only where a full
    ``ma_n`` window exists. ``(nan, nan)`` is returned when fewer than 50 gap
    values are available.
    """
    prices = close.astype(float)
    moving_avg = prices.rolling(ma_n, min_periods=ma_n).mean()
    gap = prices / moving_avg - 1.0
    gap = gap.dropna()
    if len(gap) >= 50:
        return float(gap.mean()), float(gap.std(ddof=1))
    return float("nan"), float("nan")
def main() -> None:
    """Profile every asset in a universe config and write a filtered config.

    For each asset with at least ``--window`` + 50 rows in the date range, the
    last ``--window`` closes are summarised (annualised vol, max drawdown,
    20-day bias mean/std). The full profile goes to ``--out``. Assets passing
    the vol / drawdown / bias filters are ranked by
    ``0.70 * rank(ann_vol) + 0.30 * rank(max_dd)`` and the best ``--top`` are
    written to ``--out_config``.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("--config", default="configs/etf_universe_industry_only.json")
    ap.add_argument("--rawdir", default="data/raw")
    ap.add_argument("--start", default="20200101")
    ap.add_argument("--end", default="20251231")
    ap.add_argument("--window", type=int, default=504, help="profile window in trading days")
    ap.add_argument("--out", default="data/universe_profile.parquet")

    # filters (keep high vol, avoid pathological drawdowns)
    ap.add_argument("--min_ann_vol", type=float, default=0.18)
    ap.add_argument("--max_dd_floor", type=float, default=-0.65, help="drop assets with max_dd < floor")
    ap.add_argument("--min_bias_std", type=float, default=0.02)
    ap.add_argument("--max_bias_std", type=float, default=0.20)
    ap.add_argument("--top", type=int, default=40, help="how many to keep after scoring")
    ap.add_argument("--out_config", default="configs/etf_universe_industry_profiled.json")
    args = ap.parse_args()

    conf = json.loads(Path(args.config).read_text(encoding="utf-8"))
    assets = conf["assets"]
    raw = Path(args.rawdir)

    rows = []
    for a in assets:
        ts = a["ts_code"]
        df = load_prices(raw, ts)
        df = df[(df["trade_date"] >= args.start) & (df["trade_date"] <= args.end)]
        if len(df) < int(args.window) + 50:
            continue
        tail = df.tail(int(args.window))
        close = tail["close"].astype(float)
        ret1 = close.pct_change()
        v = ann_vol(ret1)
        dd = max_drawdown(close)
        bmu, bsd = bias_stats(close, 20)
        rows.append(
            {
                "ts_code": ts,
                "name": a.get("name"),
                "asset_class": a.get("asset_class"),
                "ann_vol": v,
                "max_dd": dd,
                "bias20_mean": bmu,
                "bias20_std": bsd,
            }
        )

    prof = pd.DataFrame(rows)
    if prof.empty:
        raise SystemExit("no assets profiled")

    # Create the output folder first so a fresh checkout does not fail here.
    out_path = Path(args.out)
    out_path.parent.mkdir(parents=True, exist_ok=True)
    prof.to_parquet(out_path, index=False)

    # filter
    f = prof.copy()
    f = f[np.isfinite(f["ann_vol"]) & np.isfinite(f["max_dd"]) & np.isfinite(f["bias20_std"])].copy()
    f = f[(f["ann_vol"] >= float(args.min_ann_vol))]
    f = f[(f["max_dd"] >= float(args.max_dd_floor))]
    # Copy after the last filter: the score columns below are assigned onto
    # this frame, which must not be a view of an intermediate selection.
    f = f[(f["bias20_std"] >= float(args.min_bias_std)) & (f["bias20_std"] <= float(args.max_bias_std))].copy()

    # score: prefer high vol and stable (less extreme dd). still keep high beta.
    # normalize with ranks to avoid scale issues
    f["r_vol"] = f["ann_vol"].rank(pct=True)
    f["r_dd"] = f["max_dd"].rank(pct=True)  # less negative => higher rank
    f["score"] = 0.70 * f["r_vol"] + 0.30 * f["r_dd"]
    f = f.sort_values("score", ascending=False)

    keep = set(f.head(int(args.top))["ts_code"].tolist())
    new_conf = conf.copy()
    new_conf["assets"] = [a for a in assets if a["ts_code"] in keep]

    out_config = Path(args.out_config)
    out_config.parent.mkdir(parents=True, exist_ok=True)
    out_config.write_text(json.dumps(new_conf, ensure_ascii=True, indent=2) + "\n", encoding="utf-8")

    print("profiled", len(prof), "filtered_keep", len(new_conf["assets"]))
    print(f.head(15)[["ts_code", "ann_vol", "max_dd", "bias20_std", "score"]].to_string(index=False))


if __name__ == "__main__":
    main()

159
scripts/grid_search_opt.py Normal file
View File

@@ -0,0 +1,159 @@
from __future__ import annotations
import argparse
import itertools
import json
import random
from dataclasses import asdict, replace
from pathlib import Path
import numpy as np
import pandas as pd
from qfr.strategy.etf_trend import Constraints, TrendParams, UniverseAsset, run_backtest
def load_universe(config_path: Path) -> tuple[list[UniverseAsset], Constraints, str, str]:
    """Build the backtest inputs described by the universe JSON file.

    Returns ``(universe, constraints, risk_proxy, rates_fallback)``. Settings
    missing from the ``constraints`` section use the defaults coded below.
    """
    payload = json.loads(config_path.read_text(encoding="utf-8"))
    universe = [UniverseAsset(**fields_) for fields_ in payload["assets"]]

    cons = payload.get("constraints", {})
    commodity_min = int(cons.get("must_include", {}).get("commodity", 0))
    rates_min = int(cons.get("must_include", {}).get("rates", 0))
    equity_min = int(cons.get("must_include", {}).get("equity", 0))
    constraints = Constraints(
        max_positions=int(cons.get("max_positions", 4)),
        must_commodity=commodity_min,
        must_rates=rates_min,
        must_equity=equity_min,
    )

    return (
        universe,
        constraints,
        cons.get("risk_proxy", "510300.SH"),
        cons.get("rates_fallback", "511010.SH"),
    )
def load_prices(raw_dir: Path, universe: list[UniverseAsset], start: str, end: str) -> dict[str, pd.DataFrame]:
    """Load per-asset price frames limited to the inclusive date range.

    One parquet file per asset is expected in ``raw_dir``, named after the
    ``ts_code`` with dots stripped. Dates are compared as strings.
    """
    prices: dict[str, pd.DataFrame] = {}
    for asset in universe:
        stem = asset.ts_code.replace(".", "")
        frame = pd.read_parquet(raw_dir / f"{stem}.parquet")
        frame = frame.copy()
        frame["trade_date"] = frame["trade_date"].astype(str)
        selected = (frame["trade_date"] >= start) & (frame["trade_date"] <= end)
        prices[asset.ts_code] = frame[selected]
    return prices
def perf_stats(equity: pd.Series) -> dict[str, float]:
    """Summarise an equity curve as return, volatility, drawdown and Sharpe.

    ``{}`` is returned when no period return can be computed. ``sharpe`` is
    ``ann_return / ann_vol`` and is NaN unless ``ann_vol`` is positive.
    """
    period_returns = equity.pct_change().dropna()
    if period_returns.empty:
        return {}

    periods = len(period_returns)
    ann_ret = float((equity.iloc[-1] / equity.iloc[0]) ** (252 / periods) - 1)
    ann_vol = float(period_returns.std(ddof=1) * (252 ** 0.5))
    running_peak = equity.cummax()
    dd = float((equity / running_peak - 1.0).min())
    if ann_vol > 0:
        sharpe = float(ann_ret / ann_vol)
    else:
        sharpe = float("nan")
    return {"ann_return": ann_ret, "ann_vol": ann_vol, "max_drawdown": dd, "sharpe": sharpe}
def main() -> None:
    """Random-sample a parameter grid, backtest each sample and save results.

    At most ``--max_combos`` combinations are drawn (seeded by ``--seed``)
    from the full cartesian grid. Results with ``ann_vol <= 0.25`` are sorted
    by ``ann_return`` then ``sharpe`` (descending), written to ``--out`` and
    the first ten are printed.

    A failing backtest is skipped, but the failure is reported on stderr
    (first few in full, then a total) so that a systematic error does not
    end in an unexplained "no results".
    """
    import sys

    ap = argparse.ArgumentParser()
    ap.add_argument("--config", default="configs/etf_universe.json")
    ap.add_argument("--rawdir", default="data/raw")
    ap.add_argument("--start", default="20200101")
    ap.add_argument("--end", default="20251231")
    ap.add_argument("--out", default="data/grid_search_results.parquet")
    ap.add_argument("--seed", type=int, default=1)
    ap.add_argument("--max_combos", type=int, default=400, help="Randomly sample at most this many combos")
    args = ap.parse_args()

    universe, constraints, risk_proxy, rates_fallback = load_universe(Path(args.config))
    prices = load_prices(Path(args.rawdir), universe, args.start, args.end)

    base = TrendParams(target_ann_vol=0.25)

    # Keep grid small. We will sample max_combos from the full cartesian product.
    grid = {
        "sma_fast": [3, 5, 8],
        "sma_slow": [15, 20, 30, 40],
        "lazy_days": [2, 5],
        "rebalance_band": [0.03, 0.06],
        "atr_mult": [2.5, 3.2, 4.0],
        "profit_tighten_atr": [3.0, 4.0],
        "atr_mult_profit": [1.5, 2.0],
        "stop_loss_atr": [2.5, 3.2],
        "bias_exit": [0.12, 0.18],
        "vol_ratio_exit": [2.0, 3.0],
        "max_weight_per_asset": [0.7, 0.9],
        "concentration_power": [1.6, 2.2],
    }

    keys = list(grid.keys())
    combos = list(itertools.product(*(grid[k] for k in keys)))

    random.seed(int(args.seed))
    if int(args.max_combos) > 0 and len(combos) > int(args.max_combos):
        combos = random.sample(combos, int(args.max_combos))

    max_reported = 5  # cap on individually printed failures
    failures = 0
    rows = []
    for vals in combos:
        kw = dict(zip(keys, vals))
        if int(kw["sma_fast"]) >= int(kw["sma_slow"]):
            continue
        params = replace(base, **kw, rebalance_every=1, max_positions=constraints.max_positions)
        try:
            equity, _w, _tr = run_backtest(
                prices,
                universe,
                constraints,
                params,
                rates_fallback=rates_fallback,
                risk_proxy=risk_proxy,
            )
        except Exception as exc:
            # Best effort: keep searching, but never hide the reason.
            failures += 1
            if failures <= max_reported:
                print(f"warn: backtest failed for {kw}: {exc!r}", file=sys.stderr)
            continue
        st = perf_stats(equity["equity"])
        if not st:
            continue
        row = {**st, **asdict(params)}
        rows.append(row)

    if failures:
        print(f"warn: {failures} of {len(combos)} sampled combos failed", file=sys.stderr)

    df = pd.DataFrame(rows)
    if df.empty:
        print("no results")
        return

    df = df[df["ann_vol"] <= 0.25].copy()
    df = df.sort_values(["ann_return", "sharpe"], ascending=False)

    out = Path(args.out)
    out.parent.mkdir(parents=True, exist_ok=True)
    df.to_parquet(out, index=False)

    cols = [
        "ann_return",
        "ann_vol",
        "max_drawdown",
        "sharpe",
        "sma_fast",
        "sma_slow",
        "lazy_days",
        "rebalance_band",
        "atr_mult",
        "profit_tighten_atr",
        "atr_mult_profit",
        "stop_loss_atr",
        "bias_exit",
        "vol_ratio_exit",
        "max_weight_per_asset",
        "concentration_power",
    ]
    print("top10")
    print(df[cols].head(10).to_string(index=False))


if __name__ == "__main__":
    main()

View File

@@ -0,0 +1,130 @@
from __future__ import annotations
import argparse
import itertools
import json
from dataclasses import asdict, replace
from pathlib import Path
import pandas as pd
from qfr.strategy.etf_trend import Constraints, TrendParams, UniverseAsset, run_backtest
def load_universe(config_path: Path) -> tuple[list[UniverseAsset], Constraints, str, str]:
    """Turn the universe JSON into ``(universe, constraints, risk_proxy, rates_fallback)``.

    Keys that are absent from the ``constraints`` section take the default
    values that appear in the code.
    """
    config = json.loads(config_path.read_text(encoding="utf-8"))
    members = [UniverseAsset(**row) for row in config["assets"]]
    rules = config.get("constraints", {})

    def minimum(kind: str) -> int:
        return int(rules.get("must_include", {}).get(kind, 0))

    built = Constraints(
        max_positions=int(rules.get("max_positions", 4)),
        must_commodity=minimum("commodity"),
        must_rates=minimum("rates"),
        must_equity=minimum("equity"),
    )
    proxy = rules.get("risk_proxy", "510300.SH")
    fallback = rules.get("rates_fallback", "511010.SH")
    return members, built, proxy, fallback
def load_prices(raw_dir: Path, universe: list[UniverseAsset], start: str, end: str) -> dict[str, pd.DataFrame]:
    """Map each ``ts_code`` to its price rows dated from ``start`` to ``end``.

    Source files are ``<ts_code without dots>.parquet`` under ``raw_dir``.
    ``trade_date`` is stringified and both bounds are inclusive.
    """
    result: dict[str, pd.DataFrame] = {}
    for item in universe:
        location = raw_dir / f"{item.ts_code.replace('.', '')}.parquet"
        rows = pd.read_parquet(location).copy()
        rows["trade_date"] = rows["trade_date"].astype(str)
        stamp = rows["trade_date"]
        result[item.ts_code] = rows[(stamp >= start) & (stamp <= end)]
    return result
def perf_stats(equity: pd.Series) -> dict[str, float]:
    """Return annualised return/vol, max drawdown and Sharpe of an equity curve.

    Gives ``{}`` when the curve has no period returns. ``sharpe`` is NaN when
    ``ann_vol`` is not strictly positive.
    """
    changes = equity.pct_change().dropna()
    if changes.empty:
        return {}

    result = {
        "ann_return": float((equity.iloc[-1] / equity.iloc[0]) ** (252 / len(changes)) - 1),
        "ann_vol": float(changes.std(ddof=1) * (252 ** 0.5)),
        "max_drawdown": float((equity / equity.cummax() - 1.0).min()),
    }
    vol = result["ann_vol"]
    result["sharpe"] = float(result["ann_return"] / vol) if vol > 0 else float("nan")
    return result
def main() -> None:
    """Exhaustive stage-A grid over SMA, lazy-days, band and ATR multiple.

    Every combination with ``sma_fast < sma_slow`` is backtested. Results
    with ``ann_vol <= 0.25`` are sorted by ``ann_return`` then ``sharpe``
    (descending), written to ``--out`` and the first ten are printed.

    A failing backtest is skipped, but the failure is reported on stderr
    (first few in full, then a total) so that a systematic error does not
    end in an unexplained "no results".
    """
    import sys

    ap = argparse.ArgumentParser()
    ap.add_argument("--config", default="configs/etf_universe.json")
    ap.add_argument("--rawdir", default="data/raw")
    ap.add_argument("--start", default="20200101")
    ap.add_argument("--end", default="20251231")
    ap.add_argument("--out", default="data/grid_stage_a.parquet")
    args = ap.parse_args()

    universe, constraints, risk_proxy, rates_fallback = load_universe(Path(args.config))
    prices = load_prices(Path(args.rawdir), universe, args.start, args.end)

    base = TrendParams(target_ann_vol=0.25)

    sma_fast_list = [3, 5, 8]
    sma_slow_list = [15, 20, 30, 40]
    lazy_days_list = [1, 2, 5, 10]
    band_list = [0.03, 0.05, 0.08]
    atr_mult_list = [2.5, 3.0, 3.2, 4.0]

    max_reported = 5  # cap on individually printed failures
    failures = 0
    attempted = 0
    rows = []
    for sma_fast, sma_slow, lazy_days, band, atr_mult in itertools.product(
        sma_fast_list, sma_slow_list, lazy_days_list, band_list, atr_mult_list
    ):
        if sma_fast >= sma_slow:
            continue
        params = replace(
            base,
            rebalance_every=1,
            max_positions=constraints.max_positions,
            sma_fast=sma_fast,
            sma_slow=sma_slow,
            lazy_days=lazy_days,
            rebalance_band=band,
            atr_mult=float(atr_mult),
        )
        attempted += 1
        try:
            equity, _w, _tr = run_backtest(
                prices,
                universe,
                constraints,
                params,
                rates_fallback=rates_fallback,
                risk_proxy=risk_proxy,
            )
        except Exception as exc:
            # Best effort: keep searching, but never hide the reason.
            failures += 1
            if failures <= max_reported:
                print(
                    "warn: backtest failed for "
                    f"sma_fast={sma_fast} sma_slow={sma_slow} lazy_days={lazy_days} "
                    f"rebalance_band={band} atr_mult={atr_mult}: {exc!r}",
                    file=sys.stderr,
                )
            continue
        st = perf_stats(equity["equity"])
        if not st:
            continue
        row = {**st, **asdict(params)}
        rows.append(row)

    if failures:
        print(f"warn: {failures} of {attempted} backtests failed", file=sys.stderr)

    df = pd.DataFrame(rows)
    if df.empty:
        print("no results")
        return

    df = df[df["ann_vol"] <= 0.25].copy()
    df = df.sort_values(["ann_return", "sharpe"], ascending=False)

    out = Path(args.out)
    out.parent.mkdir(parents=True, exist_ok=True)
    df.to_parquet(out, index=False)

    cols = ["ann_return", "ann_vol", "max_drawdown", "sharpe", "sma_fast", "sma_slow", "lazy_days", "rebalance_band", "atr_mult"]
    print("top10")
    print(df[cols].head(10).to_string(index=False))


if __name__ == "__main__":
    main()

View File

@@ -0,0 +1,472 @@
from __future__ import annotations
import argparse
import json
import random
import sqlite3
from dataclasses import asdict, fields, replace
from datetime import datetime, timezone
from pathlib import Path
from typing import Any
import numpy as np
import pandas as pd
from qfr.strategy.etf_trend import Constraints, TrendParams, UniverseAsset, run_backtest
def load_universe(config_path: Path) -> tuple[list[UniverseAsset], Constraints, str, str]:
    """Read the universe config into ``(universe, constraints, risk_proxy, rates_fallback)``.

    ``max_positions`` defaults to 3 here. When ``risk_proxy`` is missing or
    empty, the first asset's ``ts_code`` is used, or ``"510300.SH"`` if the
    universe is empty. Both codes are returned as ``str``.
    """
    conf = json.loads(config_path.read_text(encoding="utf-8"))
    universe = [UniverseAsset(**entry) for entry in conf["assets"]]
    cons = conf.get("constraints", {})

    def must(kind: str) -> int:
        return int(cons.get("must_include", {}).get(kind, 0))

    constraints = Constraints(
        max_positions=int(cons.get("max_positions", 3)),
        must_commodity=must("commodity"),
        must_rates=must("rates"),
        must_equity=must("equity"),
    )

    risk_proxy = cons.get("risk_proxy")
    if not risk_proxy:
        risk_proxy = universe[0].ts_code if universe else "510300.SH"
    rates_fallback = cons.get("rates_fallback", "511010.SH")
    return universe, constraints, str(risk_proxy), str(rates_fallback)
def load_prices(raw_dir: Path, universe: list[UniverseAsset], start: str, end: str) -> dict[str, pd.DataFrame]:
    """Load each asset's parquet prices, keeping ``start <= trade_date <= end``.

    The file name is the ``ts_code`` without dots followed by ``.parquet``;
    dates are converted to ``str`` and compared as strings.
    """
    loaded: dict[str, pd.DataFrame] = {}
    for asset in universe:
        code = asset.ts_code
        table = pd.read_parquet(raw_dir / (code.replace(".", "") + ".parquet")).copy()
        table["trade_date"] = table["trade_date"].astype(str)
        from_start = table["trade_date"] >= start
        to_end = table["trade_date"] <= end
        loaded[code] = table[from_start & to_end]
    return loaded
def perf_stats(equity: pd.Series) -> dict[str, float]:
    """Summarise an equity curve.

    Returns an empty dict when the curve yields no period returns. Otherwise
    returns ``ann_return`` (total growth compounded to 252 periods),
    ``ann_vol`` (sample std of period returns times sqrt(252)),
    ``max_drawdown`` (most negative equity / running-max - 1) and ``sharpe``
    (``ann_return / ann_vol``, NaN unless ``ann_vol > 0``).
    """
    returns = equity.pct_change().dropna()
    if len(returns) == 0:
        return {}

    growth = equity.iloc[-1] / equity.iloc[0]
    annual_return = float(growth ** (252 / len(returns)) - 1)
    annual_vol = float(returns.std(ddof=1) * (252**0.5))
    underwater = equity / equity.cummax() - 1.0
    worst_drawdown = float(underwater.min())

    if annual_vol > 0:
        ratio = float(annual_return / annual_vol)
    else:
        ratio = float("nan")

    return {
        "ann_return": annual_return,
        "ann_vol": annual_vol,
        "max_drawdown": worst_drawdown,
        "sharpe": ratio,
    }
def trades_per_year(trades: pd.DataFrame | None, start: str, end: str) -> float:
if trades is None or getattr(trades, "empty", True):
return 0.0
years = max(1, (int(end[:4]) - int(start[:4]) + 1))
return float(len(trades) / years)
def ensure_db(db_path: Path, param_cols: list[str]) -> None:
    """Create the ``trials`` table if needed and add missing parameter columns.

    Args:
        db_path: SQLite file; its parent directory is created when absent.
        param_cols: Column names to make available on ``trials``. Each
            missing one is added with type ``REAL``; names that already
            exist (compared case-insensitively, as SQLite does) are skipped.

    Raises:
        sqlite3.OperationalError: on any real database failure. A column
            that already exists is detected up front rather than by
            swallowing the error.
    """
    from contextlib import closing

    db_path.parent.mkdir(parents=True, exist_ok=True)
    # ``with sqlite3.connect(...)`` only scopes a transaction and leaves the
    # connection open; ``closing`` guarantees the handle is released.
    with closing(sqlite3.connect(str(db_path))) as con:
        con.execute("PRAGMA journal_mode=WAL")
        con.execute("PRAGMA synchronous=NORMAL")
        con.execute(
            """
            CREATE TABLE IF NOT EXISTS trials (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                run_id TEXT NOT NULL,
                ts_utc TEXT NOT NULL,
                code_version TEXT,
                config_path TEXT,
                start TEXT,
                end TEXT,
                seed INTEGER,
                trial INTEGER,
                jobs INTEGER,
                ann_return REAL,
                ann_vol REAL,
                max_drawdown REAL,
                sharpe REAL,
                trades_per_year REAL
            )
            """
        )
        # Column 1 of PRAGMA table_info is the column name.
        existing = {str(info[1]).lower() for info in con.execute("PRAGMA table_info(trials)")}
        for c in param_cols:
            key = c.lower()
            if key in existing:
                continue
            con.execute(f'ALTER TABLE trials ADD COLUMN "{c}" REAL')
            existing.add(key)
        con.commit()
def insert_rows(db_path: Path, param_cols: list[str], rows: list[dict[str, Any]]) -> None:
    """Bulk-insert trial rows into the ``trials`` table.

    Args:
        db_path: SQLite file that already contains the ``trials`` table.
        param_cols: Parameter column names appended after the fixed columns.
        rows: One dict per trial; keys missing from a dict are stored as NULL.

    Does nothing when ``rows`` is empty. All rows are written in a single
    transaction, and the connection is closed before returning.
    """
    from contextlib import closing

    if not rows:
        return
    cols = [
        "run_id",
        "ts_utc",
        "code_version",
        "config_path",
        "start",
        "end",
        "seed",
        "trial",
        "jobs",
        "ann_return",
        "ann_vol",
        "max_drawdown",
        "sharpe",
        "trades_per_year",
        *param_cols,
    ]
    placeholders = ",".join("?" * len(cols))
    join_cols = ",".join(cols)
    sql = f"INSERT INTO trials ({join_cols}) VALUES ({placeholders})"
    vals = [[r.get(c) for c in cols] for r in rows]
    # ``with sqlite3.connect(...)`` alone would leave the connection open.
    with closing(sqlite3.connect(str(db_path))) as con:
        with con:  # commit on success, roll back on error
            con.executemany(sql, vals)
def load_state(path: Path) -> dict:
    """Return the JSON state stored at ``path``, or a fresh empty state if the file is absent."""
    if not path.exists():
        return {"best": None, "last_reported_ann_return": None, "history": []}
    text = path.read_text(encoding="utf-8")
    return json.loads(text)
def save_state(path: Path, state: dict) -> None:
    """Write ``state`` to ``path`` as indented ASCII-only JSON with a trailing newline.

    Parent directories are created when missing.
    """
    folder = path.parent
    folder.mkdir(parents=True, exist_ok=True)
    serialized = json.dumps(state, ensure_ascii=True, indent=2)
    path.write_text(f"{serialized}\n", encoding="utf-8")
def infer_code_version(repo_dir: Path) -> str:
    """Best-effort code version read straight from ``repo_dir/.git``.

    Returns:
        ``"nogit"`` when ``.git/HEAD`` does not exist; the content of the ref
        file HEAD points at when HEAD is a ``ref:`` line and that file
        exists; otherwise the stripped HEAD text itself; ``"unknown"`` if
        anything raises while reading.
    """
    git_dir = repo_dir / ".git"
    head_file = git_dir / "HEAD"
    if not head_file.exists():
        return "nogit"
    try:
        head_text = head_file.read_text(encoding="utf-8").strip()
        if not head_text.startswith("ref:"):
            return head_text
        target = git_dir / head_text.split(" ", 1)[1]
        if target.exists():
            return target.read_text(encoding="utf-8").strip()
        return head_text
    except Exception:
        return "unknown"
def main() -> None:
    """Random local search around the best parameters stored in the state file.

    Flow: parse CLI options, load universe and prices, rebuild the base
    ``TrendParams`` from ``state["best"]``, then for each of ``--trials``
    iterations draw a sample (every enabled ``--tweak`` group overrides a few
    fields of the base), back-test it, and discard it when its trades per
    year exceed ``--max_trades_per_year``. ``state["best"]`` is replaced and
    saved whenever a trial beats the best ``ann_return`` so far. At the end
    all kept rows are written to SQLite, a history entry is appended to the
    state file, and the top rows are printed.

    ``--jobs`` is only recorded in the database/history; trials run
    sequentially here.

    Raises:
        SystemExit: if the state file has no ``best`` entry to start from.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("--config", default="configs/etf_universe_industry_profiled.json")
    ap.add_argument("--rawdir", default="data/raw")
    ap.add_argument("--start", default="20200101")
    ap.add_argument("--end", default="20251231")
    ap.add_argument("--trials", type=int, default=20)
    ap.add_argument("--seed", type=int, default=1)
    ap.add_argument("--jobs", type=int, default=1)
    ap.add_argument("--state", default="data/opt_state.json")
    ap.add_argument("--db", default="data/experiments.sqlite")
    ap.add_argument("--max_trades_per_year", type=float, default=80.0)
    ap.add_argument("--progress_every", type=int, default=5)
    ap.add_argument(
        "--tweak",
        action="append",
        default=[],
        help=(
            "Enable a tweak group. Repeatable. Options: macro, churn, stops, score, switches, switches2, signal1, orth_ma, orth_weights, orth_mech, asym_fast, positions, exits. "
            "(Each group adjusts <=4 params around current best.)"
        ),
    )
    args = ap.parse_args()

    # One private RNG drives all sampling, so a given seed reproduces a run.
    rng = random.Random(int(args.seed))
    np.random.seed(int(args.seed))

    config_path = Path(args.config)
    universe, constraints, risk_proxy, rates_fallback = load_universe(config_path)
    prices = load_prices(Path(args.rawdir), universe, str(args.start), str(args.end))

    state_path = Path(args.state)
    state = load_state(state_path)
    best_row = state.get("best")
    if not best_row:
        raise SystemExit("opt_state.json missing best")

    # Rebuild the base parameters from the stored best row. Only keys that are
    # TrendParams fields are used; values are coerced to the type of the
    # corresponding default because they come back from JSON.
    tp_fields = {f.name for f in fields(TrendParams)}
    defaults = TrendParams(max_positions=constraints.max_positions)
    best_params = {k: best_row[k] for k in best_row.keys() if k in tp_fields}
    typed: dict[str, Any] = {}
    for k, v in best_params.items():
        t = type(getattr(defaults, k))
        if t is int:
            typed[k] = int(v)
        elif t is float:
            typed[k] = float(v)
        else:
            typed[k] = v
    base = replace(defaults, **typed)
    tweaks = set(args.tweak or [])

    def sample_params() -> TrendParams:
        """Return ``base`` with every enabled tweak group re-sampled.

        Groups are applied in the fixed order below; a later group overrides
        any field an earlier group also set.
        """
        p = base
        if "macro" in tweaks:
            p = replace(
                p,
                macro_min_breadth=float(rng.choice([0.10, 0.12, 0.15, 0.18, 0.20])),
                macro_down_frac=float(rng.choice([0.75, 0.78, 0.80, 0.82, 0.85])),
            )
        if "churn" in tweaks:
            p = replace(
                p,
                lazy_days=int(rng.choice([6, 8, 10])),
                min_hold_days=int(rng.choice([2, 3, 4, 5])),
                replace_score_gap=float(rng.choice([0.5, 0.8, 1.2, 1.6])),
            )
        if "switches" in tweaks:
            # switch/constraint knobs (exactly 4 factors)
            p = replace(
                p,
                desired_positions_min=int(rng.choice([1, 2, 3])),
                replace_score_gap=float(rng.choice([0.0, 0.3, 0.5, 0.8, 1.2])),
                lazy_days=int(rng.choice([4, 6, 8, 10, 12])),
                min_hold_days=int(rng.choice([1, 2, 3, 4, 5])),
            )
        if "switches2" in tweaks:
            # route D churn control without forcing higher min holdings (desired_positions_min fixed)
            # exactly 4 factors: replace_score_gap, lazy_days, min_hold_days, cooldown_days
            p = replace(
                p,
                desired_positions_min=int(1),
                replace_score_gap=float(rng.choice([0.5, 0.8, 1.0, 1.2, 1.6])),
                lazy_days=int(rng.choice([8, 10, 12, 14, 16])),
                min_hold_days=int(rng.choice([3, 5, 7, 10])),
                cooldown_days=int(rng.choice([0, 2, 4, 6, 8, 10])),
            )
        if "signal1" in tweaks:
            # route D: improve signal quality (exactly 4 factors)
            p = replace(
                p,
                min_score=float(rng.choice([0.0, 0.05, 0.10, 0.15, 0.20, 0.25, 0.30])),
                trend_strength_weight=float(rng.choice([0.0, 0.2, 0.4, 0.6, 0.8, 1.0])),
                score_vol_denom_floor=float(rng.choice([0.01, 0.02, 0.03, 0.04, 0.05])),
                macro_min_breadth=float(rng.choice([0.10, 0.15, 0.20, 0.25, 0.30])),
            )
        if "orth_ma" in tweaks:
            # route R: orthogonal to score/stops/exits; explore timing knobs (exactly 4 factors)
            p = replace(
                p,
                sma_fast=int(rng.choice([3, 5, 7, 9, 12])),
                sma_slow=int(rng.choice([20, 30, 40, 60, 90])),
                rebalance_every=int(rng.choice([1, 2, 3, 5])),
                max_replaces_per_day=int(rng.choice([0, 1, 2])),
            )
            # Keep the fast window strictly shorter than the slow one.
            if p.sma_fast >= p.sma_slow:
                p = replace(p, sma_fast=max(3, int(p.sma_slow // 6)))
        if "orth_weights" in tweaks:
            # route R: orthogonal portfolio weight shape (exactly 4 factors)
            max_positions = int(rng.choice([2, 3, 4, 5]))
            desired_min = int(rng.choice([1, 2, 3]))
            desired_max = int(rng.choice([2, 3, 4, 5]))
            # Enforce desired_min <= desired_max <= max_positions.
            desired_min = min(desired_min, desired_max)
            desired_max = min(desired_max, max_positions)
            desired_min = min(desired_min, desired_max)
            p = replace(
                p,
                max_positions=max_positions,
                desired_positions_min=desired_min,
                desired_positions_max=desired_max,
                max_weight_per_asset=float(rng.choice([0.35, 0.45, 0.60, 0.75, 0.90, 1.00])),
            )
            # concentration_power exists in TrendParams; adjust it separately (still counts as one factor)
            p = replace(p, concentration_power=float(rng.choice([1.2, 1.6, 2.0, 2.2, 2.6, 3.0])))
        if "orth_mech" in tweaks:
            # route R: mechanism/turnover knobs (exactly 4 factors)
            p = replace(
                p,
                rebalance_every=int(rng.choice([1, 2, 3, 5])),
                replace_score_gap=float(rng.choice([0.0, 0.3, 0.5, 0.8, 1.2])),
                max_replaces_per_day=int(rng.choice([0, 1, 2, 3])),
                cooldown_days=int(rng.choice([0, 2, 4, 6, 8, 10])),
            )
        if "asym_fast" in tweaks:
            # asymmetric bull/bear risk controls (fast-run) (exactly 4 factors)
            p = replace(
                p,
                regime_confirm_days=int(rng.choice([2, 3, 4, 5])),
                bull_atr_mult=float(rng.choice([3.0, 3.2, 3.4, 3.6])),
                bear_atr_mult=float(rng.choice([2.0, 2.2, 2.4, 2.6, 2.8])),
                bear_stop_loss_atr=float(rng.choice([2.0, 2.2, 2.4, 2.6, 2.8])),
            )
        if "positions" in tweaks:
            # concentration/positioning knobs (exactly 4 factors)
            max_positions = int(rng.choice([2, 3, 4]))
            desired_min = int(rng.choice([1, 2, 3]))
            desired_max = int(rng.choice([2, 3, 4]))
            # keep consistent
            desired_min = min(desired_min, desired_max)
            desired_max = min(desired_max, max_positions)
            desired_min = min(desired_min, desired_max)
            p = replace(
                p,
                max_positions=max_positions,
                desired_positions_min=desired_min,
                desired_positions_max=desired_max,
                max_weight_per_asset=float(rng.choice([0.45, 0.60, 0.75, 0.90, 1.00])),
            )
        if "stops" in tweaks:
            # risk-control fine search (route D: prefer higher sharpe / lower drawdown)
            p = replace(
                p,
                atr_mult=float(rng.choice([3.0, 3.2, 3.4, 3.6])),
                stop_loss_atr=float(rng.choice([2.4, 2.6, 2.8, 3.0, 3.2])),
                profit_tighten_atr=float(rng.choice([4.0, 6.0, 8.0])),
                atr_mult_profit=float(rng.choice([1.3, 1.5, 1.8, 2.0])),
            )
        if "exits" in tweaks:
            # anomaly exits fine search (route D) - exactly 4 factors
            p = replace(
                p,
                bias_window=int(rng.choice([10, 15, 20, 30])),
                bias_exit=float(rng.choice([0.12, 0.16, 0.20, 0.25, 0.30])),
                vol_short=int(rng.choice([3, 5, 8, 10])),
                vol_ratio_exit=float(rng.choice([2.0, 2.5, 3.0, 3.5, 4.0])),
            )
        if "score" in tweaks:
            # aggressive weight search for higher ann_return
            p = replace(
                p,
                min_score=float(rng.choice([-0.10, 0.00, 0.05, 0.10, 0.20, 0.30, 0.40])),
                trend_strength_weight=float(rng.choice([0.00, 0.20, 0.40, 0.60, 0.80, 1.00])),
                w_r20=float(rng.choice([0.20, 0.35, 0.50, 0.65, 0.80])),
                w_r60=float(rng.choice([0.00, 0.10, 0.20, 0.35, 0.50])),
            )
            # Split whatever weight is left after w_r20 + w_r60 between w_r5
            # (60% of the remainder, clamped to [0, 0.6]) and w_r120 (the rest).
            remain = 1.0 - (p.w_r20 + p.w_r60)
            w_r5 = float(max(0.0, min(0.6, remain * 0.6)))
            w_r120 = float(max(0.0, remain - w_r5))
            p = replace(p, w_r5=w_r5, w_r120=w_r120)
        return p

    param_cols = sorted(asdict(base).keys())
    db_path = Path(args.db)
    ensure_db(db_path, param_cols=param_cols)
    run_id = datetime.now(timezone.utc).strftime("%Y%m%dT%H%M%SZ") + f"_bestlocal_seed{int(args.seed)}" + ("_" + "-".join(sorted(tweaks)) if tweaks else "")
    code_version = infer_code_version(Path("."))

    # A stored ann_return of exactly 0.0 is a real value, so test for None
    # explicitly instead of relying on truthiness.
    prior_ann = best_row.get("ann_return")
    best_ann = float(prior_ann) if prior_ann is not None else float("-inf")

    rows_for_db: list[dict[str, Any]] = []
    valid = 0
    for t in range(int(args.trials)):
        p = sample_params()
        equity, _w, tr = run_backtest(
            prices,
            universe,
            constraints,
            p,
            rates_fallback=rates_fallback,
            risk_proxy=risk_proxy,
        )
        st = perf_stats(equity["equity"])
        if not st:
            continue
        tpy = trades_per_year(tr, str(args.start), str(args.end))
        if tpy > float(args.max_trades_per_year):
            continue
        valid += 1
        row = {**st, "trades_per_year": float(tpy), **asdict(p)}
        row["trial"] = int(t)
        row["seed"] = int(args.seed)
        if float(row["ann_return"]) > best_ann:
            # Persist immediately so an interrupted run keeps its best result.
            best_ann = float(row["ann_return"])
            state["best"] = row
            save_state(state_path, state)
        db_row = {
            "run_id": run_id,
            "ts_utc": datetime.now(timezone.utc).isoformat(),
            "code_version": code_version,
            "config_path": str(config_path),
            "start": str(args.start),
            "end": str(args.end),
            "seed": int(args.seed),
            "trial": int(t),
            "jobs": int(args.jobs),
            "ann_return": float(row["ann_return"]),
            "ann_vol": float(row["ann_vol"]),
            "max_drawdown": float(row["max_drawdown"]),
            "sharpe": float(row["sharpe"]),
            "trades_per_year": float(row["trades_per_year"]),
        }
        for c in param_cols:
            db_row[c] = row.get(c)
        rows_for_db.append(db_row)
        if int(args.progress_every) > 0 and valid % int(args.progress_every) == 0:
            print(f"progress valid={valid} best_ann={best_ann:.4f}", flush=True)

    if rows_for_db:
        insert_rows(db_path, param_cols=param_cols, rows=rows_for_db)

    state.setdefault("history", []).append(
        {
            "timestamp": datetime.now(timezone.utc).isoformat(),
            "run_id": run_id,
            "code_version": code_version,
            "config": str(args.config),
            "start": str(args.start),
            "end": str(args.end),
            "trials": int(args.trials),
            "jobs": int(args.jobs),
            "best_ann_return": float(best_ann) if np.isfinite(best_ann) else None,
            "db": str(args.db),
            "base_from": "opt_state.best",
            "tweaks": sorted(tweaks),
        }
    )
    save_state(state_path, state)

    if not rows_for_db:
        # Sorting an empty, column-less DataFrame by "ann_return" would raise
        # KeyError, so report the empty run and stop here.
        print("run_id", run_id)
        print("no valid trials")
        return

    df = pd.DataFrame(rows_for_db).sort_values(["ann_return"], ascending=False)
    view_cols = [
        "ann_return",
        "ann_vol",
        "max_drawdown",
        "sharpe",
        "trades_per_year",
        "atr_mult",
        "stop_loss_atr",
        "profit_tighten_atr",
        "atr_mult_profit",
    ]
    view_cols = [c for c in view_cols if c in df.columns]
    print("run_id", run_id)
    print(df[view_cols].head(8).to_string(index=False))
# Run the CLI entry point only when this file is executed as a script.
if __name__ == "__main__":
    main()

499
scripts/iterate_optimize.py Normal file
View File

@@ -0,0 +1,499 @@
from __future__ import annotations
import argparse
import json
import os
import random
import sqlite3
from dataclasses import asdict, replace
from datetime import datetime, timezone
from pathlib import Path
from typing import Any
import numpy as np
import pandas as pd
from qfr.strategy.etf_trend import Constraints, TrendParams, UniverseAsset, run_backtest
# Globals for multiprocessing (fork mode shares memory COW)
# They start as None, are filled in by _init_globals(), and are read by
# _eval_one(), which asserts that each one has been set.
_G_PRICES: dict[str, pd.DataFrame] | None = None
_G_UNIVERSE: list[UniverseAsset] | None = None
_G_CONSTRAINTS: Constraints | None = None
_G_RISK_PROXY: str | None = None
_G_RATES_FALLBACK: str | None = None
def load_universe(config_path: Path) -> tuple[list[UniverseAsset], Constraints, str, str]:
    """Read the universe config file.

    Returns the asset list, the position constraints, the risk-proxy code and
    the rates-fallback code, in that order. Missing constraint values fall
    back to: 3 max positions, 0 for every ``must_include`` kind, the first
    asset's ``ts_code`` (or ``"510300.SH"``) as risk proxy, and
    ``"511010.SH"`` as rates fallback.
    """
    document = json.loads(config_path.read_text(encoding="utf-8"))
    universe = [UniverseAsset(**entry) for entry in document["assets"]]
    rules = document.get("constraints", {})

    max_positions = int(rules.get("max_positions", 3))
    required = {
        kind: int(rules.get("must_include", {}).get(kind, 0))
        for kind in ("commodity", "rates", "equity")
    }
    constraints = Constraints(
        max_positions=max_positions,
        must_commodity=required["commodity"],
        must_rates=required["rates"],
        must_equity=required["equity"],
    )

    if universe:
        default_proxy = universe[0].ts_code
    else:
        default_proxy = "510300.SH"
    risk_proxy = rules.get("risk_proxy") or default_proxy
    rates_fallback = rules.get("rates_fallback", "511010.SH")
    return universe, constraints, str(risk_proxy), str(rates_fallback)
def load_prices(raw_dir: Path, universe: list[UniverseAsset], start: str, end: str) -> dict[str, pd.DataFrame]:
    """Load each asset's parquet file, restricted to the inclusive date window.

    Files are named after ``ts_code`` with the dots stripped.
    ``trade_date`` is converted to ``str`` before it is compared against
    ``start`` and ``end``.
    """

    def _load(ts_code: str) -> pd.DataFrame:
        table = pd.read_parquet(raw_dir / (ts_code.replace(".", "") + ".parquet"))
        table = table.copy()
        table["trade_date"] = table["trade_date"].astype(str)
        keep = (table["trade_date"] >= start) & (table["trade_date"] <= end)
        return table[keep]

    prices: dict[str, pd.DataFrame] = {}
    for member in universe:
        prices[member.ts_code] = _load(member.ts_code)
    return prices
def perf_stats(equity: pd.Series) -> dict[str, float]:
    """Compute annualised return, volatility, max drawdown and Sharpe for an equity curve.

    An equity curve with no period returns yields ``{}``. Annualisation uses
    252 periods per year; Sharpe is ``ann_return / ann_vol`` and is NaN
    unless the volatility is strictly positive.
    """
    period_returns = equity.pct_change().dropna()
    n_periods = len(period_returns)
    if n_periods == 0:
        return {}

    first, last = equity.iloc[0], equity.iloc[-1]
    stats: dict[str, float] = {}
    stats["ann_return"] = float((last / first) ** (252 / n_periods) - 1)
    stats["ann_vol"] = float(period_returns.std(ddof=1) * (252**0.5))
    stats["max_drawdown"] = float((equity / equity.cummax() - 1.0).min())
    stats["sharpe"] = (
        float(stats["ann_return"] / stats["ann_vol"]) if stats["ann_vol"] > 0 else float("nan")
    )
    return stats
def trades_per_year(trades: pd.DataFrame, start: str, end: str) -> float:
    """Trade rows divided by the number of calendar years covered by start..end.

    The year count is inclusive of both end years and floored at one.
    Returns 0.0 for ``None`` or an empty frame.
    """
    if trades is None:
        return 0.0
    if trades.empty:
        return 0.0
    first_year = int(start[:4])
    last_year = int(end[:4])
    n_years = max(1, last_year - first_year + 1)
    return float(len(trades) / n_years)
def load_state(path: Path) -> dict:
    """Load optimiser state from ``path``; a missing file yields the empty default state."""
    state: dict = {"best": None, "last_reported_ann_return": None, "history": []}
    if path.exists():
        state = json.loads(path.read_text(encoding="utf-8"))
    return state
def save_state(path: Path, state: dict) -> None:
    """Persist ``state`` as pretty-printed, ASCII-escaped JSON followed by a newline.

    The parent directory of ``path`` is created first if it does not exist.
    """
    path.parent.mkdir(parents=True, exist_ok=True)
    lines = [json.dumps(state, ensure_ascii=True, indent=2), ""]
    path.write_text("\n".join(lines), encoding="utf-8")
def infer_code_version(repo_dir: Path) -> str:
    """Return an identifier for the checked-out code, read from ``.git`` files.

    Prefer the commit hash: when HEAD is a ``ref:`` line and the referenced
    file exists, that file's content is returned. Otherwise the raw HEAD text
    is returned. ``"nogit"`` means there is no ``.git/HEAD``; ``"unknown"``
    means reading failed.
    """
    dot_git = repo_dir / ".git"
    head_path = dot_git / "HEAD"
    if not head_path.exists():
        return "nogit"

    try:
        content = head_path.read_text(encoding="utf-8").strip()
        points_to_ref = content.startswith("ref:")
        if points_to_ref:
            ref_name = content.split(" ", 1)[1]
            ref_file = dot_git / ref_name
            if ref_file.exists():
                return ref_file.read_text(encoding="utf-8").strip()
        return content
    except Exception:
        return "unknown"
def ensure_db(db_path: Path, param_cols: list[str]) -> None:
    """Create the ``trials`` table if needed and add missing parameter columns.

    Args:
        db_path: SQLite file; its parent directory is created when absent.
        param_cols: Column names to make available on ``trials``. Each
            missing one is added with type ``REAL``; names that already
            exist (compared case-insensitively, as SQLite does) are skipped.

    Raises:
        sqlite3.OperationalError: on any real database failure. A column
            that already exists is detected up front rather than by
            swallowing the error.
    """
    from contextlib import closing

    db_path.parent.mkdir(parents=True, exist_ok=True)
    # ``with sqlite3.connect(...)`` only scopes a transaction and leaves the
    # connection open; ``closing`` guarantees the handle is released.
    with closing(sqlite3.connect(str(db_path))) as con:
        con.execute("PRAGMA journal_mode=WAL")
        con.execute("PRAGMA synchronous=NORMAL")
        con.execute(
            """
            CREATE TABLE IF NOT EXISTS trials (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                run_id TEXT NOT NULL,
                ts_utc TEXT NOT NULL,
                code_version TEXT,
                config_path TEXT,
                start TEXT,
                end TEXT,
                seed INTEGER,
                trial INTEGER,
                jobs INTEGER,
                ann_return REAL,
                ann_vol REAL,
                max_drawdown REAL,
                sharpe REAL,
                trades_per_year REAL
            )
            """
        )
        # Add param columns if missing (structured fields).
        # Column 1 of PRAGMA table_info is the column name.
        present = {str(info[1]).lower() for info in con.execute("PRAGMA table_info(trials)")}
        for c in param_cols:
            lowered = c.lower()
            if lowered in present:
                continue
            con.execute(f'ALTER TABLE trials ADD COLUMN "{c}" REAL')
            present.add(lowered)
        con.commit()
def insert_rows(db_path: Path, param_cols: list[str], rows: list[dict[str, Any]]) -> None:
    """Bulk-insert trial rows into the ``trials`` table.

    Args:
        db_path: SQLite file that already contains the ``trials`` table.
        param_cols: Parameter column names appended after the fixed columns.
        rows: One dict per trial; keys missing from a dict are stored as NULL.

    Does nothing when ``rows`` is empty. All rows are written in a single
    transaction, and the connection is closed before returning.
    """
    from contextlib import closing

    if not rows:
        return
    cols = [
        "run_id",
        "ts_utc",
        "code_version",
        "config_path",
        "start",
        "end",
        "seed",
        "trial",
        "jobs",
        "ann_return",
        "ann_vol",
        "max_drawdown",
        "sharpe",
        "trades_per_year",
        *param_cols,
    ]
    marks = ",".join("?" * len(cols))
    join_cols = ",".join(cols)
    sql = f"INSERT INTO trials ({join_cols}) VALUES ({marks})"
    vals = [[record.get(c) for c in cols] for record in rows]
    # ``with sqlite3.connect(...)`` alone would leave the connection open.
    with closing(sqlite3.connect(str(db_path))) as con:
        with con:  # commit on success, roll back on error
            con.executemany(sql, vals)
def reservoir_sample_product(rng, iterables, k: int):
    """Sample up to k combos from cartesian product.

    The product is streamed once. The first ``k`` combos fill the result
    list; each later combo (the ``seen``-th overall) draws
    ``rng.randrange(seen)`` and replaces the entry at that index when the
    draw is below ``k``. If the product has at most ``k`` combos, all of them
    are returned in product order.
    """
    import itertools

    reservoir = []
    for seen, combo in enumerate(itertools.product(*iterables), start=1):
        if len(reservoir) < k:
            reservoir.append(combo)
            continue
        slot = rng.randrange(seen)
        if slot < k:
            reservoir[slot] = combo
    return reservoir
def _init_globals(prices: dict[str, pd.DataFrame], universe: list[UniverseAsset], constraints: Constraints, risk_proxy: str, rates_fallback: str) -> None:
    """Store the shared back-test inputs in the module-level ``_G_*`` variables."""
    global _G_PRICES, _G_UNIVERSE, _G_CONSTRAINTS, _G_RISK_PROXY, _G_RATES_FALLBACK
    _G_PRICES, _G_UNIVERSE, _G_CONSTRAINTS = prices, universe, constraints
    _G_RISK_PROXY, _G_RATES_FALLBACK = risk_proxy, rates_fallback
def _eval_one(task: dict[str, Any]) -> dict[str, Any] | None:
    """Back-test one task and return its result row, or ``None`` if it is rejected.

    ``task`` supplies ``params`` (keyword overrides for ``TrendParams``),
    ``start``, ``end``, ``max_trades_per_year``, ``trial`` and ``seed``.
    A task is rejected when the back-test raises, when no performance stats
    can be computed, or when trades per year exceed the task's limit.
    Requires the ``_G_*`` module globals to have been set.
    """
    for shared in (_G_PRICES, _G_UNIVERSE, _G_CONSTRAINTS, _G_RISK_PROXY, _G_RATES_FALLBACK):
        assert shared is not None

    params = replace(TrendParams(), **task["params"])
    try:
        equity, _w, tr = run_backtest(
            _G_PRICES,
            _G_UNIVERSE,
            _G_CONSTRAINTS,
            params,
            rates_fallback=_G_RATES_FALLBACK,
            risk_proxy=_G_RISK_PROXY,
        )
    except Exception:
        # Best effort: a failing parameter set is simply dropped.
        return None

    stats = perf_stats(equity["equity"])
    if not stats:
        return None
    turnover = trades_per_year(tr, task["start"], task["end"])
    if turnover > float(task["max_trades_per_year"]):
        return None

    result = dict(stats)
    result["trades_per_year"] = float(turnover)
    result.update(asdict(params))
    result["trial"] = int(task["trial"])
    result["seed"] = int(task["seed"])
    return result
# Default for the --max_grid CLI option in main().
MAX_GRID_COMBOS = 128
def main() -> None:
    """Search ``TrendParams`` by random sampling or a sub-sampled grid and log valid trials.

    Flow: parse CLI options, load universe and prices into the module
    globals, build the task list (``--mode grid`` enumerates the grid, or
    samples ``--max_grid`` combos from it when it is larger; ``--mode
    random`` draws ``--trials`` parameter sets), evaluate tasks serially or
    in up to 8 forked worker processes, write valid rows to SQLite in
    batches, keep ``state["best"]`` up to date by ``ann_return``, append a
    history entry, print the top rows, and print ``REPORT_TRIGGER`` when the
    best return exceeds the last reported one by at least ``--report_step``.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("--config", default="configs/etf_universe_industry_profiled.json")
    ap.add_argument("--rawdir", default="data/raw")
    ap.add_argument("--start", default="20200101")
    ap.add_argument("--end", default="20251231")
    ap.add_argument("--trials", type=int, default=240)
    ap.add_argument("--mode", choices=["random", "grid"], default="random")
    ap.add_argument("--max_grid", type=int, default=MAX_GRID_COMBOS)
    ap.add_argument("--seed", type=int, default=1)
    ap.add_argument("--jobs", type=int, default=1, help="Parallel workers (processes), up to 8")
    ap.add_argument("--state", default="data/opt_state.json")
    ap.add_argument("--db", default="data/experiments.sqlite")
    ap.add_argument("--baseline", type=float, default=None)
    ap.add_argument("--report_step", type=float, default=0.05)
    ap.add_argument("--max_trades_per_year", type=float, default=80.0)
    ap.add_argument("--progress_every", type=int, default=25)
    args = ap.parse_args()

    jobs = max(1, min(8, int(args.jobs)))
    random.seed(args.seed)
    np.random.seed(args.seed)

    config_path = Path(args.config)
    universe, constraints, risk_proxy, rates_fallback = load_universe(config_path)
    prices = load_prices(Path(args.rawdir), universe, args.start, args.end)
    # Set before any worker is forked so child processes inherit the data.
    _init_globals(prices, universe, constraints, risk_proxy, rates_fallback)

    state_path = Path(args.state)
    state = load_state(state_path)
    best = state.get("best")
    best_ann = float(best["ann_return"]) if best else float("-inf")
    baseline = args.baseline
    if baseline is None:
        baseline = best_ann if np.isfinite(best_ann) else 0.0
    last_rep = state.get("last_reported_ann_return")
    if last_rep is None:
        last_rep = baseline

    params0 = TrendParams(max_positions=constraints.max_positions)
    params0_dict = asdict(params0)
    # Parameter columns to persist as structured fields in SQLite
    param_cols = sorted(params0_dict.keys())
    db_path = Path(args.db)
    ensure_db(db_path, param_cols=param_cols)
    run_id = datetime.now(timezone.utc).strftime("%Y%m%dT%H%M%SZ") + f"_seed{int(args.seed)}"
    code_version = infer_code_version(Path("."))

    tasks: list[dict[str, Any]] = []
    rng = random.Random(int(args.seed))

    def make_task(trial: int, p: TrendParams) -> dict[str, Any]:
        """Build the task dict consumed by ``_eval_one`` for one parameter set."""
        # Convert the dataclass once per task, not once per column.
        p_dict = asdict(p)
        return {
            "trial": int(trial),
            "seed": int(args.seed),
            "start": str(args.start),
            "end": str(args.end),
            "max_trades_per_year": float(args.max_trades_per_year),
            "params": {k: p_dict[k] for k in param_cols},
        }

    if str(args.mode) == "grid":
        grids = {
            "sma_fast": [3, 5],
            "sma_slow": [15, 20, 30],
            "lazy_days": [4, 5, 6, 8],
            "min_hold_days": [2, 3, 5],
            "replace_score_gap": [0.5, 0.8, 1.2, 1.6],
            "min_score": [0.0, 0.2, 0.4, 0.6],
            "desired_positions_min": [1, 2],
            "macro_min_breadth": [0.10, 0.15, 0.20, 0.30],
            "macro_down_frac": [0.75, 0.80, 0.85],
            "atr_mult": [2.5, 3.2, 4.0],
            "stop_loss_atr": [2.0, 2.5, 3.2],
            "profit_tighten_atr": [4.0, 6.0, 8.0],
            "atr_mult_profit": [1.5, 2.0, 2.5],
            "bias_exit": [0.12, 0.18, 0.25],
            "vol_ratio_exit": [3.0, 4.0],
        }
        keys = list(grids.keys())
        iters = [list(grids[k]) for k in keys]
        total = 1
        for xs in iters:
            total *= max(1, len(xs))
        max_grid = max(1, int(args.max_grid))
        if total > max_grid:
            print(f"grid combos {total} > {max_grid}; sampling combos", flush=True)
            combos = reservoir_sample_product(rng, iters, max_grid)
        else:
            import itertools

            combos = list(itertools.product(*iters))
        for t, combo in enumerate(combos):
            vals = dict(zip(keys, combo))
            sma_fast = int(vals["sma_fast"])
            sma_slow = int(vals["sma_slow"])
            if sma_fast >= sma_slow:
                continue
            p = replace(
                params0,
                sma_fast=sma_fast,
                sma_slow=sma_slow,
                lazy_days=int(vals["lazy_days"]),
                min_hold_days=int(vals["min_hold_days"]),
                replace_score_gap=float(vals["replace_score_gap"]),
                min_score=float(vals["min_score"]),
                desired_positions_min=int(vals["desired_positions_min"]),
                desired_positions_max=int(3),
                macro_min_breadth=float(vals["macro_min_breadth"]),
                macro_down_frac=float(vals["macro_down_frac"]),
                atr_mult=float(vals["atr_mult"]),
                stop_loss_atr=float(vals["stop_loss_atr"]),
                profit_tighten_atr=float(vals["profit_tighten_atr"]),
                atr_mult_profit=float(vals["atr_mult_profit"]),
                bias_exit=float(vals["bias_exit"]),
                vol_ratio_exit=float(vals["vol_ratio_exit"]),
                rebalance_every=1,
            )
            tasks.append(make_task(t, p))
    else:
        for t in range(int(args.trials)):
            sma_fast = rng.choice([3, 5])
            sma_slow = rng.choice([15, 20, 30])
            if sma_fast >= sma_slow:
                continue
            lazy_days = rng.choice([4, 5, 6, 8])
            min_hold = rng.choice([2, 3, 5])
            replace_gap = rng.choice([0.5, 0.8, 1.2, 1.6])
            min_score = rng.choice([0.0, 0.2, 0.4, 0.6])
            dmin = rng.choice([1, 2])
            dmax = 3
            macro_min_breadth = rng.choice([0.10, 0.15, 0.20, 0.30])
            macro_down_frac = rng.choice([0.75, 0.80, 0.85])
            atr_mult = rng.choice([2.5, 3.2, 4.0])
            stop_loss_atr = rng.choice([2.0, 2.5, 3.2])
            profit_tighten_atr = rng.choice([4.0, 6.0, 8.0])
            atr_mult_profit = rng.choice([1.5, 2.0, 2.5])
            bias_exit = rng.choice([0.12, 0.18, 0.25])
            vol_ratio_exit = rng.choice([3.0, 4.0])
            p = replace(
                params0,
                sma_fast=int(sma_fast),
                sma_slow=int(sma_slow),
                lazy_days=int(lazy_days),
                min_hold_days=int(min_hold),
                replace_score_gap=float(replace_gap),
                min_score=float(min_score),
                desired_positions_min=int(dmin),
                desired_positions_max=int(dmax),
                macro_min_breadth=float(macro_min_breadth),
                macro_down_frac=float(macro_down_frac),
                atr_mult=float(atr_mult),
                stop_loss_atr=float(stop_loss_atr),
                profit_tighten_atr=float(profit_tighten_atr),
                atr_mult_profit=float(atr_mult_profit),
                bias_exit=float(bias_exit),
                vol_ratio_exit=float(vol_ratio_exit),
                rebalance_every=1,
            )
            tasks.append(make_task(t, p))

    results: list[dict[str, Any]] = []
    rows_for_db: list[dict[str, Any]] = []

    def record_row(row: dict[str, Any]) -> None:
        """Track one valid result: update the best, queue a DB row, flush every 200 rows."""
        nonlocal best_ann
        results.append(row)
        if float(row["ann_return"]) > best_ann:
            # Persist immediately so an interrupted run keeps its best result.
            best_ann = float(row["ann_return"])
            state["best"] = row
            save_state(state_path, state)
        db_row = {
            "run_id": run_id,
            "ts_utc": datetime.now(timezone.utc).isoformat(),
            "code_version": code_version,
            "config_path": str(config_path),
            "start": str(args.start),
            "end": str(args.end),
            "seed": int(args.seed),
            "trial": int(row.get("trial", -1)),
            "jobs": int(jobs),
            "ann_return": float(row["ann_return"]),
            "ann_vol": float(row["ann_vol"]),
            "max_drawdown": float(row["max_drawdown"]),
            "sharpe": float(row["sharpe"]),
            "trades_per_year": float(row["trades_per_year"]),
        }
        for c in param_cols:
            db_row[c] = row.get(c)
        rows_for_db.append(db_row)
        if len(rows_for_db) >= 200:
            insert_rows(db_path, param_cols=param_cols, rows=rows_for_db)
            rows_for_db.clear()

    if jobs == 1:
        for task in tasks:
            row = _eval_one(task)
            if row is None:
                continue
            record_row(row)
            if int(args.progress_every) > 0 and (len(results) % int(args.progress_every) == 0):
                print(f"progress valid={len(results)} best_ann={best_ann:.4f}", flush=True)
    else:
        import multiprocessing as mp
        from concurrent.futures import ProcessPoolExecutor, as_completed

        ctx = mp.get_context("fork")
        with ProcessPoolExecutor(max_workers=jobs, mp_context=ctx) as ex:
            futs = [ex.submit(_eval_one, task) for task in tasks]
            # Results are recorded in completion order, in this parent process.
            for fut in as_completed(futs):
                row = fut.result()
                if row is None:
                    continue
                record_row(row)
                if int(args.progress_every) > 0 and (len(results) % int(args.progress_every) == 0):
                    print(f"progress valid={len(results)} best_ann={best_ann:.4f}", flush=True)

    # Flush whatever is left below the 200-row batch size.
    if rows_for_db:
        insert_rows(db_path, param_cols=param_cols, rows=rows_for_db)
        rows_for_db.clear()

    # A state file written without a "history" key must not abort the run.
    state.setdefault("history", []).append(
        {
            "timestamp": datetime.now(timezone.utc).isoformat(),
            "run_id": run_id,
            "code_version": code_version,
            "config": str(args.config),
            "start": str(args.start),
            "end": str(args.end),
            "trials": int(args.trials),
            "jobs": int(jobs),
            "best_ann_return": float(best_ann) if np.isfinite(best_ann) else None,
            "db": str(args.db),
        }
    )
    save_state(state_path, state)

    if not results:
        print("no valid trials")
        return

    df = pd.DataFrame(results).sort_values(["ann_return"], ascending=False)
    cols = [
        "ann_return",
        "ann_vol",
        "max_drawdown",
        "sharpe",
        "trades_per_year",
        "sma_fast",
        "sma_slow",
        "lazy_days",
        "min_hold_days",
        "replace_score_gap",
        "min_score",
        "macro_min_breadth",
        "macro_down_frac",
        "desired_positions_min",
        "atr_mult",
        "stop_loss_atr",
        "profit_tighten_atr",
        "atr_mult_profit",
        "bias_exit",
        "vol_ratio_exit",
    ]
    cols = [c for c in cols if c in df.columns]
    print(df[cols].head(12).to_string(index=False))

    if best_ann >= float(last_rep) + float(args.report_step):
        state["last_reported_ann_return"] = float(best_ann)
        save_state(state_path, state)
        print("REPORT_TRIGGER", float(best_ann), "baseline", float(last_rep))
# Run the CLI entry point only when this file is executed as a script.
if __name__ == "__main__":
    main()

View File

@@ -0,0 +1,93 @@
from __future__ import annotations
import argparse
import json
import sqlite3
from pathlib import Path
from typing import Any
def fetch_top(con: sqlite3.Connection, run_id: str, limit: int) -> list[dict[str, Any]]:
    """Return up to ``limit`` trials of ``run_id``, highest ``ann_return`` first.

    Each row is a dict keyed by the ``trials`` table's column names, which
    are read from ``PRAGMA table_info``.
    """
    column_names = [info[1] for info in con.execute("PRAGMA table_info(trials)")]
    query = "SELECT * FROM trials WHERE run_id = ? ORDER BY ann_return DESC LIMIT ?"
    cursor = con.execute(query, [run_id, int(limit)])
    return [dict(zip(column_names, record)) for record in cursor]
def main() -> None:
    """Print a summary of the most recent optimisation run.

    Reads the state file, takes the last ``history`` entry, prints its
    metadata and the stored global best, then prints the top ``--top``
    trials of that run from the SQLite database as one JSON object per line.

    Raises:
        SystemExit: if the state file has no history entries.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--state", default="data/opt_state.json")
    parser.add_argument("--db", default="data/experiments.sqlite")
    parser.add_argument("--top", type=int, default=5)
    args = parser.parse_args()

    state = json.loads(Path(args.state).read_text(encoding="utf-8"))
    history = state.get("history") or []
    if not history:
        raise SystemExit("no history in opt_state.json")
    last = history[-1]
    run_id = str(last.get("run_id"))
    best = state.get("best")

    print("last_run_id", run_id)
    run_fields = ["timestamp", "seed", "trials", "jobs", "best_ann_return", "code_version"]
    print("last_run", {name: last.get(name) for name in run_fields if name in last})
    if best:
        metric_names = ("ann_return", "ann_vol", "max_drawdown", "sharpe", "trades_per_year")
        print("global_best", {name: best.get(name) for name in metric_names})

    with sqlite3.connect(str(Path(args.db))) as con:
        rows = fetch_top(con, run_id=run_id, limit=int(args.top))
    if not rows:
        print("no rows for run_id")
        return

    # Columns shown per trial; columns absent from a row are left out.
    shown = (
        "id",
        "trial",
        "ann_return",
        "ann_vol",
        "max_drawdown",
        "sharpe",
        "trades_per_year",
        "sma_fast",
        "sma_slow",
        "lazy_days",
        "min_hold_days",
        "replace_score_gap",
        "min_score",
        "macro_min_breadth",
        "macro_down_frac",
        "desired_positions_min",
        "atr_mult",
        "stop_loss_atr",
        "profit_tighten_atr",
        "atr_mult_profit",
        "bias_exit",
        "vol_ratio_exit",
    )

    print("top_trials")
    for record in rows:
        compact = {name: record.get(name) for name in shown if name in record}
        print(json.dumps(compact, ensure_ascii=False))
# Run the CLI entry point only when this file is executed as a script.
if __name__ == "__main__":
    main()

View File

@@ -0,0 +1,158 @@
from __future__ import annotations
import argparse
import json
from dataclasses import fields
from pathlib import Path
import pandas as pd
from qfr.strategy.etf_trend import Constraints, TrendParams, UniverseAsset, run_backtest
def load_prices(raw_dir: Path, universe: list[UniverseAsset]) -> dict[str, pd.DataFrame]:
    """Load the full parquet price table of every asset, keyed by ``ts_code``.

    Raises:
        FileNotFoundError: if an asset's file (``ts_code`` without dots plus
            ``.parquet``) is missing under ``raw_dir``.
    """
    tables: dict[str, pd.DataFrame] = {}
    for asset in universe:
        stem = asset.ts_code.replace(".", "")
        parquet_path = raw_dir / (stem + ".parquet")
        if not parquet_path.exists():
            raise FileNotFoundError(f"missing data file: {parquet_path}")
        tables[asset.ts_code] = pd.read_parquet(parquet_path)
    return tables
def perf_stats(equity: pd.Series) -> dict[str, float]:
    """Annualised return, annualised volatility and max drawdown of an equity curve.

    Returns ``{}`` when the curve yields no period returns. Annualisation
    assumes 252 periods per year.
    """
    returns = equity.pct_change().dropna()
    if len(returns) == 0:
        return {}
    total_growth = equity.iloc[-1] / equity.iloc[0]
    annual_return = float(total_growth ** (252 / len(returns)) - 1)
    annual_vol = float(returns.std(ddof=1) * (252**0.5))
    running_peak = equity.cummax()
    worst = (equity / running_peak - 1.0).min()
    return {"ann_return": annual_return, "ann_vol": annual_vol, "max_drawdown": float(worst)}
def add_trendparams_args(p: argparse.ArgumentParser) -> None:
    """Register optional CLI overrides for a subset of ``TrendParams`` fields.

    Expose a subset of TrendParams for fast experiments / grid search
    verification. Keep names stable and CLI-friendly (kebab-case). Every
    option defaults to ``None``, and an option is only registered when
    ``TrendParams`` actually has a field of that name.
    """
    known_fields = {f.name for f in fields(TrendParams)}
    # (TrendParams field, CLI flag, value type, help text)
    specs = (
        ("sma_fast", "--sma-fast", int, "SMA fast window"),
        ("sma_slow", "--sma-slow", int, "SMA slow window"),
        ("lazy_days", "--lazy-days", int, "Min days between switches"),
        ("min_hold_days", "--min-hold-days", int, "Min hold days before trend-exit/switch"),
        ("replace_score_gap", "--replace-score-gap", float, "Replace weakest only if score gap >= this"),
        ("min_score", "--min-score", float, "Entry score threshold (allow empty if not met)"),
        ("macro_down_frac", "--macro-down-frac", float, "Down-day breadth threshold for consistent down"),
        ("desired_positions_min", "--desired-positions-min", int, "Desired min positions (allow empty)"),
        ("desired_positions_max", "--desired-positions-max", int, "Desired max positions"),
        ("rebalance_band", "--rebalance-band", float, "Ignore small weight changes"),
        ("atr_mult", "--atr-mult", float, "Chandelier ATR multiple"),
        ("profit_tighten_atr", "--profit-tighten-atr", float, "Tighten trailing after profit >= N*ATR"),
        ("atr_mult_profit", "--atr-mult-profit", float, "Chandelier ATR multiple after tighten"),
        ("stop_loss_atr", "--stop-loss-atr", float, "Hard stop loss from entry in ATR"),
        ("bias_exit", "--bias-exit", float, "Exit when abs(bias) >= threshold"),
        ("vol_ratio_exit", "--vol-ratio-exit", float, "Exit when volume/amount ratio >= threshold"),
        ("max_weight_per_asset", "--max-weight-per-asset", float, "Max weight per risky asset"),
        ("concentration_power", "--concentration-power", float, "Weight concentration power"),
        ("macro_min_breadth", "--macro-min-breadth", float, "Min equity breadth to be risk-on"),
        ("macro_scale_risk_off", "--macro-scale-risk-off", float, "Scale risky weights in risk-off"),
    )
    for field_name, flag, caster, help_text in specs:
        if field_name in known_fields:
            p.add_argument(flag, type=caster, default=None, help=help_text)
def main() -> None:
    """Run the ETF trend backtest from the CLI and write equity, weights and trades.

    Reads the universe and constraints from ``--config``, loads raw prices from
    ``--rawdir``, keeps rows whose ``trade_date`` lies in ``[--start, --end]``
    (string comparison, both ends inclusive), runs ``run_backtest`` and writes
    the equity frame to ``--out``. Weights and trades are written next to it as
    ``<stem>_weights<suffix>`` and ``<stem>_trades<suffix>``; the trades file is
    only written when at least one trade exists.

    Raises:
        RuntimeError: if no usable rates fallback is configured and no universe
            asset has an ``asset_class`` starting with ``"rates"``.
    """
    p = argparse.ArgumentParser()
    p.add_argument("--config", default="configs/etf_universe.json")
    p.add_argument("--rawdir", default="data/raw")
    p.add_argument("--out", default="data/etf_trend_equity.parquet")
    p.add_argument("--start", default="20200101", help="Filter start trade_date YYYYMMDD (inclusive)")
    p.add_argument("--end", default="20251231", help="Filter end trade_date YYYYMMDD (inclusive)")
    add_trendparams_args(p)
    args = p.parse_args()

    conf = json.loads(Path(args.config).read_text(encoding="utf-8"))
    universe = [UniverseAsset(**a) for a in conf["assets"]]
    cons = conf.get("constraints", {})
    constraints = Constraints(
        max_positions=int(cons.get("max_positions", 4)),
        must_commodity=int(cons.get("must_include", {}).get("commodity", 1)),
        must_rates=int(cons.get("must_include", {}).get("rates", 1)),
        must_equity=int(cons.get("must_include", {}).get("equity", 1)),
    )
    params = TrendParams(max_positions=constraints.max_positions)

    # Apply CLI overrides. add_trendparams_args() silently skips any option whose
    # field is absent from TrendParams, so the attribute may not exist on `args`;
    # every name is therefore read with getattr(..., None) instead of args.<name>.
    override_names = (
        "sma_fast",
        "sma_slow",
        "lazy_days",
        "min_hold_days",
        "replace_score_gap",
        "min_score",
        "macro_down_frac",
        "desired_positions_min",
        "desired_positions_max",
        "rebalance_band",
        "atr_mult",
        "profit_tighten_atr",
        "atr_mult_profit",
        "stop_loss_atr",
        "bias_exit",
        "vol_ratio_exit",
        "max_weight_per_asset",
        "concentration_power",
        "macro_min_breadth",
        "macro_scale_risk_off",
    )
    overrides = {}
    for name in override_names:
        value = getattr(args, name, None)
        # None means "option not given" (all options default to None).
        if value is not None:
            overrides[name] = value
    if overrides:
        params = TrendParams(**{**params.__dict__, **overrides})

    risk_proxy = cons.get("risk_proxy", "510300.SH")
    rates_fallback = cons.get("rates_fallback")
    if rates_fallback is None:
        # No explicit fallback configured: use the first rates asset in the universe.
        for a in universe:
            if a.asset_class.startswith("rates"):
                rates_fallback = a.ts_code
                break
    if not rates_fallback:
        raise RuntimeError("universe must include a rates asset for fallback")

    prices = load_prices(Path(args.rawdir), universe)
    # Only values of existing keys are replaced, so iterating items() here is safe.
    for k, df in prices.items():
        d = df.copy()
        d["trade_date"] = d["trade_date"].astype(str)
        d = d[(d["trade_date"] >= str(args.start)) & (d["trade_date"] <= str(args.end))]
        prices[k] = d

    equity, weights, trades = run_backtest(
        prices,
        universe,
        constraints,
        params,
        rates_fallback=rates_fallback,
        risk_proxy=risk_proxy,
    )

    out = Path(args.out)
    out.parent.mkdir(parents=True, exist_ok=True)
    equity.to_parquet(out)
    weights_path = out.with_name(out.stem + "_weights" + out.suffix)
    trades_path = out.with_name(out.stem + "_trades" + out.suffix)
    weights.to_parquet(weights_path)
    if trades is not None and not trades.empty:
        trades.to_parquet(trades_path, index=False)
        print(f"wrote trades -> {trades_path}")

    st = perf_stats(equity["equity"])
    print("perf", st)
    print("last equity", float(equity["equity"].iloc[-1]))
    print("last weights", weights.iloc[-1].sort_values(ascending=False).head(10).to_dict())


if __name__ == "__main__":
    main()

0
scripts/run_iter20_loop.sh Executable file
View File

0
scripts/run_macro20.sh Normal file
View File

26
scripts/smoke.py Normal file
View File

@@ -0,0 +1,26 @@
from __future__ import annotations
import numpy as np
import pandas as pd
from qfr.factors import winsorize_by_date, zscore_by_date
from qfr.metrics import information_coefficient
def main() -> None:
    """Smoke-test the factor helpers on a tiny synthetic panel and print the mean IC.

    Builds a 3-date x 4-asset (date, asset) MultiIndex, draws a random factor
    and random forward returns from a generator seeded with 42, passes the
    factor through ``winsorize_by_date`` and ``zscore_by_date``, and prints the
    mean of ``information_coefficient``.
    """
    rng = np.random.default_rng(42)
    panel_index = pd.MultiIndex.from_product(
        [
            pd.to_datetime(["2026-01-01", "2026-01-02", "2026-01-03"]),
            ["A", "B", "C", "D"],
        ],
        names=["date", "asset"],
    )
    n_obs = len(panel_index)

    # Draw order matters for reproducibility: factor first, forward returns second.
    raw_factor = pd.Series(rng.normal(size=n_obs), index=panel_index)
    forward_returns = pd.Series(rng.normal(scale=0.01, size=n_obs), index=panel_index)

    clipped = winsorize_by_date(raw_factor)
    standardized = zscore_by_date(clipped)
    ic_series = information_coefficient(standardized, forward_returns)
    print("IC mean:", float(ic_series.mean()))


if __name__ == "__main__":
    main()

View File

@@ -0,0 +1,35 @@
from __future__ import annotations
import argparse
from pathlib import Path
from qfr.data.tushare_client import fetch_daily, load_tushare_config
def main() -> None:
    """Fetch daily bars via ``fetch_daily`` and store them as one parquet file.

    The Tushare configuration is loaded from ``--env``; ``--ts-code``,
    ``--trade-date``, ``--start`` and ``--end`` are forwarded unchanged (all
    default to ``None``). The result is written to ``--out`` without the index,
    creating the parent directory when needed.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--env", default=None, help="Path to .env (default: auto-detect)")
    parser.add_argument("--ts-code", default=None, help="e.g. 000001.SZ")
    parser.add_argument("--start", dest="start_date", default=None, help="YYYYMMDD")
    parser.add_argument("--end", dest="end_date", default=None, help="YYYYMMDD")
    parser.add_argument("--trade-date", default=None, help="YYYYMMDD")
    parser.add_argument("--out", default="data/raw/tushare_daily.parquet")
    opts = parser.parse_args()

    frame = fetch_daily(
        load_tushare_config(opts.env),
        ts_code=opts.ts_code,
        trade_date=opts.trade_date,
        start_date=opts.start_date,
        end_date=opts.end_date,
    )

    target = Path(opts.out)
    target.parent.mkdir(parents=True, exist_ok=True)
    frame.to_parquet(target, index=False)
    print(f"wrote {len(frame)} rows -> {target}")


if __name__ == "__main__":
    main()

View File

@@ -0,0 +1,47 @@
from __future__ import annotations
import argparse
import json
from pathlib import Path
import pandas as pd
from qfr.data.tushare_client import fetch_fund_daily, load_tushare_config
def main() -> None:
    """Download fund daily bars for every asset in the universe config.

    For each entry of ``conf["assets"]`` the bars are fetched with
    ``fetch_fund_daily``; empty or missing results are reported and skipped.
    Otherwise the frame is reduced to the columns the backtest expects, sorted
    by ``trade_date`` and written to ``<outdir>/<ts_code without dots>.parquet``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--env", default=None, help="Path to .env")
    parser.add_argument("--config", default="configs/etf_universe.json")
    parser.add_argument("--start", dest="start_date", default=None, help="YYYYMMDD")
    parser.add_argument("--end", dest="end_date", default=None, help="YYYYMMDD")
    parser.add_argument("--outdir", default="data/raw")
    opts = parser.parse_args()

    cfg = load_tushare_config(opts.env)
    asset_specs = json.loads(Path(opts.config).read_text(encoding="utf-8"))["assets"]

    target_dir = Path(opts.outdir)
    target_dir.mkdir(parents=True, exist_ok=True)

    # Columns expected by the backtest; fund_daily provides:
    # ts_code, trade_date, open, high, low, close, vol, amount
    wanted = ("ts_code", "trade_date", "open", "high", "low", "close", "vol", "amount")

    for spec in asset_specs:
        code = spec["ts_code"]
        bars = fetch_fund_daily(cfg, ts_code=code, start_date=opts.start_date, end_date=opts.end_date)
        if bars is None or bars.empty:
            print(f"skip {code}: empty")
            continue

        present = [col for col in wanted if col in bars.columns]
        bars = bars[present].copy().sort_values("trade_date")

        target = target_dir / f"{code.replace('.', '')}.parquet"
        bars.to_parquet(target, index=False)
        print(f"wrote {code}: {len(bars)} rows -> {target}")


if __name__ == "__main__":
    main()

150
scripts/verify_topn.py Normal file
View File

@@ -0,0 +1,150 @@
from __future__ import annotations
import argparse
import json
import sqlite3
from dataclasses import fields
from pathlib import Path
from typing import Any
import pandas as pd
from qfr.strategy.etf_trend import Constraints, TrendParams, UniverseAsset, run_backtest
def load_universe(config_path: Path) -> tuple[list[UniverseAsset], Constraints, str, str]:
    """Parse the universe config into assets, constraints and the two proxy codes.

    Args:
        config_path: UTF-8 JSON file with an ``"assets"`` list and an optional
            ``"constraints"`` object.

    Returns:
        ``(universe, constraints, risk_proxy, rates_fallback)``. ``risk_proxy``
        falls back to the first asset's ``ts_code`` (or ``"510300.SH"`` for an
        empty universe) when not configured; ``rates_fallback`` defaults to
        ``"511010.SH"``.
    """
    conf = json.loads(config_path.read_text(encoding="utf-8"))
    universe = [UniverseAsset(**spec) for spec in conf["assets"]]

    cons = conf.get("constraints", {})
    must_include = cons.get("must_include", {})
    constraints = Constraints(
        max_positions=int(cons.get("max_positions", 3)),
        must_commodity=int(must_include.get("commodity", 0)),
        must_rates=int(must_include.get("rates", 0)),
        must_equity=int(must_include.get("equity", 0)),
    )

    risk_proxy = cons.get("risk_proxy")
    if not risk_proxy:
        # Missing or empty setting: use the first universe asset, else a fixed code.
        risk_proxy = universe[0].ts_code if universe else "510300.SH"

    rates_fallback = cons.get("rates_fallback", "511010.SH")
    return universe, constraints, str(risk_proxy), str(rates_fallback)
def load_prices(raw_dir: Path, universe: list[UniverseAsset], start: str, end: str) -> dict[str, pd.DataFrame]:
    """Load one parquet price file per asset and clip it to ``[start, end]``.

    Args:
        raw_dir: Directory holding ``<ts_code without dots>.parquet`` files.
        universe: Assets to load; only ``ts_code`` is read from each.
        start: Inclusive lower bound, compared as a string against ``trade_date``.
        end: Inclusive upper bound, compared as a string against ``trade_date``.

    Returns:
        Mapping from ``ts_code`` to the filtered frame, in universe order.
    """
    prices: dict[str, pd.DataFrame] = {}
    for asset in universe:
        code = asset.ts_code
        frame = pd.read_parquet(raw_dir / (code.replace(".", "") + ".parquet")).copy()
        # Normalise to str so the window check is a plain lexicographic comparison.
        frame["trade_date"] = frame["trade_date"].astype(str)
        not_before = frame["trade_date"] >= start
        not_after = frame["trade_date"] <= end
        prices[code] = frame[not_before & not_after]
    return prices
def perf_stats(equity: pd.Series) -> dict[str, float]:
    """Summarise an equity curve with annualised return, volatility, drawdown and Sharpe.

    Annualisation uses 252 periods per year. Returns an empty dict when the
    curve yields no period returns (fewer than two usable points). ``sharpe``
    is ``ann_return / ann_vol`` and is NaN unless ``ann_vol`` is positive.
    """
    returns = equity.pct_change().dropna()
    n_periods = len(returns)
    if n_periods == 0:
        return {}

    total_growth = equity.iloc[-1] / equity.iloc[0]
    drawdown = equity / equity.cummax() - 1.0

    stats = {
        "ann_return": float(total_growth ** (252 / n_periods) - 1),
        "ann_vol": float(returns.std(ddof=1) * (252**0.5)),
        "max_drawdown": float(drawdown.min()),
    }
    if stats["ann_vol"] > 0:
        stats["sharpe"] = float(stats["ann_return"] / stats["ann_vol"])
    else:
        stats["sharpe"] = float("nan")
    return stats
def table_columns(con: sqlite3.Connection, table: str) -> list[str]:
    """Return the column names reported by ``PRAGMA table_info`` for ``table``.

    The table name is interpolated into the statement, so callers must pass a
    trusted identifier. An unknown table yields an empty list.
    """
    info_rows = con.execute(f"PRAGMA table_info({table})")
    # The column name is the second field of each table_info row.
    return [name for _cid, name, *_rest in info_rows]
def fetch_topn(db_path: Path, run_id: str | None, topn: int) -> tuple[list[str], list[dict[str, Any]]]:
    """Return the ``topn`` trials with the highest ``ann_return``.

    Args:
        db_path: SQLite database containing a ``trials`` table.
        run_id: When truthy, only trials with this ``run_id`` are considered.
        topn: Maximum number of rows to return.

    Returns:
        ``(cols, rows)`` where ``cols`` are the column names of the result in
        select order and ``rows`` is a list of ``{column: value}`` dicts sorted
        by ``ann_return`` descending.

    Raises:
        sqlite3.OperationalError: e.g. when the ``trials`` table does not exist.
    """
    where = ""
    params: list[Any] = []
    if run_id:
        where = "WHERE run_id = ?"
        params.append(run_id)
    sql = f"SELECT * FROM trials {where} ORDER BY ann_return DESC LIMIT ?"

    # A sqlite3 connection used as a context manager only commits/rolls back;
    # it does not close the handle, so close it explicitly.
    con = sqlite3.connect(str(db_path))
    try:
        cur = con.execute(sql, [*params, int(topn)])
        # Names come from the executed statement itself, so they always line up
        # with the values of each row tuple (and are available with zero rows).
        cols = [d[0] for d in cur.description]
        rows: list[dict[str, Any]] = [dict(zip(cols, r)) for r in cur]
    finally:
        con.close()
    return cols, rows
def main() -> None:
    """Re-run the top-N stored trials and compare their stats with the database.

    The best ``--topn`` trials (by ``ann_return``) are read from ``--db``,
    optionally restricted to ``--run_id``. For each one the stored columns that
    are ``TrendParams`` fields are turned back into a ``TrendParams``, the
    backtest is re-run over the trial's date window (``--start``/``--end``
    override the stored values; defaults are 20200101/20251231), and the
    recomputed ``ann_return``, ``ann_vol``, ``max_drawdown`` and ``sharpe`` are
    compared with the stored ones. A trial is a MISMATCH when any absolute
    difference exceeds ``--tol`` or when no stats could be recomputed.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("--db", default="data/experiments.sqlite")
    ap.add_argument("--run_id", default=None)
    ap.add_argument("--topn", type=int, default=10)
    ap.add_argument("--config", default="configs/etf_universe_industry_profiled.json")
    ap.add_argument("--rawdir", default="data/raw")
    ap.add_argument("--start", default=None)
    ap.add_argument("--end", default=None)
    ap.add_argument("--tol", type=float, default=1e-6)
    args = ap.parse_args()

    db_path = Path(args.db)
    cols, rows = fetch_topn(db_path, args.run_id, args.topn)
    if not rows:
        print("no trials found")
        return

    config_path = Path(args.config)
    universe, constraints, risk_proxy, rates_fallback = load_universe(config_path)
    tp_fields = {f.name for f in fields(TrendParams)}

    # Coerce param types: sqlite stores numerics as REAL, so ints may come back as floats.
    # The target type of each field is taken from the default TrendParams instance.
    _defaults = TrendParams()
    _field_types = {name: type(getattr(_defaults, name)) for name in tp_fields}

    def _coerce(name: str, v):
        """Convert a stored value to the Python type of the matching default."""
        if v is None:
            return None
        t = _field_types.get(name)
        if t is int:
            return int(round(float(v)))
        if t is bool:
            return bool(int(round(float(v))))
        return float(v)

    metric_keys = ["ann_return", "ann_vol", "max_drawdown", "sharpe"]
    mismatches = 0
    for idx, row in enumerate(rows, start=1):
        start = str(args.start or row.get("start") or "20200101")
        end = str(args.end or row.get("end") or "20251231")
        prices = load_prices(Path(args.rawdir), universe, start, end)

        params_dict: dict[str, Any] = {}
        for k in cols:
            if k in tp_fields and row.get(k) is not None:
                params_dict[k] = _coerce(k, row[k])
        params_dict.setdefault("max_positions", constraints.max_positions)
        tp = TrendParams(**params_dict)

        equity, _weights, _trades = run_backtest(
            prices,
            universe,
            constraints,
            tp,
            rates_fallback=rates_fallback,
            risk_proxy=risk_proxy,
        )
        st = perf_stats(equity["equity"])
        orig = {k: row.get(k) for k in metric_keys}

        if not st:
            # perf_stats() returns {} when the equity curve has no returns;
            # report the trial as unverifiable instead of failing on st[k].
            mismatches += 1
            print(f"[{idx}] MISMATCH id={row.get('id')} run_id={row.get('run_id')} start={start} end={end}")
            print(" orig:", orig)
            print(" re :", st)
            print(" diff:", "n/a (no recomputed stats)")
            continue

        diffs = {k: float(st[k] - float(row.get(k) or 0.0)) for k in metric_keys}
        bad = any(abs(v) > float(args.tol) for v in diffs.values())
        if bad:
            mismatches += 1
        tag = "MISMATCH" if bad else "OK"
        print(f"[{idx}] {tag} id={row.get('id')} run_id={row.get('run_id')} start={start} end={end}")
        print(" orig:", orig)
        print(" re :", st)
        print(" diff:", diffs)

    print(f"done. mismatches={mismatches}/{len(rows)}")


if __name__ == "__main__":
    main()