initial import: etf strategy project
This commit is contained in:
45
scripts/analyze_drawdown.py
Normal file
45
scripts/analyze_drawdown.py
Normal file
@@ -0,0 +1,45 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
from pathlib import Path
|
||||
|
||||
import pandas as pd
|
||||
|
||||
|
||||
def main() -> None:
    """Print the worst drawdown episodes of an equity curve and the trades inside them.

    Command-line arguments:
        --equity: parquet file with an ``equity`` column; its index is cast
            to ``str`` and used as the date label.
        --trades: parquet file with a ``trade_date`` column (cast to ``str``).
        --top: number of drawdown episodes to report (default 3).

    For each reported episode the trough, the preceding peak and up to the
    last 25 trades dated between peak and trough (inclusive) are printed.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("--equity", required=True)
    ap.add_argument("--trades", required=True)
    ap.add_argument("--top", type=int, default=3)
    args = ap.parse_args()

    eq = pd.read_parquet(args.equity)
    eq = eq.copy()
    eq.index = eq.index.astype(str)
    s = eq["equity"].astype(float)

    peak = s.cummax()
    dd = s / peak - 1.0

    # Find the worst drawdowns by trough, one trough per episode.  Taking the
    # N smallest daily values directly would usually return N neighbouring
    # days of the same drawdown.  An episode starts at every day the curve
    # sits at its running maximum (dd >= 0) and covers the underwater days
    # that follow it.
    below = (dd < 0).to_numpy()
    episode = (dd >= 0).to_numpy().cumsum()
    underwater = dd[below]
    troughs = [grp.idxmin() for _, grp in underwater.groupby(episode[below])]
    worst = dd.loc[troughs].nsmallest(args.top)

    tr = pd.read_parquet(args.trades)
    tr = tr.copy()
    tr["trade_date"] = tr["trade_date"].astype(str)

    for d, v in worst.items():
        # drawdown start = last date at the running peak before d
        # (idxmax would return the first date of a flat peak instead)
        prior = dd.loc[:d]
        peak_date = prior[prior >= 0].index[-1]
        print("---")
        print("trough", d, "dd", float(v))
        print("peak", peak_date, "peak_equity", float(s.loc[peak_date]), "trough_equity", float(s.loc[d]))
        w = tr[(tr["trade_date"] >= peak_date) & (tr["trade_date"] <= d)]
        print("trades in window", len(w))
        if not w.empty:
            wanted = ["trade_date", "ts_code", "side", "reason", "weight_before", "weight_after", "price"]
            cols = [c for c in wanted if c in w.columns]
            print(w[cols].tail(25).to_string(index=False))
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
164
scripts/auto_tune_etf_trend.py
Normal file
164
scripts/auto_tune_etf_trend.py
Normal file
@@ -0,0 +1,164 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import itertools
|
||||
import json
|
||||
from dataclasses import replace
|
||||
from pathlib import Path
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
|
||||
from qfr.strategy.etf_trend import Constraints, TrendParams, UniverseAsset, run_backtest
|
||||
|
||||
|
||||
def load_universe(config_path: Path) -> tuple[list[UniverseAsset], Constraints, str, str]:
    """Load the ETF universe and its constraints from a JSON config file.

    Returns ``(universe, constraints, risk_proxy, rates_fallback)``.  Every
    entry of ``assets`` becomes a ``UniverseAsset``.  Missing constraint
    settings default to ``max_positions=4``, zero ``must_include`` minimums,
    ``risk_proxy="510300.SH"`` and ``rates_fallback="511010.SH"``.
    """
    raw = json.loads(config_path.read_text(encoding="utf-8"))
    assets = [UniverseAsset(**spec) for spec in raw["assets"]]

    limits = raw.get("constraints", {})
    required = limits.get("must_include", {})
    constraints = Constraints(
        max_positions=int(limits.get("max_positions", 4)),
        must_commodity=int(required.get("commodity", 0)),
        must_rates=int(required.get("rates", 0)),
        must_equity=int(required.get("equity", 0)),
    )

    proxy = limits.get("risk_proxy", "510300.SH")
    fallback = limits.get("rates_fallback", "511010.SH")
    return assets, constraints, proxy, fallback
|
||||
|
||||
|
||||
def load_prices(raw_dir: Path, universe: list[UniverseAsset], start: str, end: str) -> dict[str, pd.DataFrame]:
    """Read one parquet price file per asset and keep rows with start <= trade_date <= end.

    The file name is the asset's ``ts_code`` with dots removed plus
    ``.parquet``.  ``trade_date`` is cast to ``str`` and compared as a string.
    The result maps ``ts_code`` to the filtered frame.
    """
    frames: dict[str, pd.DataFrame] = {}
    for asset in universe:
        code = asset.ts_code
        path = raw_dir / (code.replace(".", "") + ".parquet")
        data = pd.read_parquet(path).copy()
        data["trade_date"] = data["trade_date"].astype(str)
        not_before = data["trade_date"] >= start
        not_after = data["trade_date"] <= end
        frames[code] = data[not_before & not_after]
    return frames
|
||||
|
||||
|
||||
def perf_stats(equity: pd.Series) -> dict[str, float]:
    """Summarise an equity curve.

    Returns an empty dict when the curve has no usable period return.
    Otherwise returns ``ann_return`` (252 periods per year), ``ann_vol``
    (sample standard deviation scaled by sqrt(252)), ``max_drawdown`` (<= 0)
    and ``calmar`` (NaN when there is no drawdown).
    """
    period_returns = equity.pct_change().dropna()
    if period_returns.empty:
        return {}

    n_periods = len(period_returns)
    growth = equity.iloc[-1] / equity.iloc[0]
    ann_ret = float(growth ** (252 / n_periods) - 1)
    ann_vol = float(period_returns.std(ddof=1) * (252 ** 0.5))

    underwater = equity / equity.cummax() - 1.0
    dd = float(underwater.min())
    if dd < 0:
        calmar = float(ann_ret / abs(dd))
    else:
        calmar = float("nan")

    return {"ann_return": ann_ret, "ann_vol": ann_vol, "max_drawdown": dd, "calmar": calmar}
|
||||
|
||||
|
||||
def main() -> None:
    """Grid-search a small set of trend parameters and save the ranked results.

    Each valid combination (``sma_fast < sma_slow``) is backtested with
    ``rebalance_every=1``.  Rows with ``ann_vol <= 0.18`` are kept (all rows
    if none qualify), sorted by ``ann_return`` then ``calmar`` descending,
    written to ``--out`` and the first ten printed.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--config", default="configs/etf_universe.json")
    parser.add_argument("--rawdir", default="data/raw")
    parser.add_argument("--start", default="20200101")
    parser.add_argument("--end", default="20251231")
    parser.add_argument("--out", default="data/tune_results.parquet")
    args = parser.parse_args()

    universe, constraints, risk_proxy, rates_fallback = load_universe(Path(args.config))
    prices = load_prices(Path(args.rawdir), universe, args.start, args.end)

    base = TrendParams()

    # small grid to keep runtime reasonable; key order fixes both the
    # iteration order and the column order of the result rows
    grid = {
        "sma_fast": [5, 10],
        "sma_slow": [20, 40],
        "atr_mult": [2.5, 3.0],
        "vol_window": [10, 20],
        "port_vol_window": [40, 60],
        "max_positions": [3, 4],
    }

    results = []
    for combo in itertools.product(*grid.values()):
        choice = dict(zip(grid, combo))
        if choice["sma_fast"] >= choice["sma_slow"]:
            continue

        params = replace(base, **choice, rebalance_every=1)
        cons = replace(constraints, max_positions=choice["max_positions"])

        equity, _weights = run_backtest(
            prices,
            universe,
            cons,
            params,
            rates_fallback=rates_fallback,
            risk_proxy=risk_proxy,
        )

        stats = perf_stats(equity["equity"])
        if stats:
            results.append({**choice, **stats})

    table = pd.DataFrame(results)
    if table.empty:
        print("no results")
        return

    # filter by vol constraint first, then sort by ann_return
    ranked = table[table["ann_vol"] <= 0.18].copy()
    if ranked.empty:
        ranked = table.copy()
    ranked = ranked.sort_values(["ann_return", "calmar"], ascending=False)

    out = Path(args.out)
    out.parent.mkdir(parents=True, exist_ok=True)
    ranked.to_parquet(out, index=False)

    print("top10")
    metric_cols = ["ann_return", "ann_vol", "max_drawdown", "calmar"]
    print(ranked[metric_cols + list(grid)].head(10).to_string(index=False))
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
229
scripts/auto_tune_etf_trend_fast.py
Normal file
229
scripts/auto_tune_etf_trend_fast.py
Normal file
@@ -0,0 +1,229 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import itertools
|
||||
import json
|
||||
from dataclasses import replace
|
||||
from pathlib import Path
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
|
||||
from qfr.strategy.etf_trend import Constraints, TrendParams, UniverseAsset, compute_features, portfolio_vol, risk_parity_weights, select_portfolio
|
||||
|
||||
|
||||
def load_universe(config_path: Path) -> tuple[list[UniverseAsset], Constraints, str, str]:
    """Parse the universe JSON into ``(universe, constraints, risk_proxy, rates_fallback)``.

    ``assets`` entries are expanded into ``UniverseAsset`` keyword arguments.
    The optional ``constraints`` section supplies ``max_positions`` (default
    4), the ``must_include`` minimums (default 0 each), ``risk_proxy``
    (default ``"510300.SH"``) and ``rates_fallback`` (default ``"511010.SH"``).
    """
    conf = json.loads(config_path.read_text(encoding="utf-8"))
    universe = [UniverseAsset(**entry) for entry in conf["assets"]]

    section = conf.get("constraints", {})

    def minimum(bucket: str) -> int:
        # Required number of holdings for one asset bucket, 0 when unset.
        return int(section.get("must_include", {}).get(bucket, 0))

    constraints = Constraints(
        max_positions=int(section.get("max_positions", 4)),
        must_commodity=minimum("commodity"),
        must_rates=minimum("rates"),
        must_equity=minimum("equity"),
    )

    return (
        universe,
        constraints,
        section.get("risk_proxy", "510300.SH"),
        section.get("rates_fallback", "511010.SH"),
    )
|
||||
|
||||
|
||||
def load_prices(raw_dir: Path, universe: list[UniverseAsset], start: str, end: str) -> dict[str, pd.DataFrame]:
    """Load each asset's parquet file and restrict it to ``[start, end]``.

    Files are named after ``ts_code`` with the dots stripped.  ``trade_date``
    is converted to ``str`` before the inclusive string comparison.
    """
    def read_one(ts_code: str) -> pd.DataFrame:
        # Read, normalise the date column, then cut to the requested window.
        table = pd.read_parquet(raw_dir / f"{ts_code.replace('.', '')}.parquet")
        table = table.copy()
        table["trade_date"] = table["trade_date"].astype(str)
        window = (table["trade_date"] >= start) & (table["trade_date"] <= end)
        return table[window]

    by_code: dict[str, pd.DataFrame] = {}
    for member in universe:
        by_code[member.ts_code] = read_one(member.ts_code)
    return by_code
|
||||
|
||||
|
||||
def perf_stats(equity: pd.Series) -> dict[str, float]:
    """Compute annualised return, volatility, max drawdown and Calmar ratio.

    An empty dict is returned when no period return can be computed.
    Annualisation assumes 252 periods per year.  ``calmar`` is NaN when the
    curve never falls below its running maximum.
    """
    rets = equity.pct_change().dropna()
    if rets.empty:
        return {}

    total_growth = equity.iloc[-1] / equity.iloc[0]
    stats = {
        "ann_return": float(total_growth ** (252 / len(rets)) - 1),
        "ann_vol": float(rets.std(ddof=1) * (252 ** 0.5)),
        "max_drawdown": float((equity / equity.cummax() - 1.0).min()),
    }
    worst = stats["max_drawdown"]
    stats["calmar"] = float(stats["ann_return"] / abs(worst)) if worst < 0 else float("nan")
    return stats
|
||||
|
||||
|
||||
def run_backtest_cached(
    feats: dict[str, pd.DataFrame],
    universe: list[UniverseAsset],
    constraints: Constraints,
    params: TrendParams,
    rates_fallback: str,
    risk_proxy: str,
) -> pd.DataFrame:
    """Backtest the trend strategy on pre-computed per-asset feature frames.

    Args:
        feats: maps ts_code to a frame with ``trade_date``, ``close``,
            ``atr``, ``ma_fast``, ``ma_slow``, ``trend_ok``, ``score_raw``
            and ``vol`` columns.
        universe: forwarded to ``select_portfolio``.
        constraints: forwarded to ``select_portfolio``.
        params: supplies ``atr_mult``, ``rebalance_every``,
            ``port_vol_window`` and ``target_ann_vol``.
        rates_fallback: asset that receives the uninvested remainder when it
            is one of the ``feats`` keys.
        risk_proxy: must be one of the ``feats`` keys.

    Returns:
        A one-column frame ``equity`` indexed by the sorted dates common to
        all assets.  Returns are earned on the previous day's weights.

    Raises:
        RuntimeError: when the assets share no trade date or ``risk_proxy``
            is not among the assets.
    """
    # align dates intersection
    dates = None
    for f in feats.values():
        d = set(f["trade_date"].astype(str))
        dates = d if dates is None else dates.intersection(d)
    if not dates:
        raise RuntimeError("No overlapping trade_date")
    all_dates = sorted(dates)

    # Index and align every feature frame exactly once.  Doing this inside
    # the date loop repeats the same set_index/reindex for every asset on
    # every rebalance day.
    aligned = {ts: f.set_index("trade_date").reindex(all_dates) for ts, f in feats.items()}

    close_px = pd.DataFrame(index=all_dates)
    ret1 = pd.DataFrame(index=all_dates)
    for ts, g in aligned.items():
        close_px[ts] = g["close"].astype(float)
        ret1[ts] = close_px[ts].pct_change().fillna(0.0)

    if risk_proxy not in close_px.columns:
        raise RuntimeError("risk_proxy missing")

    weights = pd.DataFrame(0.0, index=all_dates, columns=close_px.columns)

    in_pos: set[str] = set()
    highest_close: dict[str, float] = {}

    atr_map = {ts: aligned[ts]["atr"].astype(float) for ts in close_px.columns}
    mf_map = {ts: aligned[ts]["ma_fast"].astype(float) for ts in close_px.columns}
    ms_map = {ts: aligned[ts]["ma_slow"].astype(float) for ts in close_px.columns}

    last_reb = -10**9

    for i, d in enumerate(all_dates):
        # carry yesterday's weights forward before applying today's changes
        if i > 0:
            weights.loc[d] = weights.iloc[i - 1]

        # update the highest close seen for every held asset
        for ts in list(in_pos):
            c = float(close_px.loc[d, ts])
            if np.isfinite(c):
                highest_close[ts] = max(highest_close.get(ts, c), c)

        # exits
        for ts in list(in_pos):
            c = float(close_px.loc[d, ts])
            mf = float(mf_map[ts].loc[d])
            ms = float(ms_map[ts].loc[d])
            atr = float(atr_map[ts].loc[d])
            h = highest_close.get(ts, c)
            # fast average below slow average
            trend_break = (np.isfinite(mf) and np.isfinite(ms) and (mf < ms))
            # close more than atr_mult ATRs below the tracked highest close
            chand_break = np.isfinite(atr) and c < (h - params.atr_mult * atr)
            if trend_break or chand_break:
                weights.loc[d, ts] = 0.0
                in_pos.remove(ts)
                highest_close.pop(ts, None)

        if (i - last_reb) >= params.rebalance_every:
            rows = []
            for ts in close_px.columns:
                snap_row = aligned[ts].iloc[i]
                rows.append((ts, bool(snap_row["trend_ok"]) if pd.notna(snap_row["trend_ok"]) else False,
                             float(snap_row["score_raw"]) if pd.notna(snap_row["score_raw"]) else float("nan"),
                             float(snap_row["vol"]) if pd.notna(snap_row["vol"]) else float("nan")))
            snap = pd.DataFrame(rows, columns=["ts_code", "trend_ok", "score_raw", "vol"]).set_index("ts_code")

            picks = select_portfolio(snap, universe, constraints)
            vol = snap.loc[picks, "vol"].copy()
            w = risk_parity_weights(vol, max_w=0.50)

            trailing = ret1[picks].iloc[max(0, i - params.port_vol_window + 1) : i + 1]
            pvol = portfolio_vol(trailing, w)
            # only ever scale exposure down towards the volatility target
            scale = 1.0
            if np.isfinite(pvol) and pvol > 0:
                scale = min(1.0, params.target_ann_vol / pvol)

            w_exec = w * scale
            weights.loc[d] = 0.0
            for ts, wi in w_exec.items():
                weights.loc[d, ts] = float(wi)

            # park whatever is not invested in the fallback asset
            rem = 1.0 - float(w_exec.sum())
            if rem > 1e-12 and rates_fallback in weights.columns:
                weights.loc[d, rates_fallback] += rem

            # NOTE(review): assets dropped here (rather than by an exit) keep
            # their highest_close entry, so a later re-entry starts from the
            # old high.  Confirm this matches the reference run_backtest.
            in_pos = {ts for ts in close_px.columns if weights.loc[d, ts] > 1e-12}
            for ts in in_pos:
                c = float(close_px.loc[d, ts])
                highest_close[ts] = max(highest_close.get(ts, c), c)

            last_reb = i

    # weights decided on day t earn the return of day t+1
    w_lag = weights.shift(1).fillna(0.0)
    port_ret = (ret1 * w_lag).sum(axis=1)
    equity = (1.0 + port_ret).cumprod().to_frame("equity")
    return equity
|
||||
|
||||
|
||||
def main() -> None:
    """Grid-search trend parameters with ``run_backtest_cached`` and save the ranking.

    Every ``sma_fast < sma_slow`` pair is combined with the remaining grid
    values, and features are recomputed for each parameter set before the
    backtest.  Rows with ``ann_vol <= 0.18`` are ranked by ``ann_return``
    then ``calmar`` (descending), written to ``--out`` and the first ten
    printed.  When no row meets the volatility cap, all rows are ranked
    instead so the run does not produce an empty result.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("--config", default="configs/etf_universe.json")
    ap.add_argument("--rawdir", default="data/raw")
    ap.add_argument("--start", default="20200101")
    ap.add_argument("--end", default="20251231")
    ap.add_argument("--out", default="data/tune_results_fast.parquet")
    args = ap.parse_args()

    universe, constraints, risk_proxy, rates_fallback = load_universe(Path(args.config))
    prices = load_prices(Path(args.rawdir), universe, args.start, args.end)

    base = TrendParams(rebalance_every=1)

    # grid (keep small)
    fast_list = [3, 5, 8]
    slow_list = [15, 20, 30]
    atr_mult_list = [2.0, 2.5, 3.0]
    vol_window_list = [10, 20]
    port_vol_window_list = [40, 60]
    max_positions_list = [3, 4]

    rows = []

    for sma_fast, sma_slow in itertools.product(fast_list, slow_list):
        if sma_fast >= sma_slow:
            continue
        for atr_mult, vol_window, port_vol_window, max_positions in itertools.product(
            atr_mult_list, vol_window_list, port_vol_window_list, max_positions_list
        ):
            params = replace(
                base,
                max_positions=max_positions,
                sma_fast=sma_fast,
                sma_slow=sma_slow,
                atr_mult=atr_mult,
                vol_window=vol_window,
                port_vol_window=port_vol_window,
            )
            cons = replace(constraints, max_positions=max_positions)

            feats = {ts: compute_features(df, params) for ts, df in prices.items()}
            equity = run_backtest_cached(feats, universe, cons, params, rates_fallback, risk_proxy)
            st = perf_stats(equity["equity"])
            if not st:
                continue
            rows.append({
                "sma_fast": sma_fast,
                "sma_slow": sma_slow,
                "atr_mult": atr_mult,
                "vol_window": vol_window,
                "port_vol_window": port_vol_window,
                "max_positions": max_positions,
                **st,
            })

    df = pd.DataFrame(rows)
    if df.empty:
        print("no results")
        return

    filt = df[df["ann_vol"] <= 0.18]
    if filt.empty:
        # No combination met the volatility cap: rank everything rather than
        # writing and printing an empty table.
        filt = df
    filt = filt.sort_values(["ann_return", "calmar"], ascending=False)

    out = Path(args.out)
    out.parent.mkdir(parents=True, exist_ok=True)
    filt.to_parquet(out, index=False)

    cols = ["ann_return", "ann_vol", "max_drawdown", "calmar", "sma_fast", "sma_slow", "atr_mult", "vol_window", "port_vol_window", "max_positions"]
    print("top10")
    print(filt[cols].head(10).to_string(index=False))
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
95
scripts/auto_tune_etf_trend_small.py
Normal file
95
scripts/auto_tune_etf_trend_small.py
Normal file
@@ -0,0 +1,95 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import json
|
||||
from dataclasses import replace
|
||||
from pathlib import Path
|
||||
|
||||
import pandas as pd
|
||||
|
||||
from qfr.strategy.etf_trend import Constraints, TrendParams, UniverseAsset, run_backtest
|
||||
|
||||
|
||||
def load_universe(config_path: Path):
    """Read the universe JSON and return ``(universe, constraints, risk_proxy, rates_fallback)``.

    Defaults when the ``constraints`` section omits a setting:
    ``max_positions`` 4, every ``must_include`` minimum 0, ``risk_proxy``
    ``"510300.SH"`` and ``rates_fallback`` ``"511010.SH"``.
    """
    document = json.loads(config_path.read_text(encoding="utf-8"))
    members = [UniverseAsset(**item) for item in document["assets"]]
    rules = document.get("constraints", {})
    floor = rules.get("must_include", {})
    limits = Constraints(
        max_positions=int(rules.get("max_positions", 4)),
        must_commodity=int(floor.get("commodity", 0)),
        must_rates=int(floor.get("rates", 0)),
        must_equity=int(floor.get("equity", 0)),
    )
    risk_proxy = rules.get("risk_proxy", "510300.SH")
    rates_fallback = rules.get("rates_fallback", "511010.SH")
    return members, limits, risk_proxy, rates_fallback
|
||||
|
||||
|
||||
def load_prices(raw_dir: Path, universe: list[UniverseAsset], start: str, end: str):
    """Return ``{ts_code: price frame}`` limited to ``start <= trade_date <= end``.

    Each frame is read from ``<ts_code without dots>.parquet`` under
    ``raw_dir``.  ``trade_date`` is stored as ``str`` and compared as a string.
    """
    loaded = {}
    for item in universe:
        file_name = item.ts_code.replace(".", "") + ".parquet"
        prices = pd.read_parquet(raw_dir / file_name)
        prices = prices.copy()
        prices["trade_date"] = prices["trade_date"].astype(str)
        keep = (prices["trade_date"] >= start) & (prices["trade_date"] <= end)
        loaded[item.ts_code] = prices[keep]
    return loaded
|
||||
|
||||
|
||||
def perf_stats(equity: pd.Series):
    """Return ``(ann_return, ann_vol, max_drawdown)`` for an equity curve.

    Annualisation assumes 252 periods per year and volatility uses the
    sample standard deviation.  A curve without any usable period return
    (fewer than two points) yields three NaNs instead of raising
    ``ZeroDivisionError`` or ``IndexError``.
    """
    r = equity.pct_change().dropna()
    if r.empty:
        nan = float("nan")
        return nan, nan, nan
    ann_ret = float((equity.iloc[-1] / equity.iloc[0]) ** (252 / len(r)) - 1)
    ann_vol = float(r.std(ddof=1) * (252 ** 0.5))
    dd = float((equity / equity.cummax() - 1.0).min())
    return ann_ret, ann_vol, dd
|
||||
|
||||
|
||||
def main() -> None:
    """Backtest a handful of hand-picked parameter sets and print them ranked by return.

    Each candidate is ``(sma_fast, sma_slow, atr_mult)`` applied on top of
    ``TrendParams(rebalance_every=1, max_positions=4)``.  The printed table
    is sorted by ``ann_return`` descending.
    """
    parser = argparse.ArgumentParser()
    for flag, default in (
        ("--config", "configs/etf_universe.json"),
        ("--rawdir", "data/raw"),
        ("--start", "20200101"),
        ("--end", "20251231"),
    ):
        parser.add_argument(flag, default=default)
    args = parser.parse_args()

    universe, constraints, risk_proxy, rates_fallback = load_universe(Path(args.config))
    prices = load_prices(Path(args.rawdir), universe, args.start, args.end)

    base = TrendParams(rebalance_every=1, max_positions=4)

    # A very small candidate set (fast to run)
    candidates = [
        (5, 20, 3.0),
        (5, 20, 2.5),
        (3, 15, 2.5),
        (8, 30, 3.0),
        (10, 40, 3.0),
        (5, 30, 3.0),
    ]

    def evaluate(sma_fast, sma_slow, atr_mult):
        # Run one backtest and collect its statistics as a table row.
        trial = replace(base, sma_fast=sma_fast, sma_slow=sma_slow, atr_mult=atr_mult)
        equity, _w = run_backtest(
            prices,
            universe,
            constraints,
            trial,
            rates_fallback=rates_fallback,
            risk_proxy=risk_proxy,
        )
        ann_ret, ann_vol, dd = perf_stats(equity["equity"])
        return {
            "ann_return": ann_ret,
            "ann_vol": ann_vol,
            "max_drawdown": dd,
            "sma_fast": sma_fast,
            "sma_slow": sma_slow,
            "atr_mult": atr_mult,
        }

    rows = [evaluate(*candidate) for candidate in candidates]

    ranked = pd.DataFrame(rows).sort_values(["ann_return"], ascending=False)
    print(ranked.to_string(index=False))
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
148
scripts/expand_etf_universe.py
Normal file
148
scripts/expand_etf_universe.py
Normal file
@@ -0,0 +1,148 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import math
|
||||
from collections import defaultdict
|
||||
from datetime import date, timedelta
|
||||
from pathlib import Path
|
||||
|
||||
import pandas as pd
|
||||
|
||||
from qfr.data.tushare_client import load_tushare_config, pro_api
|
||||
|
||||
|
||||
def median_amount(cfg, ts_code: str, start: str, end: str) -> float:
    """Return the median of the ``amount`` column from ``fund_daily`` for one fund.

    Returns ``0.0`` when the API returns nothing, the ``amount`` column is
    absent, or no value is numeric.
    """
    daily = pro_api(cfg).fund_daily(
        ts_code=ts_code,
        start_date=start,
        end_date=end,
        fields="trade_date,amount",
    )
    usable = daily is not None and not daily.empty and "amount" in daily.columns
    if not usable:
        return 0.0

    # non-numeric entries become NaN and are dropped
    amounts = pd.to_numeric(daily["amount"], errors="coerce").dropna()
    return 0.0 if amounts.empty else float(amounts.median())
|
||||
|
||||
|
||||
def classify_by_keyword(kw: str) -> str:
    """Map a search keyword to an asset-class tag by exact match.

    Rates keywords are checked first, then commodity keywords, so a keyword
    listed in several groups (e.g. "稀土") takes the first matching tag.
    Anything unrecognised is tagged ``"equity_cn_sector"``.
    """
    # very rough tagging for universe constraints / reporting
    rates_kws = frozenset({"国债", "政金债", "债", "短债", "中债"})
    commodity_kws = frozenset(
        {"黄金", "白银", "有色", "稀土", "矿业", "原油", "油", "煤", "化工", "豆粕", "农业"}
    )
    equity_kws = frozenset(
        {
            "半导体", "芯片", "通信", "5G", "通信设备", "军工", "机器人",
            "工业母机", "智能制造", "消费电子", "AI", "算力", "软件",
            "创新药", "医药", "新能源", "光伏", "锂电", "电池", "新材料", "稀土",
        }
    )

    ordered_groups = (
        ("rates_cn", rates_kws),
        ("commodity_cn", commodity_kws),
        ("equity_cn_sector", equity_kws),
    )
    for tag, words in ordered_groups:
        if kw in words:
            return tag
    return "equity_cn_sector"
|
||||
|
||||
|
||||
def main() -> None:
    """Add the most traded listed ETFs per keyword to the universe config.

    Funds from ``fund_basic`` are bucketed by the first keyword (in
    ``--keywords`` order) found in their name.  Within a bucket, funds not
    already in the config are scored by their median ``amount`` over the
    last 180 calendar days, and the top ``--per_keyword`` with a positive
    median of at least ``--min_median_amount`` are appended.  The config is
    written to ``--out``, or back to ``--config`` when ``--out`` is omitted.

    Raises:
        RuntimeError: when ``fund_basic`` returns no rows.
    """
    import sys

    ap = argparse.ArgumentParser()
    ap.add_argument("--config", default="configs/etf_universe.json")
    ap.add_argument("--out", default=None)
    ap.add_argument("--per_keyword", type=int, default=2)
    ap.add_argument("--min_median_amount", type=float, default=0.0)
    ap.add_argument(
        "--keywords",
        default=(
            "半导体,芯片,通信,5G,通信设备,军工,机器人,工业母机,智能制造,消费电子,AI,算力,软件,创新药,医药,新能源,光伏,锂电,电池,"
            "矿业,有色,稀土,新材料,黄金,白银,原油,煤,化工,豆粕,农业,国债,政金债"
        ),
    )
    args = ap.parse_args()

    cfg = load_tushare_config()
    api = pro_api(cfg)

    conf_path = Path(args.config)
    conf = json.loads(conf_path.read_text(encoding="utf-8"))

    assets = conf.get("assets", [])
    have = {a["ts_code"] for a in assets}

    kw_list = [k.strip() for k in str(args.keywords).split(",") if k.strip()]

    fb = api.fund_basic(market="E", status="L", fields="ts_code,name")
    if fb is None or fb.empty:
        raise RuntimeError("fund_basic returned empty")

    fb = fb.dropna(subset=["ts_code", "name"]).copy()

    end = date.today().strftime("%Y%m%d")
    start = (date.today() - timedelta(days=180)).strftime("%Y%m%d")

    # each fund lands in the bucket of the first keyword its name contains
    buckets: dict[str, list[tuple[str, str]]] = defaultdict(list)
    for _, r in fb.iterrows():
        ts_code = str(r["ts_code"]).strip()
        name = str(r["name"]).strip()
        for kw in kw_list:
            if kw in name:
                buckets[kw].append((ts_code, name))
                break

    chosen: list[tuple[str, str, str, float, str]] = []

    for kw in kw_list:
        cands = buckets.get(kw, [])
        if not cands:
            continue

        scored: list[tuple[float, str, str]] = []
        for ts_code, name in cands:
            if ts_code in have:
                continue
            try:
                m = median_amount(cfg, ts_code, start, end)
            except Exception as exc:
                # Best effort: one failing lookup must not abort the scan, but
                # report it so that dropped candidates are not silent.
                print(f"warning: median_amount failed for {ts_code}: {exc}", file=sys.stderr)
                m = 0.0
            if not math.isfinite(m) or m <= 0:
                continue
            if m < float(args.min_median_amount):
                continue
            scored.append((m, ts_code, name))

        # highest median amount first
        scored.sort(reverse=True)
        for m, ts_code, name in scored[: int(args.per_keyword)]:
            cls = classify_by_keyword(kw)
            chosen.append((kw, ts_code, name, m, cls))

    for kw, ts_code, name, m, cls in chosen:
        assets.append({"ts_code": ts_code, "asset_class": cls, "name": name})
        have.add(ts_code)

    conf["assets"] = assets

    out_path = Path(args.out) if args.out else conf_path
    out_path.write_text(json.dumps(conf, ensure_ascii=True, indent=2) + "\n", encoding="utf-8")

    print(f"added {len(chosen)} ETFs")
    for kw, ts_code, name, m, cls in chosen[:80]:
        print(f"{kw}\t{ts_code}\t{m:.0f}\t{cls}\t{name}")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
128
scripts/filter_universe_by_profile.py
Normal file
128
scripts/filter_universe_by_profile.py
Normal file
@@ -0,0 +1,128 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import json
|
||||
from pathlib import Path
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
|
||||
|
||||
def load_prices(raw_dir: Path, ts_code: str) -> pd.DataFrame:
    """Read one asset's parquet file and return it sorted by ``trade_date``.

    The file name is ``ts_code`` with dots removed plus ``.parquet``.
    ``trade_date`` is cast to ``str`` and the index is reset after sorting.
    """
    path = raw_dir / (ts_code.replace(".", "") + ".parquet")
    prices = pd.read_parquet(path).copy()
    prices["trade_date"] = prices["trade_date"].astype(str)
    ordered = prices.sort_values("trade_date")
    return ordered.reset_index(drop=True)
|
||||
|
||||
|
||||
def ann_vol(ret1: pd.Series) -> float:
    """Annualised volatility: sample standard deviation of ``ret1`` times sqrt(252).

    NaNs are dropped first.  Fewer than 50 remaining observations give NaN.
    """
    observed = ret1.dropna()
    has_enough = len(observed) >= 50
    if not has_enough:
        return float("nan")
    scaled = observed.std(ddof=1) * np.sqrt(252.0)
    return float(scaled)
|
||||
|
||||
|
||||
def max_drawdown(close: pd.Series) -> float:
    """Largest peak-to-trough decline of ``close`` as a non-positive fraction.

    Returns NaN when the series has fewer than 50 points or holds only NaNs.
    """
    prices = close.astype(float)
    all_missing = prices.isna().all()
    if all_missing or len(prices) < 50:
        return float("nan")

    # rebase to the first price, then measure the distance to the running high
    rebased = prices / float(prices.iloc[0])
    running_high = rebased.cummax()
    shortfall = rebased / running_high - 1.0
    return float(shortfall.min())
|
||||
|
||||
|
||||
def bias_stats(close: pd.Series, ma_n: int = 20) -> tuple[float, float]:
    """Mean and sample standard deviation of ``close / moving_average - 1``.

    The moving average uses a full ``ma_n`` window.  Fewer than 50 resulting
    bias values give ``(NaN, NaN)``.
    """
    prices = close.astype(float)
    moving_avg = prices.rolling(ma_n, min_periods=ma_n).mean()
    bias = prices / moving_avg - 1.0
    bias = bias.dropna()
    if len(bias) >= 50:
        return float(bias.mean()), float(bias.std(ddof=1))
    nan = float("nan")
    return nan, nan
|
||||
|
||||
|
||||
def main() -> None:
    """Profile each asset's recent price behaviour and write a filtered universe config.

    For every asset with at least ``--window + 50`` rows between ``--start``
    and ``--end``, the last ``--window`` closes are used to compute
    annualised volatility, max drawdown and 20-day bias statistics.  The
    full profile is written to ``--out``.  Assets passing the
    volatility, drawdown and bias filters are ranked by
    ``0.70 * rank(ann_vol) + 0.30 * rank(max_dd)`` and the top ``--top`` are
    kept in the config written to ``--out_config``.

    Raises:
        SystemExit: when no asset could be profiled.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("--config", default="configs/etf_universe_industry_only.json")
    ap.add_argument("--rawdir", default="data/raw")
    ap.add_argument("--start", default="20200101")
    ap.add_argument("--end", default="20251231")
    ap.add_argument("--window", type=int, default=504, help="profile window in trading days")
    ap.add_argument("--out", default="data/universe_profile.parquet")

    # filters (keep high vol, avoid pathological drawdowns)
    ap.add_argument("--min_ann_vol", type=float, default=0.18)
    ap.add_argument("--max_dd_floor", type=float, default=-0.65, help="drop assets with max_dd < floor")
    ap.add_argument("--min_bias_std", type=float, default=0.02)
    ap.add_argument("--max_bias_std", type=float, default=0.20)

    ap.add_argument("--top", type=int, default=40, help="how many to keep after scoring")
    ap.add_argument("--out_config", default="configs/etf_universe_industry_profiled.json")
    args = ap.parse_args()

    conf = json.loads(Path(args.config).read_text(encoding="utf-8"))
    assets = conf["assets"]

    raw = Path(args.rawdir)

    rows = []
    for a in assets:
        ts = a["ts_code"]
        df = load_prices(raw, ts)
        df = df[(df["trade_date"] >= args.start) & (df["trade_date"] <= args.end)]
        # skip assets with less than window + 50 rows of history
        if len(df) < int(args.window) + 50:
            continue

        tail = df.tail(int(args.window))
        close = tail["close"].astype(float)
        ret1 = close.pct_change()

        v = ann_vol(ret1)
        dd = max_drawdown(close)
        bmu, bsd = bias_stats(close, 20)

        rows.append(
            {
                "ts_code": ts,
                "name": a.get("name"),
                "asset_class": a.get("asset_class"),
                "ann_vol": v,
                "max_dd": dd,
                "bias20_mean": bmu,
                "bias20_std": bsd,
            }
        )

    prof = pd.DataFrame(rows)
    if prof.empty:
        raise SystemExit("no assets profiled")

    # create the output directory first so a fresh checkout does not fail
    # only after all assets have been profiled
    out_path = Path(args.out)
    out_path.parent.mkdir(parents=True, exist_ok=True)
    prof.to_parquet(out_path, index=False)

    # filter
    f = prof.copy()
    f = f[np.isfinite(f["ann_vol"]) & np.isfinite(f["max_dd"]) & np.isfinite(f["bias20_std"])].copy()
    f = f[(f["ann_vol"] >= float(args.min_ann_vol))]
    f = f[(f["max_dd"] >= float(args.max_dd_floor))]
    f = f[(f["bias20_std"] >= float(args.min_bias_std)) & (f["bias20_std"] <= float(args.max_bias_std))]
    # own the data before adding columns; f is a boolean-filter result here
    f = f.copy()

    # score: prefer high vol and stable (less extreme dd). still keep high beta.
    # normalize with ranks to avoid scale issues
    f["r_vol"] = f["ann_vol"].rank(pct=True)
    f["r_dd"] = f["max_dd"].rank(pct=True)  # less negative => higher rank
    f["score"] = 0.70 * f["r_vol"] + 0.30 * f["r_dd"]

    f = f.sort_values("score", ascending=False)
    keep = set(f.head(int(args.top))["ts_code"].tolist())

    new_conf = conf.copy()
    new_conf["assets"] = [a for a in assets if a["ts_code"] in keep]
    out_config = Path(args.out_config)
    out_config.parent.mkdir(parents=True, exist_ok=True)
    out_config.write_text(json.dumps(new_conf, ensure_ascii=True, indent=2) + "\n", encoding="utf-8")

    print("profiled", len(prof), "filtered_keep", len(new_conf["assets"]))
    print(f.head(15)[["ts_code", "ann_vol", "max_dd", "bias20_std", "score"]].to_string(index=False))
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
159
scripts/grid_search_opt.py
Normal file
159
scripts/grid_search_opt.py
Normal file
@@ -0,0 +1,159 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import itertools
|
||||
import json
|
||||
import random
|
||||
from dataclasses import asdict, replace
|
||||
from pathlib import Path
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
|
||||
from qfr.strategy.etf_trend import Constraints, TrendParams, UniverseAsset, run_backtest
|
||||
|
||||
|
||||
def load_universe(config_path: Path) -> tuple[list[UniverseAsset], Constraints, str, str]:
    """Build the asset universe and portfolio constraints from a JSON config.

    Returns ``(universe, constraints, risk_proxy, rates_fallback)``.  A
    missing ``constraints`` section, or missing keys inside it, fall back to
    ``max_positions=4``, ``must_include`` minimums of 0, ``"510300.SH"`` as
    risk proxy and ``"511010.SH"`` as rates fallback.
    """
    settings = json.loads(config_path.read_text(encoding="utf-8"))
    universe = [UniverseAsset(**fields) for fields in settings["assets"]]

    cons = settings.get("constraints", {})
    must_have = cons.get("must_include", {})
    max_positions = int(cons.get("max_positions", 4))
    constraints = Constraints(
        max_positions=max_positions,
        must_commodity=int(must_have.get("commodity", 0)),
        must_rates=int(must_have.get("rates", 0)),
        must_equity=int(must_have.get("equity", 0)),
    )

    return (
        universe,
        constraints,
        cons.get("risk_proxy", "510300.SH"),
        cons.get("rates_fallback", "511010.SH"),
    )
|
||||
|
||||
|
||||
def load_prices(raw_dir: Path, universe: list[UniverseAsset], start: str, end: str) -> dict[str, pd.DataFrame]:
    """Map every asset's ``ts_code`` to its price rows within ``[start, end]``.

    Prices come from ``<ts_code without dots>.parquet`` in ``raw_dir``.  The
    date bounds are inclusive and compared against ``trade_date`` as strings.
    """
    result: dict[str, pd.DataFrame] = {}
    for entry in universe:
        source = raw_dir / "{}.parquet".format(entry.ts_code.replace(".", ""))
        quotes = pd.read_parquet(source)
        quotes = quotes.copy()
        quotes["trade_date"] = quotes["trade_date"].astype(str)
        trade_dates = quotes["trade_date"]
        result[entry.ts_code] = quotes[(trade_dates >= start) & (trade_dates <= end)]
    return result
|
||||
|
||||
|
||||
def perf_stats(equity: pd.Series) -> dict[str, float]:
    """Return annualised return, volatility, max drawdown and a return/vol ratio.

    The result is empty when the curve yields no period return.  ``sharpe``
    is ``ann_return / ann_vol`` (no risk-free rate) and NaN unless the
    volatility is positive.  Annualisation assumes 252 periods per year.
    """
    changes = equity.pct_change().dropna()
    if changes.empty:
        return {}

    exponent = 252 / len(changes)
    ann_ret = float((equity.iloc[-1] / equity.iloc[0]) ** exponent - 1)
    ann_vol = float(changes.std(ddof=1) * (252 ** 0.5))
    running_max = equity.cummax()
    dd = float((equity / running_max - 1.0).min())

    if ann_vol > 0:
        sharpe = float(ann_ret / ann_vol)
    else:
        sharpe = float("nan")
    return {"ann_return": ann_ret, "ann_vol": ann_vol, "max_drawdown": dd, "sharpe": sharpe}
|
||||
|
||||
|
||||
def main() -> None:
    """Randomly sample a parameter grid, backtest each combo and save the results.

    The full cartesian product of ``grid`` is built, at most ``--max_combos``
    combos are sampled from it (seeded by ``--seed``), and each one with
    ``sma_fast < sma_slow`` is backtested. Rows with ``ann_vol <= 0.25`` are
    sorted by ``ann_return`` then ``sharpe`` (descending), written to ``--out``
    as parquet, and the top 10 are printed.
    """
    # Imported here because the module header imports neither `random` nor
    # `asdict`; without them this function raises NameError.
    import random
    from dataclasses import asdict

    ap = argparse.ArgumentParser()
    ap.add_argument("--config", default="configs/etf_universe.json")
    ap.add_argument("--rawdir", default="data/raw")
    ap.add_argument("--start", default="20200101")
    ap.add_argument("--end", default="20251231")
    ap.add_argument("--out", default="data/grid_search_results.parquet")
    ap.add_argument("--seed", type=int, default=1)
    ap.add_argument("--max_combos", type=int, default=400, help="Randomly sample at most this many combos")
    args = ap.parse_args()

    universe, constraints, risk_proxy, rates_fallback = load_universe(Path(args.config))
    prices = load_prices(Path(args.rawdir), universe, args.start, args.end)

    base = TrendParams(target_ann_vol=0.25)

    # Keep grid small. We will sample max_combos from the full cartesian product.
    grid = {
        "sma_fast": [3, 5, 8],
        "sma_slow": [15, 20, 30, 40],
        "lazy_days": [2, 5],
        "rebalance_band": [0.03, 0.06],
        "atr_mult": [2.5, 3.2, 4.0],
        "profit_tighten_atr": [3.0, 4.0],
        "atr_mult_profit": [1.5, 2.0],
        "stop_loss_atr": [2.5, 3.2],
        "bias_exit": [0.12, 0.18],
        "vol_ratio_exit": [2.0, 3.0],
        "max_weight_per_asset": [0.7, 0.9],
        "concentration_power": [1.6, 2.2],
    }

    keys = list(grid.keys())
    combos = list(itertools.product(*(grid[k] for k in keys)))

    random.seed(int(args.seed))
    if int(args.max_combos) > 0 and len(combos) > int(args.max_combos):
        combos = random.sample(combos, int(args.max_combos))

    rows = []
    failed = 0

    for vals in combos:
        kw = dict(zip(keys, vals))
        # The fast average must be strictly shorter than the slow one.
        if int(kw["sma_fast"]) >= int(kw["sma_slow"]):
            continue

        params = replace(base, **kw, rebalance_every=1, max_positions=constraints.max_positions)

        try:
            equity, _w, _tr = run_backtest(
                prices,
                universe,
                constraints,
                params,
                rates_fallback=rates_fallback,
                risk_proxy=risk_proxy,
            )
        except Exception:
            # Best-effort search: one failing combo must not abort the run,
            # but the number of failures is reported below.
            failed += 1
            continue

        st = perf_stats(equity["equity"])
        if not st:
            continue

        rows.append({**st, **asdict(params)})

    if failed:
        print(f"skipped {failed} combos whose backtest raised")

    df = pd.DataFrame(rows)
    if df.empty:
        print("no results")
        return

    df = df[df["ann_vol"] <= 0.25].copy()
    df = df.sort_values(["ann_return", "sharpe"], ascending=False)

    out = Path(args.out)
    out.parent.mkdir(parents=True, exist_ok=True)
    df.to_parquet(out, index=False)

    cols = [
        "ann_return",
        "ann_vol",
        "max_drawdown",
        "sharpe",
        "sma_fast",
        "sma_slow",
        "lazy_days",
        "rebalance_band",
        "atr_mult",
        "profit_tighten_atr",
        "atr_mult_profit",
        "stop_loss_atr",
        "bias_exit",
        "vol_ratio_exit",
        "max_weight_per_asset",
        "concentration_power",
    ]

    print("top10")
    print(df[cols].head(10).to_string(index=False))
|
||||
|
||||
|
||||
# Run the command-line entry point only when this file is executed as a script.
if __name__ == "__main__":
|
||||
main()
|
||||
130
scripts/grid_search_stage_a.py
Normal file
130
scripts/grid_search_stage_a.py
Normal file
@@ -0,0 +1,130 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import itertools
|
||||
import json
|
||||
from dataclasses import asdict, replace
|
||||
from pathlib import Path
|
||||
|
||||
import pandas as pd
|
||||
|
||||
from qfr.strategy.etf_trend import Constraints, TrendParams, UniverseAsset, run_backtest
|
||||
|
||||
|
||||
def load_universe(config_path: Path) -> tuple[list[UniverseAsset], Constraints, str, str]:
    """Parse the universe JSON file into assets, constraints and two ETF codes.

    ``conf["assets"]`` entries are expanded into ``UniverseAsset``; the
    optional ``constraints`` section supplies ``max_positions`` (default 4),
    the ``must_include`` counts (default 0 each), ``risk_proxy`` and
    ``rates_fallback``.
    """
    conf = json.loads(config_path.read_text(encoding="utf-8"))
    assets = [UniverseAsset(**entry) for entry in conf["assets"]]

    section = conf.get("constraints", {})
    minimums = {
        bucket: int(section.get("must_include", {}).get(bucket, 0))
        for bucket in ("commodity", "rates", "equity")
    }
    limits = Constraints(
        max_positions=int(section.get("max_positions", 4)),
        must_commodity=minimums["commodity"],
        must_rates=minimums["rates"],
        must_equity=minimums["equity"],
    )

    proxy_code = section.get("risk_proxy", "510300.SH")
    fallback_code = section.get("rates_fallback", "511010.SH")
    return assets, limits, proxy_code, fallback_code
|
||||
|
||||
|
||||
def load_prices(raw_dir: Path, universe: list[UniverseAsset], start: str, end: str) -> dict[str, pd.DataFrame]:
    """Return ``{ts_code: price frame}`` for every asset in ``universe``.

    Each frame is read from ``raw_dir/<ts_code without dots>.parquet``, its
    ``trade_date`` column is converted to ``str``, and only rows with
    ``start <= trade_date <= end`` (string comparison) are kept.
    """
    frames: dict[str, pd.DataFrame] = {}
    for asset in universe:
        stem = asset.ts_code.replace(".", "")
        table = pd.read_parquet(raw_dir / f"{stem}.parquet").copy()
        table["trade_date"] = table["trade_date"].astype(str)
        dates = table["trade_date"]
        keep = (dates >= start) & (dates <= end)
        frames[asset.ts_code] = table[keep]
    return frames
|
||||
|
||||
|
||||
def perf_stats(equity: pd.Series, periods_per_year: int = 252) -> dict[str, float]:
    """Compute annualised return, volatility, max drawdown and Sharpe of an equity curve.

    Args:
        equity: Equity values in chronological order, one per period.
        periods_per_year: Annualisation factor; defaults to 252, which was
            previously hard-coded.

    Returns:
        ``{}`` if there are no period returns, else a dict with keys
        ``ann_return``, ``ann_vol``, ``max_drawdown`` and ``sharpe``. ``sharpe``
        is ``ann_return / ann_vol`` and is NaN unless ``ann_vol > 0``.
    """
    returns = equity.pct_change().dropna()
    if returns.empty:
        return {}

    growth = equity.iloc[-1] / equity.iloc[0]
    ann_ret = float(growth ** (periods_per_year / len(returns)) - 1)
    ann_vol = float(returns.std(ddof=1) * (periods_per_year ** 0.5))
    # Drawdown is measured against the running peak of the curve.
    dd = float((equity / equity.cummax() - 1.0).min())
    sharpe = float(ann_ret / ann_vol) if ann_vol > 0 else float("nan")
    return {"ann_return": ann_ret, "ann_vol": ann_vol, "max_drawdown": dd, "sharpe": sharpe}
|
||||
|
||||
|
||||
def main() -> None:
    """Run the exhaustive stage-A grid search and store the ranked results.

    Every combination of the five parameter lists with ``sma_fast < sma_slow``
    is backtested; combos whose backtest raises or yields no stats are skipped.
    Rows with ``ann_vol <= 0.25`` are sorted by ``ann_return`` then ``sharpe``
    (descending), written to ``--out`` and the top 10 printed.
    """
    parser = argparse.ArgumentParser()
    for flag, default in (
        ("--config", "configs/etf_universe.json"),
        ("--rawdir", "data/raw"),
        ("--start", "20200101"),
        ("--end", "20251231"),
        ("--out", "data/grid_stage_a.parquet"),
    ):
        parser.add_argument(flag, default=default)
    opts = parser.parse_args()

    universe, constraints, risk_proxy, rates_fallback = load_universe(Path(opts.config))
    prices = load_prices(Path(opts.rawdir), universe, opts.start, opts.end)

    base = TrendParams(target_ann_vol=0.25)

    search_space = itertools.product(
        [3, 5, 8],  # sma_fast
        [15, 20, 30, 40],  # sma_slow
        [1, 2, 5, 10],  # lazy_days
        [0.03, 0.05, 0.08],  # rebalance_band
        [2.5, 3.0, 3.2, 4.0],  # atr_mult
    )

    results = []
    for fast, slow, lazy, band, atr in search_space:
        if not fast < slow:
            continue

        candidate = replace(
            base,
            rebalance_every=1,
            max_positions=constraints.max_positions,
            sma_fast=fast,
            sma_slow=slow,
            lazy_days=lazy,
            rebalance_band=band,
            atr_mult=float(atr),
        )

        try:
            equity, _weights, _trades = run_backtest(
                prices,
                universe,
                constraints,
                candidate,
                rates_fallback=rates_fallback,
                risk_proxy=risk_proxy,
            )
        except Exception:
            continue

        stats = perf_stats(equity["equity"])
        if stats:
            results.append({**stats, **asdict(candidate)})

    table = pd.DataFrame(results)
    if table.empty:
        print("no results")
        return

    within_vol = table["ann_vol"] <= 0.25
    table = table[within_vol].copy()
    table = table.sort_values(["ann_return", "sharpe"], ascending=False)

    destination = Path(opts.out)
    destination.parent.mkdir(parents=True, exist_ok=True)
    table.to_parquet(destination, index=False)

    shown = [
        "ann_return",
        "ann_vol",
        "max_drawdown",
        "sharpe",
        "sma_fast",
        "sma_slow",
        "lazy_days",
        "rebalance_band",
        "atr_mult",
    ]
    print("top10")
    print(table[shown].head(10).to_string(index=False))
|
||||
|
||||
|
||||
# Run the command-line entry point only when this file is executed as a script.
if __name__ == "__main__":
|
||||
main()
|
||||
472
scripts/iterate_best_local.py
Normal file
472
scripts/iterate_best_local.py
Normal file
@@ -0,0 +1,472 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import random
|
||||
import sqlite3
|
||||
from dataclasses import asdict, fields, replace
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
|
||||
from qfr.strategy.etf_trend import Constraints, TrendParams, UniverseAsset, run_backtest
|
||||
|
||||
|
||||
def load_universe(config_path: Path) -> tuple[list[UniverseAsset], Constraints, str, str]:
    """Read the universe JSON config and build the backtest inputs from it.

    Returns the ``UniverseAsset`` list, the ``Constraints`` (``max_positions``
    defaults to 3, ``must_include`` counts to 0), the risk-proxy code and the
    rates-fallback code, the last two as ``str``. When ``risk_proxy`` is
    missing or falsy, the first asset's ``ts_code`` is used, or "510300.SH"
    for an empty universe.
    """
    conf = json.loads(config_path.read_text(encoding="utf-8"))

    universe = []
    for spec in conf["assets"]:
        universe.append(UniverseAsset(**spec))

    cons = conf.get("constraints", {})

    def _must(bucket: str) -> int:
        # Minimum count for one asset bucket; 0 when not configured.
        return int(cons.get("must_include", {}).get(bucket, 0))

    constraints = Constraints(
        max_positions=int(cons.get("max_positions", 3)),
        must_commodity=_must("commodity"),
        must_rates=_must("rates"),
        must_equity=_must("equity"),
    )

    risk_proxy = cons.get("risk_proxy")
    if not risk_proxy:
        risk_proxy = universe[0].ts_code if universe else "510300.SH"
    rates_fallback = cons.get("rates_fallback", "511010.SH")
    return universe, constraints, str(risk_proxy), str(rates_fallback)
|
||||
|
||||
|
||||
def load_prices(raw_dir: Path, universe: list[UniverseAsset], start: str, end: str) -> dict[str, pd.DataFrame]:
    """Load one parquet price file per asset, restricted to ``[start, end]``.

    The file name is the asset's ``ts_code`` with dots removed plus
    ``.parquet``. ``trade_date`` is cast to ``str`` and compared as a string
    against ``start`` / ``end`` (both bounds inclusive).
    """

    def _load_one(ts_code: str) -> pd.DataFrame:
        path = raw_dir / (ts_code.replace(".", "") + ".parquet")
        frame = pd.read_parquet(path).copy()
        frame["trade_date"] = frame["trade_date"].astype(str)
        in_window = (frame["trade_date"] >= start) & (frame["trade_date"] <= end)
        return frame[in_window]

    return {asset.ts_code: _load_one(asset.ts_code) for asset in universe}
|
||||
|
||||
|
||||
def perf_stats(equity: pd.Series, periods_per_year: int = 252) -> dict[str, float]:
    """Summarise an equity curve.

    Args:
        equity: Equity values in chronological order, one per period.
        periods_per_year: Periods per year used for annualisation. Defaults to
            252, the value that was previously hard-coded.

    Returns:
        ``{}`` when no period return can be computed; otherwise a dict with
        ``ann_return``, ``ann_vol``, ``max_drawdown`` and ``sharpe``
        (``ann_return / ann_vol``, NaN unless ``ann_vol > 0``).
    """
    returns = equity.pct_change().dropna()
    if returns.empty:
        return {}

    n_periods = len(returns)
    ann_ret = float((equity.iloc[-1] / equity.iloc[0]) ** (periods_per_year / n_periods) - 1)
    ann_vol = float(returns.std(ddof=1) * (periods_per_year**0.5))
    dd = float((equity / equity.cummax() - 1.0).min())
    # A NaN volatility (single return) also fails the `> 0` test and yields NaN.
    sharpe = float(ann_ret / ann_vol) if ann_vol > 0 else float("nan")
    return {"ann_return": ann_ret, "ann_vol": ann_vol, "max_drawdown": dd, "sharpe": sharpe}
|
||||
|
||||
|
||||
def trades_per_year(trades: pd.DataFrame | None, start: str, end: str) -> float:
    """Return the number of trade rows divided by the number of calendar years spanned.

    The year count is ``end`` year minus ``start`` year plus one (taken from
    the first four characters of each string), never less than 1. ``None``, an
    empty frame, or any object without an ``empty`` attribute gives ``0.0``.
    """
    if trades is None:
        return 0.0
    if getattr(trades, "empty", True):
        return 0.0
    span_years = int(end[:4]) - int(start[:4]) + 1
    if span_years < 1:
        span_years = 1
    return float(len(trades) / span_years)
|
||||
|
||||
|
||||
def ensure_db(db_path: Path, param_cols: list[str]) -> None:
    """Create the ``trials`` table if needed and add any missing parameter columns.

    Args:
        db_path: SQLite database file; its parent directory is created.
        param_cols: Column names to ensure exist on ``trials``. Missing ones
            are added with type ``REAL``; names that already exist (compared
            case-insensitively, as SQLite does) are left untouched.

    The connection is explicitly closed: ``with sqlite3.connect(...)`` on its
    own only commits or rolls back. Existing columns are detected with
    ``PRAGMA table_info`` instead of swallowing every ``OperationalError``, so
    real failures (locked database, invalid name) are no longer hidden.
    """
    from contextlib import closing

    db_path.parent.mkdir(parents=True, exist_ok=True)
    with closing(sqlite3.connect(str(db_path))) as con:
        con.execute("PRAGMA journal_mode=WAL")
        con.execute("PRAGMA synchronous=NORMAL")
        con.execute(
            """
            CREATE TABLE IF NOT EXISTS trials (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                run_id TEXT NOT NULL,
                ts_utc TEXT NOT NULL,
                code_version TEXT,
                config_path TEXT,
                start TEXT,
                end TEXT,
                seed INTEGER,
                trial INTEGER,
                jobs INTEGER,
                ann_return REAL,
                ann_vol REAL,
                max_drawdown REAL,
                sharpe REAL,
                trades_per_year REAL
            )
            """
        )
        # Column 1 of each table_info row is the column name.
        existing = {str(row[1]).lower() for row in con.execute("PRAGMA table_info(trials)")}
        for c in param_cols:
            if c.lower() in existing:
                continue
            quoted = '"' + c.replace('"', '""') + '"'
            con.execute(f"ALTER TABLE trials ADD COLUMN {quoted} REAL")
            existing.add(c.lower())
        con.commit()
|
||||
|
||||
|
||||
def insert_rows(db_path: Path, param_cols: list[str], rows: list[dict[str, Any]]) -> None:
    """Bulk-insert trial rows into the ``trials`` table.

    Args:
        db_path: SQLite database file containing a ``trials`` table.
        param_cols: Parameter column names appended after the fixed columns.
        rows: One dict per trial; keys missing from a dict are stored as NULL.

    Does nothing when ``rows`` is empty. All rows are inserted in a single
    transaction and the connection is closed afterwards
    (``with sqlite3.connect(...)`` alone would leave it open).
    """
    from contextlib import closing

    if not rows:
        return
    cols = [
        "run_id",
        "ts_utc",
        "code_version",
        "config_path",
        "start",
        "end",
        "seed",
        "trial",
        "jobs",
        "ann_return",
        "ann_vol",
        "max_drawdown",
        "sharpe",
        "trades_per_year",
        *param_cols,
    ]
    placeholders = ",".join(["?"] * len(cols))
    # Quote identifiers so keyword-like names (e.g. "end") are always valid.
    join_cols = ",".join('"' + c.replace('"', '""') + '"' for c in cols)
    sql = f"INSERT INTO trials ({join_cols}) VALUES ({placeholders})"
    vals = [[r.get(c) for c in cols] for r in rows]
    with closing(sqlite3.connect(str(db_path))) as con:
        with con:  # commit on success, roll back on error
            con.executemany(sql, vals)
|
||||
|
||||
|
||||
def load_state(path: Path) -> dict:
    """Return the optimiser state stored at ``path``, or a fresh default state.

    When the file does not exist the default has ``best`` and
    ``last_reported_ann_return`` set to ``None`` and an empty ``history`` list.
    """
    if not path.exists():
        return {"best": None, "last_reported_ann_return": None, "history": []}
    raw = path.read_text(encoding="utf-8")
    return json.loads(raw)
|
||||
|
||||
|
||||
def save_state(path: Path, state: dict) -> None:
    """Write ``state`` to ``path`` as indented ASCII JSON with a trailing newline.

    The parent directory is created if needed. The text is written to a
    sibling ``<name>.tmp`` file and then renamed over ``path``, so an
    interrupted write cannot leave a truncated state file behind.
    """
    path.parent.mkdir(parents=True, exist_ok=True)
    payload = json.dumps(state, ensure_ascii=True, indent=2) + "\n"
    tmp_path = path.with_name(path.name + ".tmp")
    tmp_path.write_text(payload, encoding="utf-8")
    tmp_path.replace(path)
|
||||
|
||||
|
||||
def infer_code_version(repo_dir: Path) -> str:
    """Return an identifier for the checked-out code, read from ``.git`` files.

    Returns:
        * ``"nogit"`` when ``repo_dir/.git/HEAD`` does not exist;
        * the stripped content of ``HEAD`` when it is not a symbolic ref;
        * for a symbolic ref, the content of the loose ref file, or the
          matching entry in ``.git/packed-refs`` when the loose file is absent
          (previously the literal ``ref: ...`` text was returned in that case);
        * the raw ``HEAD`` text when the ref cannot be resolved either way;
        * ``"unknown"`` when reading any of these files fails.
    """
    git_dir = repo_dir / ".git"
    head = git_dir / "HEAD"
    if not head.exists():
        return "nogit"
    try:
        txt = head.read_text(encoding="utf-8").strip()
        if not txt.startswith("ref:"):
            return txt

        ref = txt[len("ref:"):].strip()
        ref_path = git_dir / ref
        if ref and ref_path.is_file():
            return ref_path.read_text(encoding="utf-8").strip()

        packed = git_dir / "packed-refs"
        if ref and packed.is_file():
            for line in packed.read_text(encoding="utf-8").splitlines():
                # '#' lines are headers, '^' lines are peeled tag targets.
                if line.startswith(("#", "^")):
                    continue
                sha, _, name = line.strip().partition(" ")
                if name == ref:
                    return sha
        return txt
    except (OSError, ValueError):
        # ValueError covers undecodable bytes (UnicodeDecodeError).
        return "unknown"
|
||||
|
||||
|
||||
def main() -> None:
    """Run a local random search around the best parameters stored in the state file.

    Flow: load universe, prices and ``state["best"]``; rebuild a ``TrendParams``
    base from it; for each trial sample a variant by applying the enabled
    ``--tweak`` groups, backtest it, drop results with no stats or with more
    than ``--max_trades_per_year`` trades per year, and persist a new best
    (by ``ann_return``) to the state file immediately. All valid trials are
    inserted into the SQLite DB at the end, a history entry is appended to the
    state, and the top 8 rows are printed.

    Fixes relative to the earlier version: a run with no valid trial no longer
    crashes on ``sort_values`` of an empty frame, and a stored best
    ``ann_return`` of exactly 0.0 is no longer treated as "no best".
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("--config", default="configs/etf_universe_industry_profiled.json")
    ap.add_argument("--rawdir", default="data/raw")
    ap.add_argument("--start", default="20200101")
    ap.add_argument("--end", default="20251231")
    ap.add_argument("--trials", type=int, default=20)
    ap.add_argument("--seed", type=int, default=1)
    ap.add_argument("--jobs", type=int, default=1)
    ap.add_argument("--state", default="data/opt_state.json")
    ap.add_argument("--db", default="data/experiments.sqlite")
    ap.add_argument("--max_trades_per_year", type=float, default=80.0)
    ap.add_argument("--progress_every", type=int, default=5)
    ap.add_argument(
        "--tweak",
        action="append",
        default=[],
        help=(
            "Enable a tweak group. Repeatable. Options: macro, churn, stops, score, switches, switches2, signal1, orth_ma, orth_weights, orth_mech, asym_fast, positions, exits. "
            "(Each group adjusts <=4 params around current best.)"
        ),
    )
    args = ap.parse_args()

    rng = random.Random(int(args.seed))
    np.random.seed(int(args.seed))

    config_path = Path(args.config)
    universe, constraints, risk_proxy, rates_fallback = load_universe(config_path)
    prices = load_prices(Path(args.rawdir), universe, str(args.start), str(args.end))

    state_path = Path(args.state)
    state = load_state(state_path)
    best_row = state.get("best")
    if not best_row:
        raise SystemExit("opt_state.json missing best")

    tp_fields = {f.name for f in fields(TrendParams)}

    defaults = TrendParams(max_positions=constraints.max_positions)
    best_params = {k: best_row[k] for k in best_row.keys() if k in tp_fields}

    # JSON round-trips may change numeric types; coerce each stored value back
    # to the type of the corresponding TrendParams default.
    typed: dict[str, Any] = {}
    for k, v in best_params.items():
        t = type(getattr(defaults, k))
        if t is int:
            typed[k] = int(v)
        elif t is float:
            typed[k] = float(v)
        else:
            typed[k] = v

    base = replace(defaults, **typed)

    tweaks = set(args.tweak or [])

    def sample_params() -> TrendParams:
        """Return ``base`` with every enabled tweak group re-sampled from ``rng``.

        Groups are applied in a fixed order, so a later group overrides
        fields set by an earlier one. Unknown group names are ignored.
        """
        p = base

        if "macro" in tweaks:
            p = replace(
                p,
                macro_min_breadth=float(rng.choice([0.10, 0.12, 0.15, 0.18, 0.20])),
                macro_down_frac=float(rng.choice([0.75, 0.78, 0.80, 0.82, 0.85])),
            )

        if "churn" in tweaks:
            p = replace(
                p,
                lazy_days=int(rng.choice([6, 8, 10])),
                min_hold_days=int(rng.choice([2, 3, 4, 5])),
                replace_score_gap=float(rng.choice([0.5, 0.8, 1.2, 1.6])),
            )

        if "switches" in tweaks:
            # switch/constraint knobs (4 sampled factors)
            p = replace(
                p,
                desired_positions_min=int(rng.choice([1, 2, 3])),
                replace_score_gap=float(rng.choice([0.0, 0.3, 0.5, 0.8, 1.2])),
                lazy_days=int(rng.choice([4, 6, 8, 10, 12])),
                min_hold_days=int(rng.choice([1, 2, 3, 4, 5])),
            )

        if "switches2" in tweaks:
            # route D churn control without forcing higher min holdings:
            # desired_positions_min is pinned to 1 and 4 factors are sampled
            # (replace_score_gap, lazy_days, min_hold_days, cooldown_days).
            p = replace(
                p,
                desired_positions_min=int(1),
                replace_score_gap=float(rng.choice([0.5, 0.8, 1.0, 1.2, 1.6])),
                lazy_days=int(rng.choice([8, 10, 12, 14, 16])),
                min_hold_days=int(rng.choice([3, 5, 7, 10])),
                cooldown_days=int(rng.choice([0, 2, 4, 6, 8, 10])),
            )

        if "signal1" in tweaks:
            # route D: improve signal quality (4 sampled factors)
            p = replace(
                p,
                min_score=float(rng.choice([0.0, 0.05, 0.10, 0.15, 0.20, 0.25, 0.30])),
                trend_strength_weight=float(rng.choice([0.0, 0.2, 0.4, 0.6, 0.8, 1.0])),
                score_vol_denom_floor=float(rng.choice([0.01, 0.02, 0.03, 0.04, 0.05])),
                macro_min_breadth=float(rng.choice([0.10, 0.15, 0.20, 0.25, 0.30])),
            )

        if "orth_ma" in tweaks:
            # route R: orthogonal to score/stops/exits; explore timing knobs (4 sampled factors)
            p = replace(
                p,
                sma_fast=int(rng.choice([3, 5, 7, 9, 12])),
                sma_slow=int(rng.choice([20, 30, 40, 60, 90])),
                rebalance_every=int(rng.choice([1, 2, 3, 5])),
                max_replaces_per_day=int(rng.choice([0, 1, 2])),
            )
            # Safety net: keep the fast window strictly below the slow one.
            if p.sma_fast >= p.sma_slow:
                p = replace(p, sma_fast=max(3, int(p.sma_slow // 6)))

        if "orth_weights" in tweaks:
            # route R: orthogonal portfolio weight shape
            # (max_positions, desired min/max, max weight, plus concentration_power below)
            max_positions = int(rng.choice([2, 3, 4, 5]))
            desired_min = int(rng.choice([1, 2, 3]))
            desired_max = int(rng.choice([2, 3, 4, 5]))
            # keep desired_min <= desired_max <= max_positions
            desired_min = min(desired_min, desired_max)
            desired_max = min(desired_max, max_positions)
            desired_min = min(desired_min, desired_max)
            p = replace(
                p,
                max_positions=max_positions,
                desired_positions_min=desired_min,
                desired_positions_max=desired_max,
                max_weight_per_asset=float(rng.choice([0.35, 0.45, 0.60, 0.75, 0.90, 1.00])),
            )
            # concentration_power is sampled separately, after the fields above.
            p = replace(p, concentration_power=float(rng.choice([1.2, 1.6, 2.0, 2.2, 2.6, 3.0])))

        if "orth_mech" in tweaks:
            # route R: mechanism/turnover knobs (4 sampled factors)
            p = replace(
                p,
                rebalance_every=int(rng.choice([1, 2, 3, 5])),
                replace_score_gap=float(rng.choice([0.0, 0.3, 0.5, 0.8, 1.2])),
                max_replaces_per_day=int(rng.choice([0, 1, 2, 3])),
                cooldown_days=int(rng.choice([0, 2, 4, 6, 8, 10])),
            )

        if "asym_fast" in tweaks:
            # asymmetric bull/bear risk controls (fast-run) (4 sampled factors)
            p = replace(
                p,
                regime_confirm_days=int(rng.choice([2, 3, 4, 5])),
                bull_atr_mult=float(rng.choice([3.0, 3.2, 3.4, 3.6])),
                bear_atr_mult=float(rng.choice([2.0, 2.2, 2.4, 2.6, 2.8])),
                bear_stop_loss_atr=float(rng.choice([2.0, 2.2, 2.4, 2.6, 2.8])),
            )

        if "positions" in tweaks:
            # concentration/positioning knobs (4 sampled factors)
            max_positions = int(rng.choice([2, 3, 4]))
            desired_min = int(rng.choice([1, 2, 3]))
            desired_max = int(rng.choice([2, 3, 4]))
            # keep desired_min <= desired_max <= max_positions
            desired_min = min(desired_min, desired_max)
            desired_max = min(desired_max, max_positions)
            desired_min = min(desired_min, desired_max)
            p = replace(
                p,
                max_positions=max_positions,
                desired_positions_min=desired_min,
                desired_positions_max=desired_max,
                max_weight_per_asset=float(rng.choice([0.45, 0.60, 0.75, 0.90, 1.00])),
            )

        if "stops" in tweaks:
            # risk-control fine search (route D: prefer higher sharpe / lower drawdown)
            p = replace(
                p,
                atr_mult=float(rng.choice([3.0, 3.2, 3.4, 3.6])),
                stop_loss_atr=float(rng.choice([2.4, 2.6, 2.8, 3.0, 3.2])),
                profit_tighten_atr=float(rng.choice([4.0, 6.0, 8.0])),
                atr_mult_profit=float(rng.choice([1.3, 1.5, 1.8, 2.0])),
            )

        if "exits" in tweaks:
            # anomaly exits fine search (route D) - 4 sampled factors
            p = replace(
                p,
                bias_window=int(rng.choice([10, 15, 20, 30])),
                bias_exit=float(rng.choice([0.12, 0.16, 0.20, 0.25, 0.30])),
                vol_short=int(rng.choice([3, 5, 8, 10])),
                vol_ratio_exit=float(rng.choice([2.0, 2.5, 3.0, 3.5, 4.0])),
            )

        if "score" in tweaks:
            # aggressive weight search for higher ann_return
            p = replace(
                p,
                min_score=float(rng.choice([-0.10, 0.00, 0.05, 0.10, 0.20, 0.30, 0.40])),
                trend_strength_weight=float(rng.choice([0.00, 0.20, 0.40, 0.60, 0.80, 1.00])),
                w_r20=float(rng.choice([0.20, 0.35, 0.50, 0.65, 0.80])),
                w_r60=float(rng.choice([0.00, 0.10, 0.20, 0.35, 0.50])),
            )
            # Split whatever weight is left between w_r5 (60%, capped at 0.6)
            # and w_r120 (the rest); both are floored at 0.
            remain = 1.0 - (p.w_r20 + p.w_r60)
            w_r5 = float(max(0.0, min(0.6, remain * 0.6)))
            w_r120 = float(max(0.0, remain - w_r5))
            p = replace(p, w_r5=w_r5, w_r120=w_r120)

        return p

    param_cols = sorted(asdict(base).keys())
    db_path = Path(args.db)
    ensure_db(db_path, param_cols=param_cols)

    run_id = datetime.now(timezone.utc).strftime("%Y%m%dT%H%M%SZ") + f"_bestlocal_seed{int(args.seed)}" + ("_" + "-".join(sorted(tweaks)) if tweaks else "")
    code_version = infer_code_version(Path("."))

    # Explicit None check: `x or -inf` would discard a legitimate 0.0.
    stored_ann = best_row.get("ann_return")
    best_ann = float(stored_ann) if stored_ann is not None else float("-inf")

    rows_for_db: list[dict[str, Any]] = []
    valid = 0
    for t in range(int(args.trials)):
        p = sample_params()

        equity, _w, tr = run_backtest(
            prices,
            universe,
            constraints,
            p,
            rates_fallback=rates_fallback,
            risk_proxy=risk_proxy,
        )
        st = perf_stats(equity["equity"])
        if not st:
            continue

        tpy = trades_per_year(tr, str(args.start), str(args.end))
        if tpy > float(args.max_trades_per_year):
            continue

        valid += 1
        row = {**st, "trades_per_year": float(tpy), **asdict(p)}
        row["trial"] = int(t)
        row["seed"] = int(args.seed)

        if float(row["ann_return"]) > best_ann:
            best_ann = float(row["ann_return"])
            state["best"] = row
            # Persist immediately so an interrupted run keeps the improvement.
            save_state(state_path, state)

        db_row = {
            "run_id": run_id,
            "ts_utc": datetime.now(timezone.utc).isoformat(),
            "code_version": code_version,
            "config_path": str(config_path),
            "start": str(args.start),
            "end": str(args.end),
            "seed": int(args.seed),
            "trial": int(t),
            "jobs": int(args.jobs),
            "ann_return": float(row["ann_return"]),
            "ann_vol": float(row["ann_vol"]),
            "max_drawdown": float(row["max_drawdown"]),
            "sharpe": float(row["sharpe"]),
            "trades_per_year": float(row["trades_per_year"]),
        }
        for c in param_cols:
            db_row[c] = row.get(c)
        rows_for_db.append(db_row)

        if int(args.progress_every) > 0 and valid % int(args.progress_every) == 0:
            print(f"progress valid={valid} best_ann={best_ann:.4f}", flush=True)

    if rows_for_db:
        insert_rows(db_path, param_cols=param_cols, rows=rows_for_db)

    state.setdefault("history", []).append(
        {
            "timestamp": datetime.now(timezone.utc).isoformat(),
            "run_id": run_id,
            "code_version": code_version,
            "config": str(args.config),
            "start": str(args.start),
            "end": str(args.end),
            "trials": int(args.trials),
            "jobs": int(args.jobs),
            "best_ann_return": float(best_ann) if np.isfinite(best_ann) else None,
            "db": str(args.db),
            "base_from": "opt_state.best",
            "tweaks": sorted(tweaks),
        }
    )
    save_state(state_path, state)

    print("run_id", run_id)
    if not rows_for_db:
        # An empty frame has no "ann_return" column, so sorting would raise.
        print("no valid trials")
        return

    df = pd.DataFrame(rows_for_db).sort_values(["ann_return"], ascending=False)
    view_cols = [
        "ann_return",
        "ann_vol",
        "max_drawdown",
        "sharpe",
        "trades_per_year",
        "atr_mult",
        "stop_loss_atr",
        "profit_tighten_atr",
        "atr_mult_profit",
    ]
    view_cols = [c for c in view_cols if c in df.columns]
    print(df[view_cols].head(8).to_string(index=False))
|
||||
|
||||
|
||||
# Run the command-line entry point only when this file is executed as a script.
if __name__ == "__main__":
|
||||
main()
|
||||
499
scripts/iterate_optimize.py
Normal file
499
scripts/iterate_optimize.py
Normal file
@@ -0,0 +1,499 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import os
|
||||
import random
|
||||
import sqlite3
|
||||
from dataclasses import asdict, replace
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
|
||||
from qfr.strategy.etf_trend import Constraints, TrendParams, UniverseAsset, run_backtest
|
||||
|
||||
# Globals for multiprocessing (fork mode shares memory COW)
# These slots start as None, are assigned by _init_globals() and are read by
# _eval_one(), which refuses to run while any of them is still None.
# NOTE(review): the copy-on-write remark assumes a fork start method - confirm
# against how the worker pool is created.
|
||||
_G_PRICES: dict[str, pd.DataFrame] | None = None
|
||||
_G_UNIVERSE: list[UniverseAsset] | None = None
|
||||
_G_CONSTRAINTS: Constraints | None = None
|
||||
_G_RISK_PROXY: str | None = None
|
||||
_G_RATES_FALLBACK: str | None = None
|
||||
|
||||
|
||||
def load_universe(config_path: Path) -> tuple[list[UniverseAsset], Constraints, str, str]:
    """Parse the universe JSON file into assets, constraints and two ETF codes.

    ``max_positions`` defaults to 3 and each ``must_include`` count to 0. A
    missing or falsy ``risk_proxy`` falls back to the first asset's
    ``ts_code`` (or "510300.SH" when there are no assets). Both codes are
    returned as ``str``.
    """
    conf = json.loads(config_path.read_text(encoding="utf-8"))
    assets = [UniverseAsset(**entry) for entry in conf["assets"]]

    section = conf.get("constraints", {})
    minimums = {
        bucket: int(section.get("must_include", {}).get(bucket, 0))
        for bucket in ("commodity", "rates", "equity")
    }
    limits = Constraints(
        max_positions=int(section.get("max_positions", 3)),
        must_commodity=minimums["commodity"],
        must_rates=minimums["rates"],
        must_equity=minimums["equity"],
    )

    proxy_code = section.get("risk_proxy")
    if not proxy_code:
        proxy_code = assets[0].ts_code if assets else "510300.SH"
    fallback_code = section.get("rates_fallback", "511010.SH")
    return assets, limits, str(proxy_code), str(fallback_code)
|
||||
|
||||
|
||||
def load_prices(raw_dir: Path, universe: list[UniverseAsset], start: str, end: str) -> dict[str, pd.DataFrame]:
    """Return ``{ts_code: price frame}`` for every asset in ``universe``.

    Each frame is read from ``raw_dir/<ts_code without dots>.parquet``, its
    ``trade_date`` column is converted to ``str``, and only rows with
    ``start <= trade_date <= end`` (string comparison) are kept.
    """
    frames: dict[str, pd.DataFrame] = {}
    for asset in universe:
        file_name = asset.ts_code.replace(".", "") + ".parquet"
        table = pd.read_parquet(raw_dir / file_name).copy()
        table["trade_date"] = table["trade_date"].astype(str)
        dates = table["trade_date"]
        keep = (dates >= start) & (dates <= end)
        frames[asset.ts_code] = table[keep]
    return frames
|
||||
|
||||
|
||||
def perf_stats(equity: pd.Series, periods_per_year: int = 252) -> dict[str, float]:
    """Compute annualised return, volatility, max drawdown and Sharpe of an equity curve.

    Args:
        equity: Equity values in chronological order, one per period.
        periods_per_year: Annualisation factor; defaults to 252, which was
            previously hard-coded.

    Returns:
        ``{}`` if there are no period returns, else a dict with keys
        ``ann_return``, ``ann_vol``, ``max_drawdown`` and ``sharpe``. ``sharpe``
        is ``ann_return / ann_vol`` and is NaN unless ``ann_vol > 0``.
    """
    returns = equity.pct_change().dropna()
    if returns.empty:
        return {}

    growth = equity.iloc[-1] / equity.iloc[0]
    ann_ret = float(growth ** (periods_per_year / len(returns)) - 1)
    ann_vol = float(returns.std(ddof=1) * (periods_per_year**0.5))
    # Drawdown is measured against the running peak of the curve.
    dd = float((equity / equity.cummax() - 1.0).min())
    sharpe = float(ann_ret / ann_vol) if ann_vol > 0 else float("nan")
    return {"ann_return": ann_ret, "ann_vol": ann_vol, "max_drawdown": dd, "sharpe": sharpe}
|
||||
|
||||
|
||||
def trades_per_year(trades: pd.DataFrame, start: str, end: str) -> float:
    """Return the number of trade rows divided by the number of calendar years spanned.

    The year count is ``end`` year minus ``start`` year plus one (first four
    characters of each string), never less than 1. ``None`` or an empty frame
    gives ``0.0``.
    """
    has_trades = trades is not None and not trades.empty
    if not has_trades:
        return 0.0
    span_years = int(end[:4]) - int(start[:4]) + 1
    return float(len(trades) / max(1, span_years))
|
||||
|
||||
|
||||
def load_state(path: Path) -> dict:
    """Load the optimiser state JSON from ``path``.

    A missing file yields a new state dict with ``best`` and
    ``last_reported_ann_return`` set to ``None`` and an empty ``history``.
    """
    if not path.exists():
        return {"best": None, "last_reported_ann_return": None, "history": []}
    contents = path.read_text(encoding="utf-8")
    return json.loads(contents)
|
||||
|
||||
|
||||
def save_state(path: Path, state: dict) -> None:
    """Write ``state`` to ``path`` as indented ASCII JSON with a trailing newline.

    The parent directory is created if needed. The text is written to a
    sibling ``<name>.tmp`` file and then renamed over ``path``, so an
    interrupted write cannot leave a truncated state file behind.
    """
    path.parent.mkdir(parents=True, exist_ok=True)
    payload = json.dumps(state, ensure_ascii=True, indent=2) + "\n"
    tmp_path = path.with_name(path.name + ".tmp")
    tmp_path.write_text(payload, encoding="utf-8")
    tmp_path.replace(path)
|
||||
|
||||
|
||||
def infer_code_version(repo_dir: Path) -> str:
    """Return an identifier for the checked-out code, preferring the git commit hash.

    Returns:
        * ``"nogit"`` when ``repo_dir/.git/HEAD`` does not exist;
        * the stripped content of ``HEAD`` when it is not a symbolic ref;
        * for a symbolic ref, the content of the loose ref file, or the
          matching entry in ``.git/packed-refs`` when the loose file is absent
          (previously the literal ``ref: ...`` text was returned in that case);
        * the raw ``HEAD`` text when the ref cannot be resolved either way;
        * ``"unknown"`` when reading any of these files fails.
    """
    git_dir = repo_dir / ".git"
    head = git_dir / "HEAD"
    if not head.exists():
        return "nogit"
    try:
        txt = head.read_text(encoding="utf-8").strip()
        if not txt.startswith("ref:"):
            return txt

        ref = txt[len("ref:"):].strip()
        ref_path = git_dir / ref
        if ref and ref_path.is_file():
            return ref_path.read_text(encoding="utf-8").strip()

        packed = git_dir / "packed-refs"
        if ref and packed.is_file():
            for line in packed.read_text(encoding="utf-8").splitlines():
                # '#' lines are headers, '^' lines are peeled tag targets.
                if line.startswith(("#", "^")):
                    continue
                sha, _, name = line.strip().partition(" ")
                if name == ref:
                    return sha
        return txt
    except (OSError, ValueError):
        # ValueError covers undecodable bytes (UnicodeDecodeError).
        return "unknown"
|
||||
|
||||
|
||||
def ensure_db(db_path: Path, param_cols: list[str]) -> None:
    """Create the ``trials`` table if needed and add any missing parameter columns.

    Args:
        db_path: SQLite database file; its parent directory is created.
        param_cols: Column names to ensure exist on ``trials``. Missing ones
            are added with type ``REAL``; names that already exist (compared
            case-insensitively, as SQLite does) are left untouched.

    The connection is explicitly closed: ``with sqlite3.connect(...)`` on its
    own only commits or rolls back. Existing columns are detected with
    ``PRAGMA table_info`` instead of swallowing every ``OperationalError``, so
    real failures (locked database, invalid name) are no longer hidden.
    """
    from contextlib import closing

    db_path.parent.mkdir(parents=True, exist_ok=True)
    with closing(sqlite3.connect(str(db_path))) as con:
        con.execute("PRAGMA journal_mode=WAL")
        con.execute("PRAGMA synchronous=NORMAL")
        con.execute(
            """
            CREATE TABLE IF NOT EXISTS trials (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                run_id TEXT NOT NULL,
                ts_utc TEXT NOT NULL,
                code_version TEXT,
                config_path TEXT,
                start TEXT,
                end TEXT,
                seed INTEGER,
                trial INTEGER,
                jobs INTEGER,
                ann_return REAL,
                ann_vol REAL,
                max_drawdown REAL,
                sharpe REAL,
                trades_per_year REAL
            )
            """
        )
        # Add param columns if missing (structured fields).
        # Column 1 of each table_info row is the column name.
        existing = {str(row[1]).lower() for row in con.execute("PRAGMA table_info(trials)")}
        for c in param_cols:
            if c.lower() in existing:
                continue
            quoted = '"' + c.replace('"', '""') + '"'
            con.execute(f"ALTER TABLE trials ADD COLUMN {quoted} REAL")
            existing.add(c.lower())
        con.commit()
|
||||
|
||||
|
||||
def insert_rows(db_path: Path, param_cols: list[str], rows: list[dict[str, Any]]) -> None:
    """Bulk-insert trial rows into the ``trials`` table.

    Args:
        db_path: SQLite database file containing a ``trials`` table.
        param_cols: Parameter column names appended after the fixed columns.
        rows: One dict per trial; keys missing from a dict are stored as NULL.

    Does nothing when ``rows`` is empty. All rows are inserted in a single
    transaction and the connection is closed afterwards
    (``with sqlite3.connect(...)`` alone would leave it open).
    """
    from contextlib import closing

    if not rows:
        return
    cols = [
        "run_id",
        "ts_utc",
        "code_version",
        "config_path",
        "start",
        "end",
        "seed",
        "trial",
        "jobs",
        "ann_return",
        "ann_vol",
        "max_drawdown",
        "sharpe",
        "trades_per_year",
        *param_cols,
    ]
    placeholders = ",".join(["?"] * len(cols))
    # Quote identifiers so keyword-like names (e.g. "end") are always valid.
    join_cols = ",".join('"' + c.replace('"', '""') + '"' for c in cols)
    sql = f"INSERT INTO trials ({join_cols}) VALUES ({placeholders})"
    vals = [[r.get(c) for c in cols] for r in rows]
    with closing(sqlite3.connect(str(db_path))) as con:
        with con:  # commit on success, roll back on error
            con.executemany(sql, vals)
|
||||
|
||||
|
||||
|
||||
|
||||
def reservoir_sample_product(rng, iterables, k: int):
    """Sample up to ``k`` combos from the cartesian product of ``iterables``.

    The product is streamed once. The first ``k`` combos fill the reservoir;
    for the ``n``-th combo after that (1-based count of all combos seen), an
    index is drawn with ``rng.randrange(n)`` and the combo replaces that slot
    when the index is below ``k``. Returns the reservoir as a list of tuples.
    """
    from itertools import product

    reservoir = []
    for seen, combo in enumerate(product(*iterables), start=1):
        if len(reservoir) < k:
            reservoir.append(combo)
            continue
        slot = rng.randrange(seen)
        if slot < k:
            reservoir[slot] = combo
    return reservoir
|
||||
|
||||
|
||||
def _init_globals(prices: dict[str, pd.DataFrame], universe: list[UniverseAsset], constraints: Constraints, risk_proxy: str, rates_fallback: str) -> None:
    """Store the shared backtest inputs in this module's ``_G_*`` globals."""
    globals().update(
        _G_PRICES=prices,
        _G_UNIVERSE=universe,
        _G_CONSTRAINTS=constraints,
        _G_RISK_PROXY=risk_proxy,
        _G_RATES_FALLBACK=rates_fallback,
    )
|
||||
|
||||
|
||||
def _eval_one(task: dict[str, Any]) -> dict[str, Any] | None:
    """Run a single backtest trial and return its result row.

    Reads the shared inputs from the ``_G_*`` globals set by ``_init_globals``.

    Args:
        task: Dict with keys ``params`` (TrendParams overrides), ``start``,
            ``end``, ``max_trades_per_year``, ``trial`` and ``seed``.

    Returns:
        The performance stats merged with ``trades_per_year``, the full
        parameter dict, ``trial`` and ``seed``; or ``None`` when the backtest
        raises, yields no stats, or trades more often than
        ``max_trades_per_year``.
    """
    import logging

    assert _G_PRICES is not None
    assert _G_UNIVERSE is not None
    assert _G_CONSTRAINTS is not None
    assert _G_RISK_PROXY is not None
    assert _G_RATES_FALLBACK is not None

    params = replace(TrendParams(), **task["params"])

    try:
        equity, _w, tr = run_backtest(
            _G_PRICES,
            _G_UNIVERSE,
            _G_CONSTRAINTS,
            params,
            rates_fallback=_G_RATES_FALLBACK,
            risk_proxy=_G_RISK_PROXY,
        )
    except Exception:
        # Best-effort search: a failing trial is skipped, but the failure is
        # logged so a run that ends with "no valid trials" can be diagnosed.
        logging.getLogger(__name__).warning(
            "trial %s failed; skipping", task.get("trial"), exc_info=True
        )
        return None

    st = perf_stats(equity["equity"])
    if not st:
        return None

    tpy = trades_per_year(tr, task["start"], task["end"])
    if tpy > float(task["max_trades_per_year"]):
        return None

    row = {**st, "trades_per_year": float(tpy), **asdict(params)}
    row["trial"] = int(task["trial"])
    row["seed"] = int(task["seed"])
    return row
|
||||
|
||||
|
||||
# Default for the --max_grid CLI option: in grid mode, when the full cartesian
# product has more combinations than this, main() samples this many instead.
MAX_GRID_COMBOS = 128
|
||||
|
||||
|
||||
def main() -> None:
    """CLI entry point: search TrendParams settings and persist the results.

    Builds one task per candidate parameter set (``--mode grid`` walks or
    samples the cartesian product of the search space, ``--mode random`` draws
    ``--trials`` independent candidates), evaluates them with ``_eval_one``
    serially or in a process pool, writes valid rows to the SQLite ``trials``
    table in batches, keeps the best row in the JSON state file, prints the top
    rows, and prints ``REPORT_TRIGGER`` when the best annual return beats the
    last reported value by at least ``--report_step``.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("--config", default="configs/etf_universe_industry_profiled.json")
    ap.add_argument("--rawdir", default="data/raw")
    ap.add_argument("--start", default="20200101")
    ap.add_argument("--end", default="20251231")
    ap.add_argument("--trials", type=int, default=240)
    ap.add_argument("--mode", choices=["random", "grid"], default="random")
    ap.add_argument("--max_grid", type=int, default=MAX_GRID_COMBOS)
    ap.add_argument("--seed", type=int, default=1)
    ap.add_argument("--jobs", type=int, default=1, help="Parallel workers (processes), up to 8")
    ap.add_argument("--state", default="data/opt_state.json")
    ap.add_argument("--db", default="data/experiments.sqlite")
    ap.add_argument("--baseline", type=float, default=None)
    ap.add_argument("--report_step", type=float, default=0.05)
    ap.add_argument("--max_trades_per_year", type=float, default=80.0)
    ap.add_argument("--progress_every", type=int, default=25)
    args = ap.parse_args()
    jobs = max(1, min(8, int(args.jobs)))

    random.seed(args.seed)
    np.random.seed(args.seed)

    config_path = Path(args.config)
    universe, constraints, risk_proxy, rates_fallback = load_universe(config_path)
    prices = load_prices(Path(args.rawdir), universe, args.start, args.end)
    _init_globals(prices, universe, constraints, risk_proxy, rates_fallback)

    state_path = Path(args.state)
    state = load_state(state_path)

    best = state.get("best")
    best_ann = float(best["ann_return"]) if best else float("-inf")

    baseline = args.baseline
    if baseline is None:
        baseline = best_ann if np.isfinite(best_ann) else 0.0

    last_rep = state.get("last_reported_ann_return")
    if last_rep is None:
        last_rep = baseline

    params0 = TrendParams(max_positions=constraints.max_positions)

    # Parameter columns to persist as structured fields in SQLite
    param_cols = sorted(asdict(params0).keys())

    db_path = Path(args.db)
    ensure_db(db_path, param_cols=param_cols)

    run_id = datetime.now(timezone.utc).strftime("%Y%m%dT%H%M%SZ") + f"_seed{int(args.seed)}"
    code_version = infer_code_version(Path("."))

    rng = random.Random(int(args.seed))

    # Single definition of the search space, shared by both modes. Key order
    # matters: random mode draws one value per key in this order.
    search_space: dict[str, list[Any]] = {
        "sma_fast": [3, 5],
        "sma_slow": [15, 20, 30],
        "lazy_days": [4, 5, 6, 8],
        "min_hold_days": [2, 3, 5],
        "replace_score_gap": [0.5, 0.8, 1.2, 1.6],
        "min_score": [0.0, 0.2, 0.4, 0.6],
        "desired_positions_min": [1, 2],
        "macro_min_breadth": [0.10, 0.15, 0.20, 0.30],
        "macro_down_frac": [0.75, 0.80, 0.85],
        "atr_mult": [2.5, 3.2, 4.0],
        "stop_loss_atr": [2.0, 2.5, 3.2],
        "profit_tighten_atr": [4.0, 6.0, 8.0],
        "atr_mult_profit": [1.5, 2.0, 2.5],
        "bias_exit": [0.12, 0.18, 0.25],
        "vol_ratio_exit": [3.0, 4.0],
    }
    keys = list(search_space)

    def make_task(trial: int, vals: dict[str, Any]) -> dict[str, Any]:
        """Build the task dict for one candidate on top of ``params0``."""
        p = replace(params0, **vals, desired_positions_max=3, rebalance_every=1)
        p_dict = asdict(p)  # computed once, not once per column
        return {
            "trial": int(trial),
            "seed": int(args.seed),
            "start": str(args.start),
            "end": str(args.end),
            "max_trades_per_year": float(args.max_trades_per_year),
            "params": {k: p_dict[k] for k in param_cols},
        }

    if str(args.mode) == "grid":
        iters = [list(search_space[k]) for k in keys]
        total = 1
        for xs in iters:
            total *= max(1, len(xs))
        max_grid = max(1, int(args.max_grid))
        if total > max_grid:
            print(f"grid combos {total} > {max_grid}; sampling combos", flush=True)
            combos = reservoir_sample_product(rng, iters, max_grid)
        else:
            import itertools

            combos = list(itertools.product(*iters))
        candidates = (dict(zip(keys, combo)) for combo in combos)
    else:
        # Lazily draw one value per key, in key order, for each trial.
        candidates = (
            {k: rng.choice(search_space[k]) for k in keys}
            for _ in range(int(args.trials))
        )

    tasks: list[dict[str, Any]] = []
    for t, vals in enumerate(candidates):
        # Skip degenerate windows; the trial index of a skipped candidate is
        # simply left unused.
        if vals["sma_fast"] >= vals["sma_slow"]:
            continue
        tasks.append(make_task(t, vals))

    results: list[dict[str, Any]] = []
    rows_for_db: list[dict[str, Any]] = []

    def record_row(row: dict[str, Any]) -> None:
        """Track a valid row: update the best state and queue it for the DB."""
        nonlocal best_ann
        results.append(row)

        if float(row["ann_return"]) > best_ann:
            best_ann = float(row["ann_return"])
            state["best"] = row
            save_state(state_path, state)

        db_row = {
            "run_id": run_id,
            "ts_utc": datetime.now(timezone.utc).isoformat(),
            "code_version": code_version,
            "config_path": str(config_path),
            "start": str(args.start),
            "end": str(args.end),
            "seed": int(args.seed),
            "trial": int(row.get("trial", -1)),
            "jobs": int(jobs),
            "ann_return": float(row["ann_return"]),
            "ann_vol": float(row["ann_vol"]),
            "max_drawdown": float(row["max_drawdown"]),
            "sharpe": float(row["sharpe"]),
            "trades_per_year": float(row["trades_per_year"]),
        }
        for c in param_cols:
            db_row[c] = row.get(c)
        rows_for_db.append(db_row)

        # Flush in batches so a long run does not hold every row in memory.
        if len(rows_for_db) >= 200:
            insert_rows(db_path, param_cols=param_cols, rows=rows_for_db)
            rows_for_db.clear()

    progress_every = int(args.progress_every)

    def consume(row: dict[str, Any] | None) -> None:
        """Record one evaluation result and emit the periodic progress line."""
        if row is None:
            return
        record_row(row)
        if progress_every > 0 and len(results) % progress_every == 0:
            print(f"progress valid={len(results)} best_ann={best_ann:.4f}", flush=True)

    if jobs == 1:
        for task in tasks:
            consume(_eval_one(task))
    else:
        import multiprocessing as mp
        from concurrent.futures import ProcessPoolExecutor, as_completed

        # "fork" lets workers inherit the globals set by _init_globals above.
        ctx = mp.get_context("fork")
        with ProcessPoolExecutor(max_workers=jobs, mp_context=ctx) as ex:
            futs = [ex.submit(_eval_one, task) for task in tasks]
            for fut in as_completed(futs):
                consume(fut.result())

    if rows_for_db:
        insert_rows(db_path, param_cols=param_cols, rows=rows_for_db)
        rows_for_db.clear()

    # setdefault: tolerate a state dict that has no "history" list yet.
    state.setdefault("history", []).append(
        {
            "timestamp": datetime.now(timezone.utc).isoformat(),
            "run_id": run_id,
            "code_version": code_version,
            "config": str(args.config),
            "start": str(args.start),
            "end": str(args.end),
            "trials": int(args.trials),
            "jobs": int(jobs),
            "best_ann_return": float(best_ann) if np.isfinite(best_ann) else None,
            "db": str(args.db),
        }
    )
    save_state(state_path, state)

    if not results:
        print("no valid trials")
        return

    df = pd.DataFrame(results).sort_values(["ann_return"], ascending=False)

    cols = [
        "ann_return",
        "ann_vol",
        "max_drawdown",
        "sharpe",
        "trades_per_year",
        "sma_fast",
        "sma_slow",
        "lazy_days",
        "min_hold_days",
        "replace_score_gap",
        "min_score",
        "macro_min_breadth",
        "macro_down_frac",
        "desired_positions_min",
        "atr_mult",
        "stop_loss_atr",
        "profit_tighten_atr",
        "atr_mult_profit",
        "bias_exit",
        "vol_ratio_exit",
    ]
    cols = [c for c in cols if c in df.columns]
    print(df[cols].head(12).to_string(index=False))

    if best_ann >= float(last_rep) + float(args.report_step):
        state["last_reported_ann_return"] = float(best_ann)
        save_state(state_path, state)
        print("REPORT_TRIGGER", float(best_ann), "baseline", float(last_rep))
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
93
scripts/report_last_run.py
Normal file
93
scripts/report_last_run.py
Normal file
@@ -0,0 +1,93 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import sqlite3
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
|
||||
def fetch_top(con: sqlite3.Connection, run_id: str, limit: int) -> list[dict[str, Any]]:
    """Return the best ``limit`` trials of ``run_id``, ordered by ``ann_return``.

    Each row is a dict keyed by the column names reported by
    ``PRAGMA table_info(trials)``.
    """
    names = [info[1] for info in con.execute("PRAGMA table_info(trials)")]
    cursor = con.execute(
        "SELECT * FROM trials WHERE run_id = ? ORDER BY ann_return DESC LIMIT ?",
        [run_id, int(limit)],
    )
    return [dict(zip(names, record)) for record in cursor]
|
||||
|
||||
|
||||
def main() -> None:
    """Print a summary of the most recent tuning run.

    Reads the JSON state file, takes the last ``history`` entry as the latest
    run, prints its metadata and the stored global best, then prints the top
    ``--top`` trials of that run from the SQLite database as JSON lines.

    Raises:
        SystemExit: If the state file has no history entries.
    """
    from contextlib import closing

    ap = argparse.ArgumentParser()
    ap.add_argument("--state", default="data/opt_state.json")
    ap.add_argument("--db", default="data/experiments.sqlite")
    ap.add_argument("--top", type=int, default=5)
    args = ap.parse_args()

    state_path = Path(args.state)
    state = json.loads(state_path.read_text(encoding="utf-8"))

    hist = state.get("history") or []
    if not hist:
        raise SystemExit("no history in opt_state.json")

    last = hist[-1]
    run_id = str(last.get("run_id"))

    best = state.get("best")

    print("last_run_id", run_id)
    print("last_run", {k: last.get(k) for k in ["timestamp", "seed", "trials", "jobs", "best_ann_return", "code_version"] if k in last})
    if best:
        print(
            "global_best",
            {
                "ann_return": best.get("ann_return"),
                "ann_vol": best.get("ann_vol"),
                "max_drawdown": best.get("max_drawdown"),
                "sharpe": best.get("sharpe"),
                "trades_per_year": best.get("trades_per_year"),
            },
        )

    db_path = Path(args.db)
    # sqlite3's context manager does not close the connection; closing() does.
    with closing(sqlite3.connect(str(db_path))) as con:
        rows = fetch_top(con, run_id=run_id, limit=int(args.top))

    if not rows:
        print("no rows for run_id")
        return

    slim_keys = (
        "id",
        "trial",
        "ann_return",
        "ann_vol",
        "max_drawdown",
        "sharpe",
        "trades_per_year",
        "sma_fast",
        "sma_slow",
        "lazy_days",
        "min_hold_days",
        "replace_score_gap",
        "min_score",
        "macro_min_breadth",
        "macro_down_frac",
        "desired_positions_min",
        "atr_mult",
        "stop_loss_atr",
        "profit_tighten_atr",
        "atr_mult_profit",
        "bias_exit",
        "vol_ratio_exit",
    )

    def slim(r: dict[str, Any]) -> dict[str, Any]:
        """Keep only the reported columns that are present in the row."""
        return {k: r.get(k) for k in slim_keys if k in r}

    print("top_trials")
    for r in rows:
        print(json.dumps(slim(r), ensure_ascii=False))
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
158
scripts/run_etf_trend_backtest.py
Normal file
158
scripts/run_etf_trend_backtest.py
Normal file
@@ -0,0 +1,158 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import json
|
||||
from dataclasses import fields
|
||||
from pathlib import Path
|
||||
|
||||
import pandas as pd
|
||||
|
||||
from qfr.strategy.etf_trend import Constraints, TrendParams, UniverseAsset, run_backtest
|
||||
|
||||
|
||||
def load_prices(raw_dir: Path, universe: list[UniverseAsset]) -> dict[str, pd.DataFrame]:
    """Read one parquet file per universe asset from ``raw_dir``.

    The file name is the asset's ``ts_code`` with dots removed plus
    ``.parquet``.

    Returns:
        Mapping from ``ts_code`` to the loaded frame.

    Raises:
        FileNotFoundError: If an asset's parquet file does not exist.
    """
    frames: dict[str, pd.DataFrame] = {}
    for asset in universe:
        code = asset.ts_code
        path = raw_dir / f"{code.replace('.', '')}.parquet"
        if not path.exists():
            raise FileNotFoundError(f"missing data file: {path}")
        frames[code] = pd.read_parquet(path)
    return frames
|
||||
|
||||
|
||||
def perf_stats(equity: pd.Series) -> dict[str, float]:
    """Compute summary performance statistics for an equity curve.

    Returns an empty dict when the curve yields no returns. Otherwise the keys
    are ``ann_return`` (total growth compounded to 252 periods per year),
    ``ann_vol`` (sample std of period returns times sqrt(252)),
    ``max_drawdown`` (most negative equity / running-peak - 1) and ``sharpe``
    (``ann_return / ann_vol``, NaN when volatility is not positive). ``sharpe``
    matches the ``perf_stats`` in ``verify_topn.py`` and the ``trials`` schema.
    """
    r = equity.pct_change().dropna()
    if r.empty:
        return {}
    ann_ret = float((equity.iloc[-1] / equity.iloc[0]) ** (252 / len(r)) - 1)
    ann_vol = float(r.std(ddof=1) * (252**0.5))
    dd = float((equity / equity.cummax() - 1.0).min())
    sharpe = float(ann_ret / ann_vol) if ann_vol > 0 else float("nan")
    return {"ann_return": ann_ret, "ann_vol": ann_vol, "max_drawdown": dd, "sharpe": sharpe}
|
||||
|
||||
|
||||
def add_trendparams_args(p: argparse.ArgumentParser) -> None:
    """Register optional CLI flags for a subset of ``TrendParams`` fields.

    Every flag defaults to ``None`` (meaning "no override"). A flag is only
    registered when ``TrendParams`` actually has a field of that name, so the
    CLI stays usable if a field is removed from the dataclass.
    """
    known = {f.name for f in fields(TrendParams)}

    # (TrendParams field, CLI flag, type, help) in the order flags are added.
    specs = [
        ("sma_fast", "--sma-fast", int, "SMA fast window"),
        ("sma_slow", "--sma-slow", int, "SMA slow window"),
        ("lazy_days", "--lazy-days", int, "Min days between switches"),
        ("min_hold_days", "--min-hold-days", int, "Min hold days before trend-exit/switch"),
        ("replace_score_gap", "--replace-score-gap", float, "Replace weakest only if score gap >= this"),
        ("min_score", "--min-score", float, "Entry score threshold (allow empty if not met)"),
        ("macro_down_frac", "--macro-down-frac", float, "Down-day breadth threshold for consistent down"),
        ("desired_positions_min", "--desired-positions-min", int, "Desired min positions (allow empty)"),
        ("desired_positions_max", "--desired-positions-max", int, "Desired max positions"),
        ("rebalance_band", "--rebalance-band", float, "Ignore small weight changes"),
        ("atr_mult", "--atr-mult", float, "Chandelier ATR multiple"),
        ("profit_tighten_atr", "--profit-tighten-atr", float, "Tighten trailing after profit >= N*ATR"),
        ("atr_mult_profit", "--atr-mult-profit", float, "Chandelier ATR multiple after tighten"),
        ("stop_loss_atr", "--stop-loss-atr", float, "Hard stop loss from entry in ATR"),
        ("bias_exit", "--bias-exit", float, "Exit when abs(bias) >= threshold"),
        ("vol_ratio_exit", "--vol-ratio-exit", float, "Exit when volume/amount ratio >= threshold"),
        ("max_weight_per_asset", "--max-weight-per-asset", float, "Max weight per risky asset"),
        ("concentration_power", "--concentration-power", float, "Weight concentration power"),
        ("macro_min_breadth", "--macro-min-breadth", float, "Min equity breadth to be risk-on"),
        ("macro_scale_risk_off", "--macro-scale-risk-off", float, "Scale risky weights in risk-off"),
    ]
    for field_name, flag, typ, help_ in specs:
        if field_name in known:
            p.add_argument(flag, type=typ, default=None, help=help_)
|
||||
|
||||
|
||||
def main() -> None:
    """CLI entry point: run one ETF trend backtest and write its outputs.

    Loads the universe and constraints from ``--config``, applies any
    TrendParams overrides given on the command line, restricts prices to
    ``[--start, --end]``, runs the backtest, writes equity / weights / trades
    parquet files next to ``--out`` and prints summary statistics.

    Raises:
        RuntimeError: If no rates fallback is configured and the universe has
            no asset whose ``asset_class`` starts with ``"rates"``.
    """
    from dataclasses import replace

    p = argparse.ArgumentParser()
    p.add_argument("--config", default="configs/etf_universe.json")
    p.add_argument("--rawdir", default="data/raw")
    p.add_argument("--out", default="data/etf_trend_equity.parquet")
    p.add_argument("--start", default="20200101", help="Filter start trade_date YYYYMMDD (inclusive)")
    p.add_argument("--end", default="20251231", help="Filter end trade_date YYYYMMDD (inclusive)")
    add_trendparams_args(p)
    args = p.parse_args()

    conf = json.loads(Path(args.config).read_text(encoding="utf-8"))
    universe = [UniverseAsset(**a) for a in conf["assets"]]

    cons = conf.get("constraints", {})
    constraints = Constraints(
        max_positions=int(cons.get("max_positions", 4)),
        must_commodity=int(cons.get("must_include", {}).get("commodity", 1)),
        must_rates=int(cons.get("must_include", {}).get("rates", 1)),
        must_equity=int(cons.get("must_include", {}).get("equity", 1)),
    )

    params = TrendParams(max_positions=constraints.max_positions)

    # apply CLI overrides
    # add_trendparams_args() skips flags whose field is missing from
    # TrendParams, so every lookup goes through getattr(..., None) rather than
    # plain attribute access, which would raise for a skipped flag.
    override_names = (
        "sma_fast",
        "sma_slow",
        "lazy_days",
        "min_hold_days",
        "replace_score_gap",
        "min_score",
        "macro_down_frac",
        "desired_positions_min",
        "desired_positions_max",
        "rebalance_band",
        "atr_mult",
        "profit_tighten_atr",
        "atr_mult_profit",
        "stop_loss_atr",
        "bias_exit",
        "vol_ratio_exit",
        "max_weight_per_asset",
        "concentration_power",
        "macro_min_breadth",
        "macro_scale_risk_off",
    )
    overrides = {}
    for name in override_names:
        value = getattr(args, name, None)
        if value is not None:
            overrides[name] = value
    if overrides:
        # dataclasses.replace does not depend on the instance having __dict__.
        params = replace(params, **overrides)

    risk_proxy = cons.get("risk_proxy", "510300.SH")

    rates_fallback = cons.get("rates_fallback")
    if rates_fallback is None:
        # Fall back to the first rates asset found in the universe.
        for a in universe:
            if a.asset_class.startswith("rates"):
                rates_fallback = a.ts_code
                break
    if not rates_fallback:
        raise RuntimeError("universe must include a rates asset for fallback")

    prices = load_prices(Path(args.rawdir), universe)
    start, end = str(args.start), str(args.end)
    for k, df in prices.items():
        d = df.copy()
        # Dates are compared as strings, so normalise the column first.
        d["trade_date"] = d["trade_date"].astype(str)
        d = d[(d["trade_date"] >= start) & (d["trade_date"] <= end)]
        prices[k] = d

    equity, weights, trades = run_backtest(prices, universe, constraints, params, rates_fallback=rates_fallback, risk_proxy=risk_proxy)

    out = Path(args.out)
    out.parent.mkdir(parents=True, exist_ok=True)
    equity.to_parquet(out)

    weights_path = out.with_name(out.stem + "_weights" + out.suffix)
    trades_path = out.with_name(out.stem + "_trades" + out.suffix)
    weights.to_parquet(weights_path)
    if trades is not None and not trades.empty:
        trades.to_parquet(trades_path, index=False)
        print(f"wrote trades -> {trades_path}")

    st = perf_stats(equity["equity"])
    print("perf", st)
    print("last equity", float(equity["equity"].iloc[-1]))
    print("last weights", weights.iloc[-1].sort_values(ascending=False).head(10).to_dict())
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
0
scripts/run_iter20_loop.sh
Executable file
0
scripts/run_iter20_loop.sh
Executable file
0
scripts/run_macro20.sh
Normal file
0
scripts/run_macro20.sh
Normal file
26
scripts/smoke.py
Normal file
26
scripts/smoke.py
Normal file
@@ -0,0 +1,26 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
|
||||
from qfr.factors import winsorize_by_date, zscore_by_date
|
||||
from qfr.metrics import information_coefficient
|
||||
|
||||
|
||||
def main() -> None:
    """Smoke test: push random data through the factor and IC helpers.

    Builds a 3-date x 4-asset panel of seeded random factor values and forward
    returns, winsorizes then z-scores the factor per date, and prints the mean
    information coefficient.
    """
    days = pd.to_datetime(["2026-01-01", "2026-01-02", "2026-01-03"])
    tickers = ["A", "B", "C", "D"]
    panel_index = pd.MultiIndex.from_product([days, tickers], names=["date", "asset"])
    n_obs = len(panel_index)

    # One generator, factor drawn before returns: keeps the numbers reproducible.
    gen = np.random.default_rng(42)
    raw_factor = pd.Series(gen.normal(size=n_obs), index=panel_index)
    forward_returns = pd.Series(gen.normal(scale=0.01, size=n_obs), index=panel_index)

    clipped = winsorize_by_date(raw_factor)
    standardized = zscore_by_date(clipped)
    ic = information_coefficient(standardized, forward_returns)

    print("IC mean:", float(ic.mean()))
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
35
scripts/tushare_download_daily.py
Normal file
35
scripts/tushare_download_daily.py
Normal file
@@ -0,0 +1,35 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
from pathlib import Path
|
||||
|
||||
from qfr.data.tushare_client import fetch_daily, load_tushare_config
|
||||
|
||||
|
||||
def main() -> None:
    """CLI entry point: fetch daily bars via Tushare and save them as parquet.

    The query filters (``--ts-code``, ``--trade-date``, ``--start``, ``--end``)
    are forwarded unchanged to ``fetch_daily``; the result is written to
    ``--out`` without the index.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--env", default=None, help="Path to .env (default: auto-detect)")
    parser.add_argument("--ts-code", default=None, help="e.g. 000001.SZ")
    parser.add_argument("--start", dest="start_date", default=None, help="YYYYMMDD")
    parser.add_argument("--end", dest="end_date", default=None, help="YYYYMMDD")
    parser.add_argument("--trade-date", default=None, help="YYYYMMDD")
    parser.add_argument("--out", default="data/raw/tushare_daily.parquet")
    args = parser.parse_args()

    query = {
        "ts_code": args.ts_code,
        "trade_date": args.trade_date,
        "start_date": args.start_date,
        "end_date": args.end_date,
    }
    cfg = load_tushare_config(args.env)
    df = fetch_daily(cfg, **query)

    dest = Path(args.out)
    dest.parent.mkdir(parents=True, exist_ok=True)
    df.to_parquet(dest, index=False)
    print(f"wrote {len(df)} rows -> {dest}")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
47
scripts/tushare_download_universe.py
Normal file
47
scripts/tushare_download_universe.py
Normal file
@@ -0,0 +1,47 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import json
|
||||
from pathlib import Path
|
||||
|
||||
import pandas as pd
|
||||
|
||||
from qfr.data.tushare_client import fetch_fund_daily, load_tushare_config
|
||||
|
||||
|
||||
def main() -> None:
    """CLI entry point: download daily bars for every asset in the universe.

    For each asset in the config's ``assets`` list, fetches fund daily data,
    keeps the standard price/volume columns that are present, sorts by
    ``trade_date`` and writes ``<ts_code without dots>.parquet`` into
    ``--outdir``. Assets with no data are reported and skipped.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--env", default=None, help="Path to .env")
    parser.add_argument("--config", default="configs/etf_universe.json")
    parser.add_argument("--start", dest="start_date", default=None, help="YYYYMMDD")
    parser.add_argument("--end", dest="end_date", default=None, help="YYYYMMDD")
    parser.add_argument("--outdir", default="data/raw")
    args = parser.parse_args()

    cfg = load_tushare_config(args.env)

    conf = json.loads(Path(args.config).read_text(encoding="utf-8"))
    assets = conf["assets"]
    outdir = Path(args.outdir)
    outdir.mkdir(parents=True, exist_ok=True)

    # standardize columns expected by backtest
    # fund_daily provides: ts_code, trade_date, open, high, low, close, vol, amount
    wanted = ("ts_code", "trade_date", "open", "high", "low", "close", "vol", "amount")

    for asset in assets:
        ts_code = asset["ts_code"]
        df = fetch_fund_daily(cfg, ts_code=ts_code, start_date=args.start_date, end_date=args.end_date)
        if df is None or df.empty:
            print(f"skip {ts_code}: empty")
            continue

        present = [c for c in wanted if c in df.columns]
        tidy = df[present].copy().sort_values("trade_date")

        target = outdir / f"{ts_code.replace('.', '')}.parquet"
        tidy.to_parquet(target, index=False)
        print(f"wrote {ts_code}: {len(tidy)} rows -> {target}")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
150
scripts/verify_topn.py
Normal file
150
scripts/verify_topn.py
Normal file
@@ -0,0 +1,150 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import sqlite3
|
||||
from dataclasses import fields
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
import pandas as pd
|
||||
|
||||
from qfr.strategy.etf_trend import Constraints, TrendParams, UniverseAsset, run_backtest
|
||||
|
||||
|
||||
def load_universe(config_path: Path) -> tuple[list[UniverseAsset], Constraints, str, str]:
    """Parse the universe config JSON.

    Returns:
        ``(universe, constraints, risk_proxy, rates_fallback)``. Missing
        constraint values default to ``max_positions=3`` and ``0`` for each
        ``must_include`` class. ``risk_proxy`` falls back to the first asset's
        ``ts_code`` (or ``"510300.SH"`` for an empty universe);
        ``rates_fallback`` defaults to ``"511010.SH"``.
    """
    conf = json.loads(config_path.read_text(encoding="utf-8"))
    universe = [UniverseAsset(**spec) for spec in conf["assets"]]

    cons = conf.get("constraints", {})
    max_positions = int(cons.get("max_positions", 3))
    must = cons.get("must_include", {})
    constraints = Constraints(
        max_positions=max_positions,
        must_commodity=int(must.get("commodity", 0)),
        must_rates=int(must.get("rates", 0)),
        must_equity=int(must.get("equity", 0)),
    )

    risk_proxy = cons.get("risk_proxy")
    if not risk_proxy:
        risk_proxy = universe[0].ts_code if universe else "510300.SH"
    rates_fallback = cons.get("rates_fallback", "511010.SH")
    return universe, constraints, str(risk_proxy), str(rates_fallback)
|
||||
|
||||
|
||||
def load_prices(raw_dir: Path, universe: list[UniverseAsset], start: str, end: str) -> dict[str, pd.DataFrame]:
    """Load each asset's parquet file and keep rows within ``[start, end]``.

    ``trade_date`` is cast to ``str`` and compared as a string, inclusive on
    both ends. The file name is the ``ts_code`` with dots removed plus
    ``.parquet``.

    Returns:
        Mapping from ``ts_code`` to the filtered frame.
    """
    frames: dict[str, pd.DataFrame] = {}
    for asset in universe:
        path = raw_dir / (asset.ts_code.replace(".", "") + ".parquet")
        frame = pd.read_parquet(path).copy()
        frame["trade_date"] = frame["trade_date"].astype(str)
        dates = frame["trade_date"]
        in_window = (dates >= start) & (dates <= end)
        frames[asset.ts_code] = frame[in_window]
    return frames
|
||||
|
||||
|
||||
def perf_stats(equity: pd.Series) -> dict[str, float]:
    """Summarise an equity curve.

    Returns an empty dict when there are no period returns. Otherwise the keys
    are ``ann_return`` (total growth compounded to 252 periods per year),
    ``ann_vol`` (sample std of returns times sqrt(252)), ``max_drawdown``
    (minimum of equity / running-peak - 1) and ``sharpe``
    (``ann_return / ann_vol``, NaN when volatility is not positive).
    """
    returns = equity.pct_change().dropna()
    if returns.empty:
        return {}

    periods = len(returns)
    growth = equity.iloc[-1] / equity.iloc[0]
    ann_ret = float(growth ** (252 / periods) - 1)
    ann_vol = float(returns.std(ddof=1) * (252**0.5))

    drawdown = equity / equity.cummax() - 1.0
    dd = float(drawdown.min())

    if ann_vol > 0:
        sharpe = float(ann_ret / ann_vol)
    else:
        sharpe = float("nan")

    return {"ann_return": ann_ret, "ann_vol": ann_vol, "max_drawdown": dd, "sharpe": sharpe}
|
||||
|
||||
|
||||
def table_columns(con: sqlite3.Connection, table: str) -> list[str]:
    """Return the column names of ``table`` as reported by ``PRAGMA table_info``.

    ``table`` is interpolated directly into the statement, so it must be a
    trusted identifier.
    """
    cursor = con.execute(f"PRAGMA table_info({table})")
    # Index 1 of each table_info row is the column name.
    return [info[1] for info in cursor]
|
||||
|
||||
|
||||
def fetch_topn(db_path: Path, run_id: str | None, topn: int) -> tuple[list[str], list[dict[str, Any]]]:
    """Fetch the ``topn`` trials with the highest ``ann_return``.

    Args:
        db_path: Path of the SQLite database file.
        run_id: Restrict to this run when truthy; otherwise search all runs.
        topn: Maximum number of rows to return.

    Returns:
        ``(cols, rows)`` where ``cols`` are the ``trials`` column names and
        each row is a dict keyed by those names.
    """
    from contextlib import closing

    # sqlite3's context manager only manages the transaction; closing()
    # releases the connection once the rows are materialised.
    with closing(sqlite3.connect(str(db_path))) as con:
        cols = table_columns(con, "trials")
        where = ""
        params: list[Any] = []
        if run_id:
            where = "WHERE run_id = ?"
            params.append(run_id)
        sql = f"SELECT * FROM trials {where} ORDER BY ann_return DESC LIMIT ?"
        rows = [dict(zip(cols, r)) for r in con.execute(sql, [*params, int(topn)])]
    return cols, rows
|
||||
|
||||
|
||||
def main() -> None:
    """CLI entry point: re-run the top stored trials and compare the metrics.

    Loads the best ``--topn`` rows from the ``trials`` table, rebuilds
    ``TrendParams`` from each row's parameter columns, re-runs the backtest
    over the row's (or the overridden) date window and prints ``OK`` or
    ``MISMATCH`` depending on whether every metric is within ``--tol`` of the
    stored value.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("--db", default="data/experiments.sqlite")
    ap.add_argument("--run_id", default=None)
    ap.add_argument("--topn", type=int, default=10)
    ap.add_argument("--config", default="configs/etf_universe_industry_profiled.json")
    ap.add_argument("--rawdir", default="data/raw")
    ap.add_argument("--start", default=None)
    ap.add_argument("--end", default=None)
    ap.add_argument("--tol", type=float, default=1e-6)
    args = ap.parse_args()

    db_path = Path(args.db)
    cols, rows = fetch_topn(db_path, args.run_id, args.topn)
    if not rows:
        print("no trials found")
        return

    config_path = Path(args.config)
    universe, constraints, risk_proxy, rates_fallback = load_universe(config_path)

    tp_fields = {f.name for f in fields(TrendParams)}
    # Coerce param types: sqlite stores numerics as REAL, so ints may come back as floats.
    _defaults = TrendParams()
    _field_types = {name: type(getattr(_defaults, name)) for name in tp_fields}

    def _coerce(name: str, v):
        """Convert a stored value back to the type of the field's default."""
        if v is None:
            return None
        t = _field_types.get(name)
        if t is int:
            return int(round(float(v)))
        if t is bool:
            return bool(int(round(float(v))))
        return float(v)

    metric_keys = ("ann_return", "ann_vol", "max_drawdown", "sharpe")
    raw_dir = Path(args.rawdir)
    tol = float(args.tol)
    # Rows of one run normally share a date window, so load each window once.
    # NOTE(review): assumes run_backtest does not mutate the price frames.
    price_cache: dict[tuple[str, str], dict[str, pd.DataFrame]] = {}

    mismatches = 0
    for idx, row in enumerate(rows, start=1):
        start = str(args.start or row.get("start") or "20200101")
        end = str(args.end or row.get("end") or "20251231")

        window = (start, end)
        if window not in price_cache:
            price_cache[window] = load_prices(raw_dir, universe, start, end)
        prices = price_cache[window]

        params_dict: dict[str, Any] = {}
        for k in cols:
            if k in tp_fields and row.get(k) is not None:
                params_dict[k] = _coerce(k, row[k])
        params_dict.setdefault("max_positions", constraints.max_positions)

        tp = TrendParams(**params_dict)
        equity, _weights, _trades = run_backtest(
            prices,
            universe,
            constraints,
            tp,
            rates_fallback=rates_fallback,
            risk_proxy=risk_proxy,
        )

        st = perf_stats(equity["equity"])
        if st:
            diffs = {k: float(st[k] - float(row.get(k) or 0.0)) for k in metric_keys}
            bad = any(abs(v) > tol for v in diffs.values())
        else:
            # perf_stats returns {} when the curve has no returns; report that
            # as a mismatch instead of failing on a missing key.
            diffs = {}
            bad = True
        if bad:
            mismatches += 1

        tag = "MISMATCH" if bad else "OK"
        print(f"[{idx}] {tag} id={row.get('id')} run_id={row.get('run_id')} start={start} end={end}")
        print(" orig:", {k: row.get(k) for k in metric_keys})
        print(" re :", st)
        print(" diff:", diffs)

    print(f"done. mismatches={mismatches}/{len(rows)}")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
Reference in New Issue
Block a user