initial import: etf strategy project

2026-03-13 17:10:49 +08:00
commit 79ea983ca3
123 changed files with 6398 additions and 0 deletions
--- a/scripts/iterate_best_local.py
+++ b/scripts/iterate_best_local.py
@@ -0,0 +1,472 @@
+from __future__ import annotations
+
+import argparse
+import json
+import random
+import sqlite3
+from dataclasses import asdict, fields, replace
+from datetime import datetime, timezone
+from pathlib import Path
+from typing import Any
+
+import numpy as np
+import pandas as pd
+
+from qfr.strategy.etf_trend import Constraints, TrendParams, UniverseAsset, run_backtest
+
+
+def load_universe(config_path: Path) -> tuple[list[UniverseAsset], Constraints, str, str]:
+    """Parse the universe JSON config into the inputs used by the backtest.
+
+    Args:
+        config_path: JSON file with an ``assets`` list and an optional
+            ``constraints`` object.
+
+    Returns:
+        ``(assets, constraints, risk_proxy, rates_fallback)``. ``risk_proxy``
+        falls back to the first asset's ``ts_code`` (or ``"510300.SH"`` when
+        the asset list is empty); ``rates_fallback`` defaults to
+        ``"511010.SH"``.
+
+    Raises:
+        KeyError: if the config has no ``assets`` entry.
+    """
+    raw = json.loads(config_path.read_text(encoding="utf-8"))
+    assets = [UniverseAsset(**spec) for spec in raw["assets"]]
+
+    section = raw.get("constraints", {})
+    limits = Constraints(
+        max_positions=int(section.get("max_positions", 3)),
+        must_commodity=int(section.get("must_include", {}).get("commodity", 0)),
+        must_rates=int(section.get("must_include", {}).get("rates", 0)),
+        must_equity=int(section.get("must_include", {}).get("equity", 0)),
+    )
+
+    # An explicit (truthy) risk_proxy wins; otherwise use the first asset.
+    proxy = section.get("risk_proxy")
+    if not proxy:
+        proxy = assets[0].ts_code if assets else "510300.SH"
+    fallback = section.get("rates_fallback", "511010.SH")
+    return assets, limits, str(proxy), str(fallback)
+
+
+def load_prices(raw_dir: Path, universe: list[UniverseAsset], start: str, end: str) -> dict[str, pd.DataFrame]:
+    """Load one parquet price file per asset, restricted to ``[start, end]``.
+
+    The file name is the asset's ``ts_code`` with dots removed plus
+    ``.parquet``. ``trade_date`` is cast to ``str`` and compared as text, so
+    ``start`` and ``end`` are inclusive string bounds.
+
+    Returns:
+        Mapping of ``ts_code`` to its filtered frame.
+    """
+    frames: dict[str, pd.DataFrame] = {}
+    for asset in universe:
+        code = asset.ts_code
+        parquet_path = raw_dir / (code.replace(".", "") + ".parquet")
+        # assign() returns a new frame, so the frame read from disk is not mutated.
+        frame = pd.read_parquet(parquet_path).assign(
+            trade_date=lambda d: d["trade_date"].astype(str)
+        )
+        dates = frame["trade_date"]
+        in_window = (dates >= start) & (dates <= end)
+        frames[code] = frame[in_window]
+    return frames
+
+
+def perf_stats(equity: pd.Series) -> dict[str, float]:
+    """Summarise an equity curve with annualised performance figures.
+
+    Annualisation uses 252 periods per year.
+
+    Returns:
+        A dict with ``ann_return``, ``ann_vol``, ``max_drawdown`` and
+        ``sharpe`` (``ann_return / ann_vol``, NaN unless ``ann_vol > 0``),
+        or an empty dict when the curve yields no period returns.
+    """
+    period_returns = equity.pct_change().dropna()
+    n_periods = len(period_returns)
+    if n_periods == 0:
+        return {}
+
+    growth = equity.iloc[-1] / equity.iloc[0]
+    annual_return = float(growth ** (252 / n_periods) - 1)
+    annual_vol = float(period_returns.std(ddof=1) * (252**0.5))
+
+    # Drawdown is measured against the running peak of the curve.
+    running_peak = equity.cummax()
+    worst_drawdown = float((equity / running_peak - 1.0).min())
+
+    if annual_vol > 0:
+        ratio = float(annual_return / annual_vol)
+    else:
+        ratio = float("nan")
+
+    return {
+        "ann_return": annual_return,
+        "ann_vol": annual_vol,
+        "max_drawdown": worst_drawdown,
+        "sharpe": ratio,
+    }
+
+
+def trades_per_year(trades: pd.DataFrame | None, start: str, end: str) -> float:
+    """Return the average number of trade rows per calendar year.
+
+    The year count is ``end`` year minus ``start`` year plus one (taken from
+    the first four characters of each string), floored at 1. ``None``, an
+    empty frame, or any object without an ``empty`` attribute yields ``0.0``.
+    """
+    if trades is None:
+        return 0.0
+    if getattr(trades, "empty", True):
+        return 0.0
+
+    first_year = int(start[:4])
+    last_year = int(end[:4])
+    span = last_year - first_year + 1
+    if span < 1:
+        span = 1
+    return float(len(trades) / span)
+
+
+def ensure_db(db_path: Path, param_cols: list[str]) -> None:
+    """Create the ``trials`` table if needed and add any missing parameter columns.
+
+    Safe to call repeatedly: columns that already exist (compared
+    case-insensitively, as SQLite does) are left alone.
+
+    Args:
+        db_path: SQLite database file; parent directories are created.
+        param_cols: Names of parameter columns, each added as ``REAL``.
+
+    Raises:
+        sqlite3.OperationalError: if the schema cannot be created or altered.
+    """
+    from contextlib import closing
+
+    db_path.parent.mkdir(parents=True, exist_ok=True)
+    # sqlite3's own context manager only commits/rolls back; closing() is what
+    # actually releases the connection when the block exits.
+    with closing(sqlite3.connect(str(db_path))) as con, con:
+        con.execute("PRAGMA journal_mode=WAL")
+        con.execute("PRAGMA synchronous=NORMAL")
+        con.execute(
+            """
+            CREATE TABLE IF NOT EXISTS trials (
+              id INTEGER PRIMARY KEY AUTOINCREMENT,
+              run_id TEXT NOT NULL,
+              ts_utc TEXT NOT NULL,
+              code_version TEXT,
+              config_path TEXT,
+              start TEXT,
+              end TEXT,
+              seed INTEGER,
+              trial INTEGER,
+              jobs INTEGER,
+              ann_return REAL,
+              ann_vol REAL,
+              max_drawdown REAL,
+              sharpe REAL,
+              trades_per_year REAL
+            )
+            """
+        )
+        # Inspect the live schema instead of swallowing every ALTER error, so
+        # genuine failures (locked database, read-only file, ...) still surface.
+        existing = {str(info[1]).lower() for info in con.execute("PRAGMA table_info(trials)")}
+        for c in param_cols:
+            if c.lower() in existing:
+                continue
+            quoted = '"' + c.replace('"', '""') + '"'
+            con.execute(f"ALTER TABLE trials ADD COLUMN {quoted} REAL")
+            existing.add(c.lower())
+
+
+def insert_rows(db_path: Path, param_cols: list[str], rows: list[dict[str, Any]]) -> None:
+    """Bulk-insert trial rows into the ``trials`` table.
+
+    Args:
+        db_path: SQLite database file that already contains ``trials``.
+        param_cols: Parameter column names appended after the fixed columns.
+        rows: One dict per trial; keys missing from a row are stored as NULL.
+
+    Does nothing when ``rows`` is empty. All rows are written in a single
+    transaction.
+    """
+    from contextlib import closing
+
+    if not rows:
+        return
+    cols = [
+        "run_id",
+        "ts_utc",
+        "code_version",
+        "config_path",
+        "start",
+        "end",
+        "seed",
+        "trial",
+        "jobs",
+        "ann_return",
+        "ann_vol",
+        "max_drawdown",
+        "sharpe",
+        "trades_per_year",
+        *param_cols,
+    ]
+    placeholders = ",".join("?" for _ in cols)
+    # Quote identifiers so keyword-like or unusual column names stay valid SQL.
+    join_cols = ",".join('"' + c.replace('"', '""') + '"' for c in cols)
+    sql = f"INSERT INTO trials ({join_cols}) VALUES ({placeholders})"
+    vals = [[r.get(c) for c in cols] for r in rows]
+    # The inner ``con`` context commits (or rolls back on error); closing()
+    # releases the connection, which sqlite3's context manager does not do.
+    with closing(sqlite3.connect(str(db_path))) as con, con:
+        con.executemany(sql, vals)
+
+
+def load_state(path: Path) -> dict:
+    """Read the optimiser state JSON, or return a fresh empty state.
+
+    When ``path`` does not exist the result has ``best`` and
+    ``last_reported_ann_return`` set to ``None`` and an empty ``history``.
+    """
+    if not path.exists():
+        return {"best": None, "last_reported_ann_return": None, "history": []}
+    text = path.read_text(encoding="utf-8")
+    return json.loads(text)
+
+
+def save_state(path: Path, state: dict) -> None:
+    """Write ``state`` to ``path`` as indented ASCII JSON, replacing it atomically.
+
+    The payload goes to a sibling ``<name>.tmp`` file first and is then moved
+    over ``path`` with ``os.replace``, so an interrupted write cannot leave a
+    truncated state file behind. Parent directories are created as needed.
+
+    Raises:
+        TypeError: if ``state`` contains values ``json.dumps`` cannot encode.
+        OSError: if the file cannot be written or replaced.
+    """
+    import os
+
+    path.parent.mkdir(parents=True, exist_ok=True)
+    payload = json.dumps(state, ensure_ascii=True, indent=2) + "\n"
+    tmp_path = path.with_name(path.name + ".tmp")
+    try:
+        tmp_path.write_text(payload, encoding="utf-8")
+        os.replace(tmp_path, path)
+    except BaseException:
+        # Best-effort cleanup of the partial temp file; the original error wins.
+        try:
+            tmp_path.unlink()
+        except OSError:
+            pass
+        raise
+
+
+def infer_code_version(repo_dir: Path) -> str:
+    """Best-effort lookup of the checked-out commit without invoking git.
+
+    Reads ``.git/HEAD``. A detached HEAD returns its content directly. A
+    symbolic ref is resolved through the loose ref file and, failing that,
+    through ``.git/packed-refs``; if neither knows the ref, the raw HEAD
+    text is returned.
+
+    Returns:
+        The resolved text, ``"nogit"`` when ``.git/HEAD`` does not exist, or
+        ``"unknown"`` when HEAD is malformed or a file cannot be read.
+    """
+    git_dir = repo_dir / ".git"
+    head = git_dir / "HEAD"
+    if not head.exists():
+        return "nogit"
+    try:
+        txt = head.read_text(encoding="utf-8").strip()
+        if not txt.startswith("ref:"):
+            return txt
+        ref = txt[len("ref:"):].strip()
+        if not ref:
+            return "unknown"
+        ref_path = git_dir / ref
+        if ref_path.exists():
+            return ref_path.read_text(encoding="utf-8").strip()
+        # After packing, git keeps refs only in packed-refs as "<sha> <refname>"
+        # lines; header ("# ...") and peeled ("^<sha>") lines never match a name.
+        packed = git_dir / "packed-refs"
+        if packed.exists():
+            for line in packed.read_text(encoding="utf-8").splitlines():
+                sha, _, name = line.strip().partition(" ")
+                if sha and name == ref:
+                    return sha
+        return txt
+    except (OSError, UnicodeError):
+        return "unknown"
+
+
+def main() -> None:
+    """Run a local random search around the best parameters stored in the state file.
+
+    Steps:
+      1. Parse CLI arguments, load the universe config and price data.
+      2. Rebuild ``TrendParams`` from ``state["best"]`` (required; exits otherwise).
+      3. For each trial, perturb the parameters of every enabled ``--tweak``
+         group, run the backtest, and discard trials with no statistics or
+         with more trades per year than ``--max_trades_per_year``.
+      4. Persist a new best (by ``ann_return``) to the state file as soon as
+         it is found, write all valid trials to SQLite, append a history
+         record, and print the top rows.
+
+    ``--jobs`` is only recorded in the outputs; this function runs trials
+    sequentially. Tweak names that match no group below are ignored.
+    """
+    ap = argparse.ArgumentParser()
+    ap.add_argument("--config", default="configs/etf_universe_industry_profiled.json")
+    ap.add_argument("--rawdir", default="data/raw")
+    ap.add_argument("--start", default="20200101")
+    ap.add_argument("--end", default="20251231")
+    ap.add_argument("--trials", type=int, default=20)
+    ap.add_argument("--seed", type=int, default=1)
+    ap.add_argument("--jobs", type=int, default=1)
+    ap.add_argument("--state", default="data/opt_state.json")
+    ap.add_argument("--db", default="data/experiments.sqlite")
+    ap.add_argument("--max_trades_per_year", type=float, default=80.0)
+    ap.add_argument("--progress_every", type=int, default=5)
+    ap.add_argument(
+        "--tweak",
+        action="append",
+        default=[],
+        help=(
+            "Enable a tweak group. Repeatable. Options: macro, churn, stops, score, switches, switches2, signal1, orth_ma, orth_weights, orth_mech, asym_fast, positions, exits. "
+            "(Each group adjusts <=4 params around current best.)"
+        ),
+    )
+    args = ap.parse_args()
+
+    # Seed both RNGs so a given --seed reproduces the same sampled trials.
+    rng = random.Random(int(args.seed))
+    np.random.seed(int(args.seed))
+
+    config_path = Path(args.config)
+    universe, constraints, risk_proxy, rates_fallback = load_universe(config_path)
+    prices = load_prices(Path(args.rawdir), universe, str(args.start), str(args.end))
+
+    state_path = Path(args.state)
+    state = load_state(state_path)
+    best_row = state.get("best")
+    if not best_row:
+        raise SystemExit("opt_state.json missing best")
+
+    tp_fields = {f.name for f in fields(TrendParams)}
+
+    defaults = TrendParams(max_positions=constraints.max_positions)
+    best_params = {k: best_row[k] for k in best_row.keys() if k in tp_fields}
+
+    # JSON round-trips may change numeric types; coerce each stored value back
+    # to the type of the corresponding TrendParams default.
+    typed: dict[str, Any] = {}
+    for k, v in best_params.items():
+        t = type(getattr(defaults, k))
+        if t is int:
+            typed[k] = int(v)
+        elif t is float:
+            typed[k] = float(v)
+        else:
+            typed[k] = v
+
+    base = replace(defaults, **typed)
+
+    tweaks = set(args.tweak or [])
+
+    def sample_params() -> TrendParams:
+        """Return ``base`` with every enabled tweak group re-sampled.
+
+        Groups are applied in the fixed order below, so when two enabled
+        groups touch the same field the later one wins. The order of
+        ``rng.choice`` calls determines what a given seed produces.
+        """
+        p = base
+
+        if "macro" in tweaks:
+            p = replace(
+                p,
+                macro_min_breadth=float(rng.choice([0.10, 0.12, 0.15, 0.18, 0.20])),
+                macro_down_frac=float(rng.choice([0.75, 0.78, 0.80, 0.82, 0.85])),
+            )
+
+        if "churn" in tweaks:
+            p = replace(
+                p,
+                lazy_days=int(rng.choice([6, 8, 10])),
+                min_hold_days=int(rng.choice([2, 3, 4, 5])),
+                replace_score_gap=float(rng.choice([0.5, 0.8, 1.2, 1.6])),
+            )
+
+        if "switches" in tweaks:
+            # switch/constraint knobs (exactly 4 factors)
+            p = replace(
+                p,
+                desired_positions_min=int(rng.choice([1, 2, 3])),
+                replace_score_gap=float(rng.choice([0.0, 0.3, 0.5, 0.8, 1.2])),
+                lazy_days=int(rng.choice([4, 6, 8, 10, 12])),
+                min_hold_days=int(rng.choice([1, 2, 3, 4, 5])),
+            )
+
+        if "switches2" in tweaks:
+            # route D churn control without forcing higher min holdings (desired_positions_min fixed)
+            # exactly 4 factors: replace_score_gap, lazy_days, min_hold_days, cooldown_days
+            p = replace(
+                p,
+                desired_positions_min=int(1),
+                replace_score_gap=float(rng.choice([0.5, 0.8, 1.0, 1.2, 1.6])),
+                lazy_days=int(rng.choice([8, 10, 12, 14, 16])),
+                min_hold_days=int(rng.choice([3, 5, 7, 10])),
+                cooldown_days=int(rng.choice([0, 2, 4, 6, 8, 10])),
+            )
+
+        if "signal1" in tweaks:
+            # route D: improve signal quality (exactly 4 factors)
+            p = replace(
+                p,
+                min_score=float(rng.choice([0.0, 0.05, 0.10, 0.15, 0.20, 0.25, 0.30])),
+                trend_strength_weight=float(rng.choice([0.0, 0.2, 0.4, 0.6, 0.8, 1.0])),
+                score_vol_denom_floor=float(rng.choice([0.01, 0.02, 0.03, 0.04, 0.05])),
+                macro_min_breadth=float(rng.choice([0.10, 0.15, 0.20, 0.25, 0.30])),
+            )
+
+        if "orth_ma" in tweaks:
+            # route R: orthogonal to score/stops/exits; explore timing knobs (exactly 4 factors)
+            p = replace(
+                p,
+                sma_fast=int(rng.choice([3, 5, 7, 9, 12])),
+                sma_slow=int(rng.choice([20, 30, 40, 60, 90])),
+                rebalance_every=int(rng.choice([1, 2, 3, 5])),
+                max_replaces_per_day=int(rng.choice([0, 1, 2])),
+            )
+            # Guard: keep the fast window strictly shorter than the slow one.
+            if p.sma_fast >= p.sma_slow:
+                p = replace(p, sma_fast=max(3, int(p.sma_slow // 6)))
+
+        if "orth_weights" in tweaks:
+            # route R: orthogonal portfolio weight shape (exactly 4 factors)
+            max_positions = int(rng.choice([2, 3, 4, 5]))
+            desired_min = int(rng.choice([1, 2, 3]))
+            desired_max = int(rng.choice([2, 3, 4, 5]))
+            # keep desired_min <= desired_max <= max_positions
+            desired_min = min(desired_min, desired_max)
+            desired_max = min(desired_max, max_positions)
+            desired_min = min(desired_min, desired_max)
+            p = replace(
+                p,
+                max_positions=max_positions,
+                desired_positions_min=desired_min,
+                desired_positions_max=desired_max,
+                max_weight_per_asset=float(rng.choice([0.35, 0.45, 0.60, 0.75, 0.90, 1.00])),
+            )
+            # concentration_power exists in TrendParams; adjust it separately (still counts as one factor)
+            p = replace(p, concentration_power=float(rng.choice([1.2, 1.6, 2.0, 2.2, 2.6, 3.0])))
+
+        if "orth_mech" in tweaks:
+            # route R: mechanism/turnover knobs (exactly 4 factors)
+            p = replace(
+                p,
+                rebalance_every=int(rng.choice([1, 2, 3, 5])),
+                replace_score_gap=float(rng.choice([0.0, 0.3, 0.5, 0.8, 1.2])),
+                max_replaces_per_day=int(rng.choice([0, 1, 2, 3])),
+                cooldown_days=int(rng.choice([0, 2, 4, 6, 8, 10])),
+            )
+
+        if "asym_fast" in tweaks:
+            # asymmetric bull/bear risk controls (fast-run) (exactly 4 factors)
+            p = replace(
+                p,
+                regime_confirm_days=int(rng.choice([2, 3, 4, 5])),
+                bull_atr_mult=float(rng.choice([3.0, 3.2, 3.4, 3.6])),
+                bear_atr_mult=float(rng.choice([2.0, 2.2, 2.4, 2.6, 2.8])),
+                bear_stop_loss_atr=float(rng.choice([2.0, 2.2, 2.4, 2.6, 2.8])),
+            )
+
+        if "positions" in tweaks:
+            # concentration/positioning knobs (exactly 4 factors)
+            max_positions = int(rng.choice([2, 3, 4]))
+            desired_min = int(rng.choice([1, 2, 3]))
+            desired_max = int(rng.choice([2, 3, 4]))
+            # keep consistent
+            desired_min = min(desired_min, desired_max)
+            desired_max = min(desired_max, max_positions)
+            desired_min = min(desired_min, desired_max)
+            p = replace(
+                p,
+                max_positions=max_positions,
+                desired_positions_min=desired_min,
+                desired_positions_max=desired_max,
+                max_weight_per_asset=float(rng.choice([0.45, 0.60, 0.75, 0.90, 1.00])),
+            )
+
+        if "stops" in tweaks:
+            # risk-control fine search (route D: prefer higher sharpe / lower drawdown)
+            p = replace(
+                p,
+                atr_mult=float(rng.choice([3.0, 3.2, 3.4, 3.6])),
+                stop_loss_atr=float(rng.choice([2.4, 2.6, 2.8, 3.0, 3.2])),
+                profit_tighten_atr=float(rng.choice([4.0, 6.0, 8.0])),
+                atr_mult_profit=float(rng.choice([1.3, 1.5, 1.8, 2.0])),
+            )
+
+        if "exits" in tweaks:
+            # anomaly exits fine search (route D) - exactly 4 factors
+            p = replace(
+                p,
+                bias_window=int(rng.choice([10, 15, 20, 30])),
+                bias_exit=float(rng.choice([0.12, 0.16, 0.20, 0.25, 0.30])),
+                vol_short=int(rng.choice([3, 5, 8, 10])),
+                vol_ratio_exit=float(rng.choice([2.0, 2.5, 3.0, 3.5, 4.0])),
+            )
+
+        if "score" in tweaks:
+            # aggressive weight search for higher ann_return
+            p = replace(
+                p,
+                min_score=float(rng.choice([-0.10, 0.00, 0.05, 0.10, 0.20, 0.30, 0.40])),
+                trend_strength_weight=float(rng.choice([0.00, 0.20, 0.40, 0.60, 0.80, 1.00])),
+                w_r20=float(rng.choice([0.20, 0.35, 0.50, 0.65, 0.80])),
+                w_r60=float(rng.choice([0.00, 0.10, 0.20, 0.35, 0.50])),
+            )
+            # Split whatever weight is left between w_r5 (capped at 0.6) and
+            # w_r120; both are clamped at zero when w_r20 + w_r60 exceeds 1.
+            remain = 1.0 - (p.w_r20 + p.w_r60)
+            w_r5 = float(max(0.0, min(0.6, remain * 0.6)))
+            w_r120 = float(max(0.0, remain - w_r5))
+            p = replace(p, w_r5=w_r5, w_r120=w_r120)
+
+        return p
+
+    param_cols = sorted(asdict(base).keys())
+    db_path = Path(args.db)
+    ensure_db(db_path, param_cols=param_cols)
+
+    run_id = datetime.now(timezone.utc).strftime("%Y%m%dT%H%M%SZ") + f"_bestlocal_seed{int(args.seed)}" + ("_" + "-".join(sorted(tweaks)) if tweaks else "")
+    code_version = infer_code_version(Path("."))
+
+    # Compare against None explicitly: a stored ann_return of 0.0 is a real
+    # baseline and must not be replaced by -inf.
+    prev_ann = best_row.get("ann_return")
+    best_ann = float(prev_ann) if prev_ann is not None else float("-inf")
+
+    rows_for_db: list[dict[str, Any]] = []
+    valid = 0
+    for t in range(int(args.trials)):
+        p = sample_params()
+
+        equity, _w, tr = run_backtest(
+            prices,
+            universe,
+            constraints,
+            p,
+            rates_fallback=rates_fallback,
+            risk_proxy=risk_proxy,
+        )
+        st = perf_stats(equity["equity"])
+        if not st:
+            continue
+
+        # Reject candidates that trade more often than allowed.
+        tpy = trades_per_year(tr, str(args.start), str(args.end))
+        if tpy > float(args.max_trades_per_year):
+            continue
+
+        valid += 1
+        row = {**st, "trades_per_year": float(tpy), **asdict(p)}
+        row["trial"] = int(t)
+        row["seed"] = int(args.seed)
+
+        # Persist a new best immediately so an interrupted run keeps it.
+        if float(row["ann_return"]) > best_ann:
+            best_ann = float(row["ann_return"])
+            state["best"] = row
+            save_state(state_path, state)
+
+        db_row = {
+            "run_id": run_id,
+            "ts_utc": datetime.now(timezone.utc).isoformat(),
+            "code_version": code_version,
+            "config_path": str(config_path),
+            "start": str(args.start),
+            "end": str(args.end),
+            "seed": int(args.seed),
+            "trial": int(t),
+            "jobs": int(args.jobs),
+            "ann_return": float(row["ann_return"]),
+            "ann_vol": float(row["ann_vol"]),
+            "max_drawdown": float(row["max_drawdown"]),
+            "sharpe": float(row["sharpe"]),
+            "trades_per_year": float(row["trades_per_year"]),
+        }
+        for c in param_cols:
+            db_row[c] = row.get(c)
+        rows_for_db.append(db_row)
+
+        if int(args.progress_every) > 0 and valid % int(args.progress_every) == 0:
+            print(f"progress valid={valid} best_ann={best_ann:.4f}", flush=True)
+
+    if rows_for_db:
+        insert_rows(db_path, param_cols=param_cols, rows=rows_for_db)
+
+    state.setdefault("history", []).append(
+        {
+            "timestamp": datetime.now(timezone.utc).isoformat(),
+            "run_id": run_id,
+            "code_version": code_version,
+            "config": str(args.config),
+            "start": str(args.start),
+            "end": str(args.end),
+            "trials": int(args.trials),
+            "jobs": int(args.jobs),
+            "best_ann_return": float(best_ann) if np.isfinite(best_ann) else None,
+            "db": str(args.db),
+            "base_from": "opt_state.best",
+            "tweaks": sorted(tweaks),
+        }
+    )
+    save_state(state_path, state)
+
+    print("run_id", run_id)
+    # With no valid trials there is no ann_return column to sort on; report
+    # that instead of failing after the state has already been saved.
+    if not rows_for_db:
+        print("no valid trials to report")
+        return
+
+    df = pd.DataFrame(rows_for_db).sort_values(["ann_return"], ascending=False)
+    view_cols = [
+        "ann_return",
+        "ann_vol",
+        "max_drawdown",
+        "sharpe",
+        "trades_per_year",
+        "atr_mult",
+        "stop_loss_atr",
+        "profit_tighten_atr",
+        "atr_mult_profit",
+    ]
+    view_cols = [c for c in view_cols if c in df.columns]
+    print(df[view_cols].head(8).to_string(index=False))
+
+# Run the CLI only when this file is executed as a script, not when imported.
+if __name__ == "__main__":
+    main()