commit 79ea983ca379d2550ea2d6ab37f1255a75b8652d Author: openclaw Date: Fri Mar 13 17:10:49 2026 +0800 initial import: etf strategy project diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..2d4fe0e --- /dev/null +++ b/.gitignore @@ -0,0 +1,17 @@ +# Python +__pycache__/ +*.pyc +.venv/ + +# Jupyter +.ipynb_checkpoints/ + +# Data +/data/raw/ +/data/processed/ + +# Secrets +.env + +# OS +.DS_Store diff --git a/OPENCLAW.md b/OPENCLAW.md new file mode 100644 index 0000000..fe27ce7 --- /dev/null +++ b/OPENCLAW.md @@ -0,0 +1,17 @@ +# Operations Notes + +Project home: + +- `/home/openclaw/projects/quant-factor-research` + +This is a research workspace (not a service). If you later want to run scheduled jobs, +we can add: + +- a Docker image + cron +- a Jupyter server behind Caddy +- a factor computation API + +## 2026-03-07 用户迭代准则 + +- 基于已达标的基础策略(年化 25%+)做增量迭代;不要换主框架。 +- 单次微调因子不超过 4 个,确保可归因/可回滚。 diff --git a/README.md b/README.md new file mode 100644 index 0000000..6401476 --- /dev/null +++ b/README.md @@ -0,0 +1,42 @@ +# Quant Factor Research (QFR) + +A lightweight, reproducible workspace for researching, backtesting, and evaluating quantitative equity factors. + +## Goals + +- Factor definition library (cross-sectional / time-series) +- Data ingestion + caching +- Standardized preprocessing (winsorize, z-score, neutralization) +- IC / rank IC / turnover / decay analysis +- Simple backtests (long-short / top-k) with transaction cost hooks + +## Quickstart + +1) Create env (pick one) + +- Conda: + - `conda create -n qfr python=3.11 -y` + - `conda activate qfr` + - `pip install -r requirements.txt` + +- venv: + - `python3 -m venv .venv && source .venv/bin/activate` + - `pip install -r requirements.txt` + - Note: some servers ship Python without ensurepip/venv support; you may need the OS package `python3-venv` (root required). 
+ +2) Run a smoke test + +- `python -c "import qfr; print('ok')"` + +## Layout + +- `src/qfr/` core library +- `notebooks/` research notebooks +- `data/raw/` raw data (not committed) +- `data/processed/` derived data (not committed) +- `configs/` config templates +- `scripts/` CLI utilities + +## Notes + +- Keep secrets out of git. Use `.env` locally. diff --git a/configs/etf_universe.json b/configs/etf_universe.json new file mode 100644 index 0000000..84062e5 --- /dev/null +++ b/configs/etf_universe.json @@ -0,0 +1,408 @@ +{ + "version": 1, + "description": "Default ETF universe for trend-following (edit ts_code list after verifying tradability).", + "assets": [ + { + "ts_code": "510300.SH", + "asset_class": "equity_cn", + "name": "CSI300 ETF" + }, + { + "ts_code": "510500.SH", + "asset_class": "equity_cn", + "name": "CSI500 ETF" + }, + { + "ts_code": "159915.SZ", + "asset_class": "equity_cn", + "name": "ChiNext ETF" + }, + { + "ts_code": "588000.SH", + "asset_class": "equity_cn", + "name": "STAR50 ETF" + }, + { + "ts_code": "510880.SH", + "asset_class": "equity_cn", + "name": "Dividend ETF" + }, + { + "ts_code": "513100.SH", + "asset_class": "equity_qdii", + "name": "NASDAQ100 ETF (QDII)" + }, + { + "ts_code": "513500.SH", + "asset_class": "equity_qdii", + "name": "S&P 500 ETF (QDII)" + }, + { + "ts_code": "513800.SH", + "asset_class": "equity_qdii", + "name": "Nikkei 225 ETF (QDII)" + }, + { + "ts_code": "513030.SH", + "asset_class": "equity_qdii", + "name": "Germany ETF (QDII)" + }, + { + "ts_code": "511010.SH", + "asset_class": "rates", + "name": "Treasury ETF" + }, + { + "ts_code": "518880.SH", + "asset_class": "commodity_precious", + "name": "Gold ETF" + }, + { + "ts_code": "159980.SZ", + "asset_class": "commodity_metals", + "name": "Non-ferrous / Metals ETF" + }, + { + "ts_code": "159985.SZ", + "asset_class": "commodity_agri", + "name": "Soymeal ETF" + }, + { + "ts_code": "159870.SZ", + "asset_class": "commodity_chem", + "name": "Chemicals ETF" + }, + { + 
"ts_code": "513310.SH", + "asset_class": "equity_cn_sector", + "name": "\u4e2d\u97e9\u534a\u5bfc\u4f53ETF" + }, + { + "ts_code": "588200.SH", + "asset_class": "equity_cn_sector", + "name": "\u79d1\u521b\u82af\u7247ETF" + }, + { + "ts_code": "515880.SH", + "asset_class": "equity_cn_sector", + "name": "\u901a\u4fe1ETF" + }, + { + "ts_code": "159994.SZ", + "asset_class": "equity_cn_sector", + "name": "5GETF" + }, + { + "ts_code": "561330.SH", + "asset_class": "equity_cn_sector", + "name": "\u77ff\u4e1aETF" + }, + { + "ts_code": "512400.SH", + "asset_class": "equity_cn_sector", + "name": "\u6709\u8272\u91d1\u5c5eETF" + }, + { + "ts_code": "516150.SH", + "asset_class": "equity_cn_sector", + "name": "\u7a00\u571fETF\u5609\u5b9e" + }, + { + "ts_code": "588010.SH", + "asset_class": "equity_cn_sector", + "name": "\u79d1\u521b\u65b0\u6750\u6599ETF" + }, + { + "ts_code": "516800.SH", + "asset_class": "equity_cn_sector", + "name": "\u667a\u80fd\u5236\u9020ETF" + }, + { + "ts_code": "562500.SH", + "asset_class": "equity_cn_sector", + "name": "\u673a\u5668\u4ebaETF" + }, + { + "ts_code": "159667.SZ", + "asset_class": "equity_cn_sector", + "name": "\u5de5\u4e1a\u6bcd\u673aETF" + }, + { + "ts_code": "512710.SH", + "asset_class": "equity_cn_sector", + "name": "\u519b\u5de5\u9f99\u5934ETF" + }, + { + "ts_code": "159732.SZ", + "asset_class": "equity_cn_sector", + "name": "\u6d88\u8d39\u7535\u5b50ETF" + }, + { + "ts_code": "588790.SH", + "asset_class": "equity_cn_sector", + "name": "\u79d1\u521bAIETF\u535a\u65f6" + }, + { + "ts_code": "512480.SH", + "asset_class": "equity_cn_sector", + "name": "\u534a\u5bfc\u4f53ETF" + }, + { + "ts_code": "159516.SZ", + "asset_class": "equity_cn_sector", + "name": "\u534a\u5bfc\u4f53\u8bbe\u5907ETF" + }, + { + "ts_code": "159995.SZ", + "asset_class": "equity_cn_sector", + "name": "\u82af\u7247ETF" + }, + { + "ts_code": "512760.SH", + "asset_class": "equity_cn_sector", + "name": "\u82af\u7247ETF" + }, + { + "ts_code": "515050.SH", + "asset_class": 
"equity_cn_sector", + "name": "\u901a\u4fe1ETF\u534e\u590f" + }, + { + "ts_code": "159583.SZ", + "asset_class": "equity_cn_sector", + "name": "\u901a\u4fe1\u8bbe\u5907ETF" + }, + { + "ts_code": "159811.SZ", + "asset_class": "equity_cn_sector", + "name": "5G50ETF" + }, + { + "ts_code": "512660.SH", + "asset_class": "equity_cn_sector", + "name": "\u519b\u5de5ETF" + }, + { + "ts_code": "512680.SH", + "asset_class": "equity_cn_sector", + "name": "\u519b\u5de5ETF\u5e7f\u53d1" + }, + { + "ts_code": "159530.SZ", + "asset_class": "equity_cn_sector", + "name": "\u673a\u5668\u4ebaETF\u6613\u65b9\u8fbe" + }, + { + "ts_code": "159770.SZ", + "asset_class": "equity_cn_sector", + "name": "\u673a\u5668\u4ebaETF" + }, + { + "ts_code": "562950.SH", + "asset_class": "equity_cn_sector", + "name": "\u6d88\u8d39\u7535\u5b50ETF\u6613\u65b9\u8fbe" + }, + { + "ts_code": "561600.SH", + "asset_class": "equity_cn_sector", + "name": "\u6d88\u8d39\u7535\u5b50ETF" + }, + { + "ts_code": "515070.SH", + "asset_class": "equity_cn_sector", + "name": "\u4eba\u5de5\u667a\u80fdAIETF" + }, + { + "ts_code": "512930.SH", + "asset_class": "equity_cn_sector", + "name": "AI\u4eba\u5de5\u667a\u80fdETF" + }, + { + "ts_code": "159852.SZ", + "asset_class": "equity_cn_sector", + "name": "\u8f6f\u4ef6ETF" + }, + { + "ts_code": "515230.SH", + "asset_class": "equity_cn_sector", + "name": "\u8f6f\u4ef6ETF" + }, + { + "ts_code": "513120.SH", + "asset_class": "equity_cn_sector", + "name": "\u6e2f\u80a1\u521b\u65b0\u836fETF" + }, + { + "ts_code": "159570.SZ", + "asset_class": "equity_cn_sector", + "name": "\u6e2f\u80a1\u901a\u521b\u65b0\u836fETF" + }, + { + "ts_code": "159892.SZ", + "asset_class": "equity_cn_sector", + "name": "\u6052\u751f\u533b\u836fETF" + }, + { + "ts_code": "512010.SH", + "asset_class": "equity_cn_sector", + "name": "\u533b\u836fETF\u6613\u65b9\u8fbe" + }, + { + "ts_code": "516160.SH", + "asset_class": "equity_cn_sector", + "name": "\u65b0\u80fd\u6e90ETF" + }, + { + "ts_code": "515030.SH", + 
"asset_class": "equity_cn_sector", + "name": "\u65b0\u80fd\u6e90\u8f66ETF" + }, + { + "ts_code": "515790.SH", + "asset_class": "equity_cn_sector", + "name": "\u5149\u4f0fETF" + }, + { + "ts_code": "159857.SZ", + "asset_class": "equity_cn_sector", + "name": "\u5149\u4f0fETF" + }, + { + "ts_code": "159840.SZ", + "asset_class": "equity_cn_sector", + "name": "\u9502\u7535\u6c60ETF" + }, + { + "ts_code": "561160.SH", + "asset_class": "equity_cn_sector", + "name": "\u9502\u7535\u6c60ETF" + }, + { + "ts_code": "159755.SZ", + "asset_class": "equity_cn_sector", + "name": "\u7535\u6c60ETF" + }, + { + "ts_code": "159796.SZ", + "asset_class": "equity_cn_sector", + "name": "\u7535\u6c6050ETF" + }, + { + "ts_code": "159690.SZ", + "asset_class": "commodity_cn", + "name": "\u6709\u8272\u77ff\u4e1aETF\u62db\u5546" + }, + { + "ts_code": "560860.SH", + "asset_class": "commodity_cn", + "name": "\u5de5\u4e1a\u6709\u8272ETF" + }, + { + "ts_code": "159652.SZ", + "asset_class": "commodity_cn", + "name": "\u6709\u827250ETF" + }, + { + "ts_code": "516780.SH", + "asset_class": "commodity_cn", + "name": "\u7a00\u571fETF" + }, + { + "ts_code": "159713.SZ", + "asset_class": "commodity_cn", + "name": "\u7a00\u571fETF" + }, + { + "ts_code": "159761.SZ", + "asset_class": "equity_cn_sector", + "name": "\u65b0\u6750\u659950ETF" + }, + { + "ts_code": "588160.SH", + "asset_class": "equity_cn_sector", + "name": "\u79d1\u521b\u65b0\u6750\u6599ETF\u5357\u65b9" + }, + { + "ts_code": "159934.SZ", + "asset_class": "commodity_cn", + "name": "\u9ec4\u91d1ETF\u6613\u65b9\u8fbe" + }, + { + "ts_code": "159937.SZ", + "asset_class": "commodity_cn", + "name": "\u9ec4\u91d1ETF\u535a\u65f6" + }, + { + "ts_code": "161226.SZ", + "asset_class": "commodity_cn", + "name": "\u56fd\u6295\u767d\u94f6LOF" + }, + { + "ts_code": "501018.SH", + "asset_class": "commodity_cn", + "name": "\u5357\u65b9\u539f\u6cb9LOF" + }, + { + "ts_code": "161129.SZ", + "asset_class": "commodity_cn", + "name": "\u539f\u6cb9LOF\u6613\u65b9\u8fbe" + 
}, + { + "ts_code": "515220.SH", + "asset_class": "commodity_cn", + "name": "\u7164\u70adETF" + }, + { + "ts_code": "161032.SZ", + "asset_class": "commodity_cn", + "name": "\u7164\u70ad\u9f99\u5934LOF" + }, + { + "ts_code": "159981.SZ", + "asset_class": "commodity_cn", + "name": "\u80fd\u6e90\u5316\u5de5ETF" + }, + { + "ts_code": "516020.SH", + "asset_class": "commodity_cn", + "name": "\u5316\u5de5ETF" + }, + { + "ts_code": "159825.SZ", + "asset_class": "commodity_cn", + "name": "\u519c\u4e1aETF" + }, + { + "ts_code": "516810.SH", + "asset_class": "commodity_cn", + "name": "\u519c\u4e1aETF\u534e\u590f" + }, + { + "ts_code": "511100.SH", + "asset_class": "rates_cn", + "name": "\u56fd\u503aETF\u534e\u590f" + }, + { + "ts_code": "511090.SH", + "asset_class": "rates_cn", + "name": "30\u5e74\u56fd\u503aETF" + }, + { + "ts_code": "511520.SH", + "asset_class": "rates_cn", + "name": "\u653f\u91d1\u503a\u5238ETF" + } + ], + "constraints": { + "max_positions": 4, + "must_include": { + "commodity": 0, + "rates": 0, + "equity": 0 + }, + "risk_proxy": "510300.SH", + "rates_fallback": "511010.SH", + "backtest_default_start": "20200101", + "backtest_default_end": "20251231" + } +} diff --git a/configs/etf_universe_industry_extended.json b/configs/etf_universe_industry_extended.json new file mode 100644 index 0000000..bbd0e35 --- /dev/null +++ b/configs/etf_universe_industry_extended.json @@ -0,0 +1,498 @@ +{ + "version": 1, + "description": "Default ETF universe for trend-following (edit ts_code list after verifying tradability).", + "assets": [ + { + "ts_code": "588000.SH", + "asset_class": "equity_cn", + "name": "STAR50 ETF" + }, + { + "ts_code": "510880.SH", + "asset_class": "equity_cn", + "name": "Dividend ETF" + }, + { + "ts_code": "513100.SH", + "asset_class": "equity_qdii", + "name": "NASDAQ100 ETF (QDII)" + }, + { + "ts_code": "513500.SH", + "asset_class": "equity_qdii", + "name": "S&P 500 ETF (QDII)" + }, + { + "ts_code": "513800.SH", + "asset_class": "equity_qdii", + 
"name": "Nikkei 225 ETF (QDII)" + }, + { + "ts_code": "513030.SH", + "asset_class": "equity_qdii", + "name": "Germany ETF (QDII)" + }, + { + "ts_code": "513310.SH", + "asset_class": "equity_cn_sector", + "name": "\u4e2d\u97e9\u534a\u5bfc\u4f53ETF" + }, + { + "ts_code": "588200.SH", + "asset_class": "equity_cn_sector", + "name": "\u79d1\u521b\u82af\u7247ETF" + }, + { + "ts_code": "515880.SH", + "asset_class": "equity_cn_sector", + "name": "\u901a\u4fe1ETF" + }, + { + "ts_code": "159994.SZ", + "asset_class": "equity_cn_sector", + "name": "5GETF" + }, + { + "ts_code": "561330.SH", + "asset_class": "equity_cn_sector", + "name": "\u77ff\u4e1aETF" + }, + { + "ts_code": "512400.SH", + "asset_class": "equity_cn_sector", + "name": "\u6709\u8272\u91d1\u5c5eETF" + }, + { + "ts_code": "516150.SH", + "asset_class": "equity_cn_sector", + "name": "\u7a00\u571fETF\u5609\u5b9e" + }, + { + "ts_code": "588010.SH", + "asset_class": "equity_cn_sector", + "name": "\u79d1\u521b\u65b0\u6750\u6599ETF" + }, + { + "ts_code": "516800.SH", + "asset_class": "equity_cn_sector", + "name": "\u667a\u80fd\u5236\u9020ETF" + }, + { + "ts_code": "562500.SH", + "asset_class": "equity_cn_sector", + "name": "\u673a\u5668\u4ebaETF" + }, + { + "ts_code": "159667.SZ", + "asset_class": "equity_cn_sector", + "name": "\u5de5\u4e1a\u6bcd\u673aETF" + }, + { + "ts_code": "512710.SH", + "asset_class": "equity_cn_sector", + "name": "\u519b\u5de5\u9f99\u5934ETF" + }, + { + "ts_code": "159732.SZ", + "asset_class": "equity_cn_sector", + "name": "\u6d88\u8d39\u7535\u5b50ETF" + }, + { + "ts_code": "588790.SH", + "asset_class": "equity_cn_sector", + "name": "\u79d1\u521bAIETF\u535a\u65f6" + }, + { + "ts_code": "512480.SH", + "asset_class": "equity_cn_sector", + "name": "\u534a\u5bfc\u4f53ETF" + }, + { + "ts_code": "159516.SZ", + "asset_class": "equity_cn_sector", + "name": "\u534a\u5bfc\u4f53\u8bbe\u5907ETF" + }, + { + "ts_code": "159995.SZ", + "asset_class": "equity_cn_sector", + "name": "\u82af\u7247ETF" + }, + { + 
"ts_code": "512760.SH", + "asset_class": "equity_cn_sector", + "name": "\u82af\u7247ETF" + }, + { + "ts_code": "515050.SH", + "asset_class": "equity_cn_sector", + "name": "\u901a\u4fe1ETF\u534e\u590f" + }, + { + "ts_code": "159583.SZ", + "asset_class": "equity_cn_sector", + "name": "\u901a\u4fe1\u8bbe\u5907ETF" + }, + { + "ts_code": "159811.SZ", + "asset_class": "equity_cn_sector", + "name": "5G50ETF" + }, + { + "ts_code": "512660.SH", + "asset_class": "equity_cn_sector", + "name": "\u519b\u5de5ETF" + }, + { + "ts_code": "512680.SH", + "asset_class": "equity_cn_sector", + "name": "\u519b\u5de5ETF\u5e7f\u53d1" + }, + { + "ts_code": "159530.SZ", + "asset_class": "equity_cn_sector", + "name": "\u673a\u5668\u4ebaETF\u6613\u65b9\u8fbe" + }, + { + "ts_code": "159770.SZ", + "asset_class": "equity_cn_sector", + "name": "\u673a\u5668\u4ebaETF" + }, + { + "ts_code": "562950.SH", + "asset_class": "equity_cn_sector", + "name": "\u6d88\u8d39\u7535\u5b50ETF\u6613\u65b9\u8fbe" + }, + { + "ts_code": "561600.SH", + "asset_class": "equity_cn_sector", + "name": "\u6d88\u8d39\u7535\u5b50ETF" + }, + { + "ts_code": "515070.SH", + "asset_class": "equity_cn_sector", + "name": "\u4eba\u5de5\u667a\u80fdAIETF" + }, + { + "ts_code": "512930.SH", + "asset_class": "equity_cn_sector", + "name": "AI\u4eba\u5de5\u667a\u80fdETF" + }, + { + "ts_code": "159852.SZ", + "asset_class": "equity_cn_sector", + "name": "\u8f6f\u4ef6ETF" + }, + { + "ts_code": "515230.SH", + "asset_class": "equity_cn_sector", + "name": "\u8f6f\u4ef6ETF" + }, + { + "ts_code": "513120.SH", + "asset_class": "equity_cn_sector", + "name": "\u6e2f\u80a1\u521b\u65b0\u836fETF" + }, + { + "ts_code": "159570.SZ", + "asset_class": "equity_cn_sector", + "name": "\u6e2f\u80a1\u901a\u521b\u65b0\u836fETF" + }, + { + "ts_code": "159892.SZ", + "asset_class": "equity_cn_sector", + "name": "\u6052\u751f\u533b\u836fETF" + }, + { + "ts_code": "512010.SH", + "asset_class": "equity_cn_sector", + "name": "\u533b\u836fETF\u6613\u65b9\u8fbe" + }, + { + 
"ts_code": "516160.SH", + "asset_class": "equity_cn_sector", + "name": "\u65b0\u80fd\u6e90ETF" + }, + { + "ts_code": "515030.SH", + "asset_class": "equity_cn_sector", + "name": "\u65b0\u80fd\u6e90\u8f66ETF" + }, + { + "ts_code": "515790.SH", + "asset_class": "equity_cn_sector", + "name": "\u5149\u4f0fETF" + }, + { + "ts_code": "159857.SZ", + "asset_class": "equity_cn_sector", + "name": "\u5149\u4f0fETF" + }, + { + "ts_code": "159840.SZ", + "asset_class": "equity_cn_sector", + "name": "\u9502\u7535\u6c60ETF" + }, + { + "ts_code": "561160.SH", + "asset_class": "equity_cn_sector", + "name": "\u9502\u7535\u6c60ETF" + }, + { + "ts_code": "159755.SZ", + "asset_class": "equity_cn_sector", + "name": "\u7535\u6c60ETF" + }, + { + "ts_code": "159796.SZ", + "asset_class": "equity_cn_sector", + "name": "\u7535\u6c6050ETF" + }, + { + "ts_code": "159761.SZ", + "asset_class": "equity_cn_sector", + "name": "\u65b0\u6750\u659950ETF" + }, + { + "ts_code": "588160.SH", + "asset_class": "equity_cn_sector", + "name": "\u79d1\u521b\u65b0\u6750\u6599ETF\u5357\u65b9" + }, + { + "ts_code": "518880.SH", + "asset_class": "commodity_precious", + "name": "Gold ETF" + }, + { + "ts_code": "159980.SZ", + "asset_class": "commodity_metals", + "name": "Non-ferrous / Metals ETF" + }, + { + "ts_code": "159985.SZ", + "asset_class": "commodity_agri", + "name": "Soymeal ETF" + }, + { + "ts_code": "159870.SZ", + "asset_class": "commodity_chem", + "name": "Chemicals ETF" + }, + { + "ts_code": "159690.SZ", + "asset_class": "commodity_cn", + "name": "\u6709\u8272\u77ff\u4e1aETF\u62db\u5546" + }, + { + "ts_code": "560860.SH", + "asset_class": "commodity_cn", + "name": "\u5de5\u4e1a\u6709\u8272ETF" + }, + { + "ts_code": "159652.SZ", + "asset_class": "commodity_cn", + "name": "\u6709\u827250ETF" + }, + { + "ts_code": "516780.SH", + "asset_class": "commodity_cn", + "name": "\u7a00\u571fETF" + }, + { + "ts_code": "159713.SZ", + "asset_class": "commodity_cn", + "name": "\u7a00\u571fETF" + }, + { + "ts_code": 
"159792.SZ", + "asset_class": "equity_sector", + "name": "\u6e2f\u80a1\u4e92\u8054\u7f51" + }, + { + "ts_code": "515580.SH", + "asset_class": "equity_sector", + "name": "\u79d1\u6280" + }, + { + "ts_code": "159740.SZ", + "asset_class": "equity_sector", + "name": "\u6052\u751f\u79d1\u6280" + }, + { + "ts_code": "159998.SZ", + "asset_class": "equity_sector", + "name": "\u8ba1\u7b97\u673a" + }, + { + "ts_code": "159890.SZ", + "asset_class": "equity_sector", + "name": "\u4e91\u8ba1\u7b97" + }, + { + "ts_code": "159786.SZ", + "asset_class": "equity_sector", + "name": "VR" + }, + { + "ts_code": "512980.SH", + "asset_class": "equity_sector", + "name": "\u4f20\u5a92" + }, + { + "ts_code": "159869.SZ", + "asset_class": "equity_sector", + "name": "\u6e38\u620f" + }, + { + "ts_code": "516620.SH", + "asset_class": "equity_sector", + "name": "\u5f71\u89c6" + }, + { + "ts_code": "159206.SZ", + "asset_class": "equity_sector", + "name": "\u536b\u661f" + }, + { + "ts_code": "159392.SZ", + "asset_class": "equity_sector", + "name": "\u822a\u7a7a\u822a\u5929" + }, + { + "ts_code": "561380.SH", + "asset_class": "equity_sector", + "name": "\u7535\u7f51" + }, + { + "ts_code": "159566.SZ", + "asset_class": "equity_sector", + "name": "\u50a8\u80fd\u7535\u6c60" + }, + { + "ts_code": "512170.SH", + "asset_class": "equity_sector", + "name": "\u533b\u7597" + }, + { + "ts_code": "512290.SH", + "asset_class": "equity_sector", + "name": "\u751f\u7269\u533b\u836f" + }, + { + "ts_code": "159992.SZ", + "asset_class": "equity_sector", + "name": "\u521b\u65b0\u836f" + }, + { + "ts_code": "159327.SZ", + "asset_class": "equity_sector", + "name": "\u534a\u5bfc\u4f53\u8bbe\u5907" + }, + { + "ts_code": "159565.SZ", + "asset_class": "equity_sector", + "name": "\u6c7d\u8f66\u96f6\u90e8\u4ef6" + }, + { + "ts_code": "516110.SH", + "asset_class": "equity_sector", + "name": "\u6c7d\u8f66" + }, + { + "ts_code": "512690.SH", + "asset_class": "equity_sector", + "name": "\u9152/\u98df\u54c1" + }, + { + "ts_code": 
"159928.SZ", + "asset_class": "equity_sector", + "name": "\u6d88\u8d39" + }, + { + "ts_code": "159698.SZ", + "asset_class": "equity_sector", + "name": "\u7cae\u98df" + }, + { + "ts_code": "159766.SZ", + "asset_class": "equity_sector", + "name": "\u65c5\u6e38" + }, + { + "ts_code": "159709.SZ", + "asset_class": "equity_sector", + "name": "\u7269\u8054\u7f51" + }, + { + "ts_code": "516020.SH", + "asset_class": "equity_sector", + "name": "\u5316\u5de5" + }, + { + "ts_code": "159666.SZ", + "asset_class": "equity_sector", + "name": "\u4ea4\u901a\u8fd0\u8f93" + }, + { + "ts_code": "515220.SH", + "asset_class": "equity_sector", + "name": "\u7164\u70ad" + }, + { + "ts_code": "515210.SH", + "asset_class": "equity_sector", + "name": "\u94a2\u94c1" + }, + { + "ts_code": "512880.SH", + "asset_class": "equity_sector", + "name": "\u8bc1\u5238" + }, + { + "ts_code": "159299.SZ", + "asset_class": "equity_sector", + "name": "\u91d1\u878d\u79d1\u6280" + }, + { + "ts_code": "159937.SZ", + "asset_class": "commodity", + "name": "\u9ec4\u91d1" + }, + { + "ts_code": "159608.SZ", + "asset_class": "equity_sector", + "name": "\u7a00\u6709\u91d1\u5c5e" + }, + { + "ts_code": "159588.SZ", + "asset_class": "equity_sector", + "name": "\u77f3\u6cb9\u5929\u7136\u6c14" + }, + { + "ts_code": "511010.SH", + "asset_class": "rates", + "name": "\u56fd\u503a" + }, + { + "ts_code": "159745.SZ", + "asset_class": "equity_sector", + "name": "\u5efa\u6750" + }, + { + "ts_code": "161226.SZ", + "asset_class": "commodity", + "name": "\u767d\u94f6" + } + ], + "constraints": { + "max_positions": 3, + "must_include": { + "equity": 1, + "rates": 0, + "commodity": 0 + }, + "risk_proxy": "588000.SH", + "rates_fallback": "511010.SH", + "backtest_default_start": "20200101", + "backtest_default_end": "20251231" + } +} diff --git a/configs/etf_universe_industry_only.json b/configs/etf_universe_industry_only.json new file mode 100644 index 0000000..f6a7f3a --- /dev/null +++ b/configs/etf_universe_industry_only.json @@ -0,0 
+1,318 @@ +{ + "version": 1, + "description": "Default ETF universe for trend-following (edit ts_code list after verifying tradability).", + "assets": [ + { + "ts_code": "588000.SH", + "asset_class": "equity_cn", + "name": "STAR50 ETF" + }, + { + "ts_code": "510880.SH", + "asset_class": "equity_cn", + "name": "Dividend ETF" + }, + { + "ts_code": "513100.SH", + "asset_class": "equity_qdii", + "name": "NASDAQ100 ETF (QDII)" + }, + { + "ts_code": "513500.SH", + "asset_class": "equity_qdii", + "name": "S&P 500 ETF (QDII)" + }, + { + "ts_code": "513800.SH", + "asset_class": "equity_qdii", + "name": "Nikkei 225 ETF (QDII)" + }, + { + "ts_code": "513030.SH", + "asset_class": "equity_qdii", + "name": "Germany ETF (QDII)" + }, + { + "ts_code": "513310.SH", + "asset_class": "equity_cn_sector", + "name": "\u4e2d\u97e9\u534a\u5bfc\u4f53ETF" + }, + { + "ts_code": "588200.SH", + "asset_class": "equity_cn_sector", + "name": "\u79d1\u521b\u82af\u7247ETF" + }, + { + "ts_code": "515880.SH", + "asset_class": "equity_cn_sector", + "name": "\u901a\u4fe1ETF" + }, + { + "ts_code": "159994.SZ", + "asset_class": "equity_cn_sector", + "name": "5GETF" + }, + { + "ts_code": "561330.SH", + "asset_class": "equity_cn_sector", + "name": "\u77ff\u4e1aETF" + }, + { + "ts_code": "512400.SH", + "asset_class": "equity_cn_sector", + "name": "\u6709\u8272\u91d1\u5c5eETF" + }, + { + "ts_code": "516150.SH", + "asset_class": "equity_cn_sector", + "name": "\u7a00\u571fETF\u5609\u5b9e" + }, + { + "ts_code": "588010.SH", + "asset_class": "equity_cn_sector", + "name": "\u79d1\u521b\u65b0\u6750\u6599ETF" + }, + { + "ts_code": "516800.SH", + "asset_class": "equity_cn_sector", + "name": "\u667a\u80fd\u5236\u9020ETF" + }, + { + "ts_code": "562500.SH", + "asset_class": "equity_cn_sector", + "name": "\u673a\u5668\u4ebaETF" + }, + { + "ts_code": "159667.SZ", + "asset_class": "equity_cn_sector", + "name": "\u5de5\u4e1a\u6bcd\u673aETF" + }, + { + "ts_code": "512710.SH", + "asset_class": "equity_cn_sector", + "name": 
"\u519b\u5de5\u9f99\u5934ETF" + }, + { + "ts_code": "159732.SZ", + "asset_class": "equity_cn_sector", + "name": "\u6d88\u8d39\u7535\u5b50ETF" + }, + { + "ts_code": "588790.SH", + "asset_class": "equity_cn_sector", + "name": "\u79d1\u521bAIETF\u535a\u65f6" + }, + { + "ts_code": "512480.SH", + "asset_class": "equity_cn_sector", + "name": "\u534a\u5bfc\u4f53ETF" + }, + { + "ts_code": "159516.SZ", + "asset_class": "equity_cn_sector", + "name": "\u534a\u5bfc\u4f53\u8bbe\u5907ETF" + }, + { + "ts_code": "159995.SZ", + "asset_class": "equity_cn_sector", + "name": "\u82af\u7247ETF" + }, + { + "ts_code": "512760.SH", + "asset_class": "equity_cn_sector", + "name": "\u82af\u7247ETF" + }, + { + "ts_code": "515050.SH", + "asset_class": "equity_cn_sector", + "name": "\u901a\u4fe1ETF\u534e\u590f" + }, + { + "ts_code": "159583.SZ", + "asset_class": "equity_cn_sector", + "name": "\u901a\u4fe1\u8bbe\u5907ETF" + }, + { + "ts_code": "159811.SZ", + "asset_class": "equity_cn_sector", + "name": "5G50ETF" + }, + { + "ts_code": "512660.SH", + "asset_class": "equity_cn_sector", + "name": "\u519b\u5de5ETF" + }, + { + "ts_code": "512680.SH", + "asset_class": "equity_cn_sector", + "name": "\u519b\u5de5ETF\u5e7f\u53d1" + }, + { + "ts_code": "159530.SZ", + "asset_class": "equity_cn_sector", + "name": "\u673a\u5668\u4ebaETF\u6613\u65b9\u8fbe" + }, + { + "ts_code": "159770.SZ", + "asset_class": "equity_cn_sector", + "name": "\u673a\u5668\u4ebaETF" + }, + { + "ts_code": "562950.SH", + "asset_class": "equity_cn_sector", + "name": "\u6d88\u8d39\u7535\u5b50ETF\u6613\u65b9\u8fbe" + }, + { + "ts_code": "561600.SH", + "asset_class": "equity_cn_sector", + "name": "\u6d88\u8d39\u7535\u5b50ETF" + }, + { + "ts_code": "515070.SH", + "asset_class": "equity_cn_sector", + "name": "\u4eba\u5de5\u667a\u80fdAIETF" + }, + { + "ts_code": "512930.SH", + "asset_class": "equity_cn_sector", + "name": "AI\u4eba\u5de5\u667a\u80fdETF" + }, + { + "ts_code": "159852.SZ", + "asset_class": "equity_cn_sector", + "name": 
"\u8f6f\u4ef6ETF" + }, + { + "ts_code": "515230.SH", + "asset_class": "equity_cn_sector", + "name": "\u8f6f\u4ef6ETF" + }, + { + "ts_code": "513120.SH", + "asset_class": "equity_cn_sector", + "name": "\u6e2f\u80a1\u521b\u65b0\u836fETF" + }, + { + "ts_code": "159570.SZ", + "asset_class": "equity_cn_sector", + "name": "\u6e2f\u80a1\u901a\u521b\u65b0\u836fETF" + }, + { + "ts_code": "159892.SZ", + "asset_class": "equity_cn_sector", + "name": "\u6052\u751f\u533b\u836fETF" + }, + { + "ts_code": "512010.SH", + "asset_class": "equity_cn_sector", + "name": "\u533b\u836fETF\u6613\u65b9\u8fbe" + }, + { + "ts_code": "516160.SH", + "asset_class": "equity_cn_sector", + "name": "\u65b0\u80fd\u6e90ETF" + }, + { + "ts_code": "515030.SH", + "asset_class": "equity_cn_sector", + "name": "\u65b0\u80fd\u6e90\u8f66ETF" + }, + { + "ts_code": "515790.SH", + "asset_class": "equity_cn_sector", + "name": "\u5149\u4f0fETF" + }, + { + "ts_code": "159857.SZ", + "asset_class": "equity_cn_sector", + "name": "\u5149\u4f0fETF" + }, + { + "ts_code": "159840.SZ", + "asset_class": "equity_cn_sector", + "name": "\u9502\u7535\u6c60ETF" + }, + { + "ts_code": "561160.SH", + "asset_class": "equity_cn_sector", + "name": "\u9502\u7535\u6c60ETF" + }, + { + "ts_code": "159755.SZ", + "asset_class": "equity_cn_sector", + "name": "\u7535\u6c60ETF" + }, + { + "ts_code": "159796.SZ", + "asset_class": "equity_cn_sector", + "name": "\u7535\u6c6050ETF" + }, + { + "ts_code": "159761.SZ", + "asset_class": "equity_cn_sector", + "name": "\u65b0\u6750\u659950ETF" + }, + { + "ts_code": "588160.SH", + "asset_class": "equity_cn_sector", + "name": "\u79d1\u521b\u65b0\u6750\u6599ETF\u5357\u65b9" + }, + { + "ts_code": "518880.SH", + "asset_class": "commodity_precious", + "name": "Gold ETF" + }, + { + "ts_code": "159980.SZ", + "asset_class": "commodity_metals", + "name": "Non-ferrous / Metals ETF" + }, + { + "ts_code": "159985.SZ", + "asset_class": "commodity_agri", + "name": "Soymeal ETF" + }, + { + "ts_code": "159870.SZ", + 
"asset_class": "commodity_chem", + "name": "Chemicals ETF" + }, + { + "ts_code": "159690.SZ", + "asset_class": "commodity_cn", + "name": "\u6709\u8272\u77ff\u4e1aETF\u62db\u5546" + }, + { + "ts_code": "560860.SH", + "asset_class": "commodity_cn", + "name": "\u5de5\u4e1a\u6709\u8272ETF" + }, + { + "ts_code": "159652.SZ", + "asset_class": "commodity_cn", + "name": "\u6709\u827250ETF" + }, + { + "ts_code": "516780.SH", + "asset_class": "commodity_cn", + "name": "\u7a00\u571fETF" + }, + { + "ts_code": "159713.SZ", + "asset_class": "commodity_cn", + "name": "\u7a00\u571fETF" + } + ], + "constraints": { + "max_positions": 3, + "must_include": { + "equity": 1, + "rates": 0, + "commodity": 0 + }, + "risk_proxy": "588000.SH", + "rates_fallback": "511010.SH", + "backtest_default_start": "20200101", + "backtest_default_end": "20251231" + } +} diff --git a/configs/etf_universe_industry_profiled.json b/configs/etf_universe_industry_profiled.json new file mode 100644 index 0000000..c181fca --- /dev/null +++ b/configs/etf_universe_industry_profiled.json @@ -0,0 +1,318 @@ +{ + "version": 1, + "description": "Default ETF universe for trend-following (edit ts_code list after verifying tradability).", + "assets": [ + { + "ts_code": "588000.SH", + "asset_class": "equity_cn", + "name": "STAR50 ETF" + }, + { + "ts_code": "513310.SH", + "asset_class": "equity_cn_sector", + "name": "\u4e2d\u97e9\u534a\u5bfc\u4f53ETF" + }, + { + "ts_code": "588200.SH", + "asset_class": "equity_cn_sector", + "name": "\u79d1\u521b\u82af\u7247ETF" + }, + { + "ts_code": "515880.SH", + "asset_class": "equity_cn_sector", + "name": "\u901a\u4fe1ETF" + }, + { + "ts_code": "159994.SZ", + "asset_class": "equity_cn_sector", + "name": "5GETF" + }, + { + "ts_code": "561330.SH", + "asset_class": "equity_cn_sector", + "name": "\u77ff\u4e1aETF" + }, + { + "ts_code": "512400.SH", + "asset_class": "equity_cn_sector", + "name": "\u6709\u8272\u91d1\u5c5eETF" + }, + { + "ts_code": "516150.SH", + "asset_class": 
"equity_cn_sector", + "name": "\u7a00\u571fETF\u5609\u5b9e" + }, + { + "ts_code": "588010.SH", + "asset_class": "equity_cn_sector", + "name": "\u79d1\u521b\u65b0\u6750\u6599ETF" + }, + { + "ts_code": "516800.SH", + "asset_class": "equity_cn_sector", + "name": "\u667a\u80fd\u5236\u9020ETF" + }, + { + "ts_code": "562500.SH", + "asset_class": "equity_cn_sector", + "name": "\u673a\u5668\u4ebaETF" + }, + { + "ts_code": "159667.SZ", + "asset_class": "equity_cn_sector", + "name": "\u5de5\u4e1a\u6bcd\u673aETF" + }, + { + "ts_code": "512710.SH", + "asset_class": "equity_cn_sector", + "name": "\u519b\u5de5\u9f99\u5934ETF" + }, + { + "ts_code": "159732.SZ", + "asset_class": "equity_cn_sector", + "name": "\u6d88\u8d39\u7535\u5b50ETF" + }, + { + "ts_code": "512480.SH", + "asset_class": "equity_cn_sector", + "name": "\u534a\u5bfc\u4f53ETF" + }, + { + "ts_code": "159516.SZ", + "asset_class": "equity_cn_sector", + "name": "\u534a\u5bfc\u4f53\u8bbe\u5907ETF" + }, + { + "ts_code": "159995.SZ", + "asset_class": "equity_cn_sector", + "name": "\u82af\u7247ETF" + }, + { + "ts_code": "512760.SH", + "asset_class": "equity_cn_sector", + "name": "\u82af\u7247ETF" + }, + { + "ts_code": "515050.SH", + "asset_class": "equity_cn_sector", + "name": "\u901a\u4fe1ETF\u534e\u590f" + }, + { + "ts_code": "159811.SZ", + "asset_class": "equity_cn_sector", + "name": "5G50ETF" + }, + { + "ts_code": "512660.SH", + "asset_class": "equity_cn_sector", + "name": "\u519b\u5de5ETF" + }, + { + "ts_code": "512680.SH", + "asset_class": "equity_cn_sector", + "name": "\u519b\u5de5ETF\u5e7f\u53d1" + }, + { + "ts_code": "159770.SZ", + "asset_class": "equity_cn_sector", + "name": "\u673a\u5668\u4ebaETF" + }, + { + "ts_code": "562950.SH", + "asset_class": "equity_cn_sector", + "name": "\u6d88\u8d39\u7535\u5b50ETF\u6613\u65b9\u8fbe" + }, + { + "ts_code": "561600.SH", + "asset_class": "equity_cn_sector", + "name": "\u6d88\u8d39\u7535\u5b50ETF" + }, + { + "ts_code": "515070.SH", + "asset_class": "equity_cn_sector", + 
"name": "\u4eba\u5de5\u667a\u80fdAIETF" + }, + { + "ts_code": "512930.SH", + "asset_class": "equity_cn_sector", + "name": "AI\u4eba\u5de5\u667a\u80fdETF" + }, + { + "ts_code": "159852.SZ", + "asset_class": "equity_cn_sector", + "name": "\u8f6f\u4ef6ETF" + }, + { + "ts_code": "515230.SH", + "asset_class": "equity_cn_sector", + "name": "\u8f6f\u4ef6ETF" + }, + { + "ts_code": "513120.SH", + "asset_class": "equity_cn_sector", + "name": "\u6e2f\u80a1\u521b\u65b0\u836fETF" + }, + { + "ts_code": "159892.SZ", + "asset_class": "equity_cn_sector", + "name": "\u6052\u751f\u533b\u836fETF" + }, + { + "ts_code": "516160.SH", + "asset_class": "equity_cn_sector", + "name": "\u65b0\u80fd\u6e90ETF" + }, + { + "ts_code": "515030.SH", + "asset_class": "equity_cn_sector", + "name": "\u65b0\u80fd\u6e90\u8f66ETF" + }, + { + "ts_code": "515790.SH", + "asset_class": "equity_cn_sector", + "name": "\u5149\u4f0fETF" + }, + { + "ts_code": "159857.SZ", + "asset_class": "equity_cn_sector", + "name": "\u5149\u4f0fETF" + }, + { + "ts_code": "159840.SZ", + "asset_class": "equity_cn_sector", + "name": "\u9502\u7535\u6c60ETF" + }, + { + "ts_code": "561160.SH", + "asset_class": "equity_cn_sector", + "name": "\u9502\u7535\u6c60ETF" + }, + { + "ts_code": "159755.SZ", + "asset_class": "equity_cn_sector", + "name": "\u7535\u6c60ETF" + }, + { + "ts_code": "159796.SZ", + "asset_class": "equity_cn_sector", + "name": "\u7535\u6c6050ETF" + }, + { + "ts_code": "159761.SZ", + "asset_class": "equity_cn_sector", + "name": "\u65b0\u6750\u659950ETF" + }, + { + "ts_code": "588160.SH", + "asset_class": "equity_cn_sector", + "name": "\u79d1\u521b\u65b0\u6750\u6599ETF\u5357\u65b9" + }, + { + "ts_code": "159690.SZ", + "asset_class": "commodity_cn", + "name": "\u6709\u8272\u77ff\u4e1aETF\u62db\u5546" + }, + { + "ts_code": "560860.SH", + "asset_class": "commodity_cn", + "name": "\u5de5\u4e1a\u6709\u8272ETF" + }, + { + "ts_code": "159652.SZ", + "asset_class": "commodity_cn", + "name": "\u6709\u827250ETF" + }, + { + 
"ts_code": "516780.SH", + "asset_class": "commodity_cn", + "name": "\u7a00\u571fETF" + }, + { + "ts_code": "159713.SZ", + "asset_class": "commodity_cn", + "name": "\u7a00\u571fETF" + }, + { + "ts_code": "159792.SZ", + "asset_class": "equity_sector", + "name": "\u6e2f\u80a1\u4e92\u8054\u7f51" + }, + { + "ts_code": "515580.SH", + "asset_class": "equity_sector", + "name": "\u79d1\u6280" + }, + { + "ts_code": "159740.SZ", + "asset_class": "equity_sector", + "name": "\u6052\u751f\u79d1\u6280" + }, + { + "ts_code": "159998.SZ", + "asset_class": "equity_sector", + "name": "\u8ba1\u7b97\u673a" + }, + { + "ts_code": "159890.SZ", + "asset_class": "equity_sector", + "name": "\u4e91\u8ba1\u7b97" + }, + { + "ts_code": "159786.SZ", + "asset_class": "equity_sector", + "name": "VR" + }, + { + "ts_code": "512980.SH", + "asset_class": "equity_sector", + "name": "\u4f20\u5a92" + }, + { + "ts_code": "159869.SZ", + "asset_class": "equity_sector", + "name": "\u6e38\u620f" + }, + { + "ts_code": "516620.SH", + "asset_class": "equity_sector", + "name": "\u5f71\u89c6" + }, + { + "ts_code": "159766.SZ", + "asset_class": "equity_sector", + "name": "\u65c5\u6e38" + }, + { + "ts_code": "159709.SZ", + "asset_class": "equity_sector", + "name": "\u7269\u8054\u7f51" + }, + { + "ts_code": "515220.SH", + "asset_class": "equity_sector", + "name": "\u7164\u70ad" + }, + { + "ts_code": "159608.SZ", + "asset_class": "equity_sector", + "name": "\u7a00\u6709\u91d1\u5c5e" + }, + { + "ts_code": "161226.SZ", + "asset_class": "commodity", + "name": "\u767d\u94f6" + } + ], + "constraints": { + "max_positions": 3, + "must_include": { + "equity": 1, + "rates": 0, + "commodity": 0 + }, + "risk_proxy": "588000.SH", + "rates_fallback": "511010.SH", + "backtest_default_start": "20200101", + "backtest_default_end": "20251231" + } +} diff --git a/configs/etf_universe_proxy.json b/configs/etf_universe_proxy.json new file mode 100644 index 0000000..75c5ba8 --- /dev/null +++ b/configs/etf_universe_proxy.json @@ -0,0 +1,73 
@@ +{ + "version": 1, + "description": "Default ETF universe for trend-following (edit ts_code list after verifying tradability).", + "assets": [ + { + "ts_code": "510300.SH", + "asset_class": "equity_cn", + "name": "CSI300 ETF" + }, + { + "ts_code": "510500.SH", + "asset_class": "equity_cn", + "name": "CSI500 ETF" + }, + { + "ts_code": "159915.SZ", + "asset_class": "equity_cn", + "name": "ChiNext ETF" + }, + { + "ts_code": "511010.SH", + "asset_class": "rates", + "name": "Treasury ETF" + }, + { + "ts_code": "512480.SH", + "asset_class": "equity_cn_sector", + "name": "\u534a\u5bfc\u4f53ETF" + }, + { + "ts_code": "512660.SH", + "asset_class": "equity_cn_sector", + "name": "\u519b\u5de5ETF" + }, + { + "ts_code": "515070.SH", + "asset_class": "equity_cn_sector", + "name": "\u4eba\u5de5\u667a\u80fdAIETF" + }, + { + "ts_code": "515790.SH", + "asset_class": "equity_cn_sector", + "name": "\u5149\u4f0fETF" + }, + { + "ts_code": "159934.SZ", + "asset_class": "commodity_cn", + "name": "\u9ec4\u91d1ETF\u6613\u65b9\u8fbe" + }, + { + "ts_code": "161226.SZ", + "asset_class": "commodity_cn", + "name": "\u56fd\u6295\u767d\u94f6LOF" + }, + { + "ts_code": "515220.SH", + "asset_class": "commodity_cn", + "name": "\u7164\u70adETF" + } + ], + "constraints": { + "max_positions": 4, + "must_include": { + "equity": 1, + "rates": 1, + "commodity": 0 + }, + "risk_proxy": "510300.SH", + "rates_fallback": "511010.SH", + "backtest_default_start": "20200101", + "backtest_default_end": "20251231" + } +} diff --git a/configs/tushare.env.example b/configs/tushare.env.example new file mode 100644 index 0000000..b55ed12 --- /dev/null +++ b/configs/tushare.env.example @@ -0,0 +1,3 @@ +# Copy to .env (not committed) and fill in. 
+TUSHARE_TOKEN= +TUSHARE_TIMEOUT=30 diff --git a/data/etf_trend_equity_2020_2025_daily.parquet b/data/etf_trend_equity_2020_2025_daily.parquet new file mode 100644 index 0000000..c2e6167 Binary files /dev/null and b/data/etf_trend_equity_2020_2025_daily.parquet differ diff --git a/data/etf_trend_equity_2020_2025_ma5x20.parquet b/data/etf_trend_equity_2020_2025_ma5x20.parquet new file mode 100644 index 0000000..ec4d5e5 Binary files /dev/null and b/data/etf_trend_equity_2020_2025_ma5x20.parquet differ diff --git a/data/etf_trend_equity_2020_2025_v10_full_universe_proxybest.parquet b/data/etf_trend_equity_2020_2025_v10_full_universe_proxybest.parquet new file mode 100644 index 0000000..279643f Binary files /dev/null and b/data/etf_trend_equity_2020_2025_v10_full_universe_proxybest.parquet differ diff --git a/data/etf_trend_equity_2020_2025_v10_full_universe_proxybest_trades.parquet b/data/etf_trend_equity_2020_2025_v10_full_universe_proxybest_trades.parquet new file mode 100644 index 0000000..6bf4285 Binary files /dev/null and b/data/etf_trend_equity_2020_2025_v10_full_universe_proxybest_trades.parquet differ diff --git a/data/etf_trend_equity_2020_2025_v10_full_universe_proxybest_weights.parquet b/data/etf_trend_equity_2020_2025_v10_full_universe_proxybest_weights.parquet new file mode 100644 index 0000000..ce4f551 Binary files /dev/null and b/data/etf_trend_equity_2020_2025_v10_full_universe_proxybest_weights.parquet differ diff --git a/data/etf_trend_equity_2020_2025_v11_industry_event.parquet b/data/etf_trend_equity_2020_2025_v11_industry_event.parquet new file mode 100644 index 0000000..7fd20d1 Binary files /dev/null and b/data/etf_trend_equity_2020_2025_v11_industry_event.parquet differ diff --git a/data/etf_trend_equity_2020_2025_v11_industry_event_trades.parquet b/data/etf_trend_equity_2020_2025_v11_industry_event_trades.parquet new file mode 100644 index 0000000..a5dcf6e Binary files /dev/null and 
b/data/etf_trend_equity_2020_2025_v11_industry_event_trades.parquet differ diff --git a/data/etf_trend_equity_2020_2025_v11_industry_event_weights.parquet b/data/etf_trend_equity_2020_2025_v11_industry_event_weights.parquet new file mode 100644 index 0000000..e72fcb7 Binary files /dev/null and b/data/etf_trend_equity_2020_2025_v11_industry_event_weights.parquet differ diff --git a/data/etf_trend_equity_2020_2025_v12_industry_event_v2.parquet b/data/etf_trend_equity_2020_2025_v12_industry_event_v2.parquet new file mode 100644 index 0000000..23e880b Binary files /dev/null and b/data/etf_trend_equity_2020_2025_v12_industry_event_v2.parquet differ diff --git a/data/etf_trend_equity_2020_2025_v12_industry_event_v2_trades.parquet b/data/etf_trend_equity_2020_2025_v12_industry_event_v2_trades.parquet new file mode 100644 index 0000000..e2922f1 Binary files /dev/null and b/data/etf_trend_equity_2020_2025_v12_industry_event_v2_trades.parquet differ diff --git a/data/etf_trend_equity_2020_2025_v12_industry_event_v2_weights.parquet b/data/etf_trend_equity_2020_2025_v12_industry_event_v2_weights.parquet new file mode 100644 index 0000000..6530a5c Binary files /dev/null and b/data/etf_trend_equity_2020_2025_v12_industry_event_v2_weights.parquet differ diff --git a/data/etf_trend_equity_2020_2025_v13_industry_event_defaults.parquet b/data/etf_trend_equity_2020_2025_v13_industry_event_defaults.parquet new file mode 100644 index 0000000..3244db6 Binary files /dev/null and b/data/etf_trend_equity_2020_2025_v13_industry_event_defaults.parquet differ diff --git a/data/etf_trend_equity_2020_2025_v13_industry_event_defaults_trades.parquet b/data/etf_trend_equity_2020_2025_v13_industry_event_defaults_trades.parquet new file mode 100644 index 0000000..549a609 Binary files /dev/null and b/data/etf_trend_equity_2020_2025_v13_industry_event_defaults_trades.parquet differ diff --git a/data/etf_trend_equity_2020_2025_v13_industry_event_defaults_weights.parquet 
b/data/etf_trend_equity_2020_2025_v13_industry_event_defaults_weights.parquet new file mode 100644 index 0000000..5183156 Binary files /dev/null and b/data/etf_trend_equity_2020_2025_v13_industry_event_defaults_weights.parquet differ diff --git a/data/etf_trend_equity_2020_2025_v14_profiled_best.parquet b/data/etf_trend_equity_2020_2025_v14_profiled_best.parquet new file mode 100644 index 0000000..40cc73a Binary files /dev/null and b/data/etf_trend_equity_2020_2025_v14_profiled_best.parquet differ diff --git a/data/etf_trend_equity_2020_2025_v14_profiled_best_trades.parquet b/data/etf_trend_equity_2020_2025_v14_profiled_best_trades.parquet new file mode 100644 index 0000000..c5477fe Binary files /dev/null and b/data/etf_trend_equity_2020_2025_v14_profiled_best_trades.parquet differ diff --git a/data/etf_trend_equity_2020_2025_v14_profiled_best_weights.parquet b/data/etf_trend_equity_2020_2025_v14_profiled_best_weights.parquet new file mode 100644 index 0000000..9d8945a Binary files /dev/null and b/data/etf_trend_equity_2020_2025_v14_profiled_best_weights.parquet differ diff --git a/data/etf_trend_equity_2020_2025_v15_best_confirm.parquet b/data/etf_trend_equity_2020_2025_v15_best_confirm.parquet new file mode 100644 index 0000000..8405ba6 Binary files /dev/null and b/data/etf_trend_equity_2020_2025_v15_best_confirm.parquet differ diff --git a/data/etf_trend_equity_2020_2025_v15_best_confirm2.parquet b/data/etf_trend_equity_2020_2025_v15_best_confirm2.parquet new file mode 100644 index 0000000..95592c3 Binary files /dev/null and b/data/etf_trend_equity_2020_2025_v15_best_confirm2.parquet differ diff --git a/data/etf_trend_equity_2020_2025_v15_best_confirm2_trades.parquet b/data/etf_trend_equity_2020_2025_v15_best_confirm2_trades.parquet new file mode 100644 index 0000000..9c7add2 Binary files /dev/null and b/data/etf_trend_equity_2020_2025_v15_best_confirm2_trades.parquet differ diff --git a/data/etf_trend_equity_2020_2025_v15_best_confirm2_weights.parquet 
b/data/etf_trend_equity_2020_2025_v15_best_confirm2_weights.parquet new file mode 100644 index 0000000..a0a1859 Binary files /dev/null and b/data/etf_trend_equity_2020_2025_v15_best_confirm2_weights.parquet differ diff --git a/data/etf_trend_equity_2020_2025_v15_best_confirm_trades.parquet b/data/etf_trend_equity_2020_2025_v15_best_confirm_trades.parquet new file mode 100644 index 0000000..7d48118 Binary files /dev/null and b/data/etf_trend_equity_2020_2025_v15_best_confirm_trades.parquet differ diff --git a/data/etf_trend_equity_2020_2025_v15_best_confirm_weights.parquet b/data/etf_trend_equity_2020_2025_v15_best_confirm_weights.parquet new file mode 100644 index 0000000..cf4cc52 Binary files /dev/null and b/data/etf_trend_equity_2020_2025_v15_best_confirm_weights.parquet differ diff --git a/data/etf_trend_equity_2020_2025_v2.parquet b/data/etf_trend_equity_2020_2025_v2.parquet new file mode 100644 index 0000000..81eba28 Binary files /dev/null and b/data/etf_trend_equity_2020_2025_v2.parquet differ diff --git a/data/etf_trend_equity_2020_2025_v2_trades.parquet b/data/etf_trend_equity_2020_2025_v2_trades.parquet new file mode 100644 index 0000000..a715abe Binary files /dev/null and b/data/etf_trend_equity_2020_2025_v2_trades.parquet differ diff --git a/data/etf_trend_equity_2020_2025_v2_weights.parquet b/data/etf_trend_equity_2020_2025_v2_weights.parquet new file mode 100644 index 0000000..91f728d Binary files /dev/null and b/data/etf_trend_equity_2020_2025_v2_weights.parquet differ diff --git a/data/etf_trend_equity_2020_2025_v3_universe_expand.parquet b/data/etf_trend_equity_2020_2025_v3_universe_expand.parquet new file mode 100644 index 0000000..930c927 Binary files /dev/null and b/data/etf_trend_equity_2020_2025_v3_universe_expand.parquet differ diff --git a/data/etf_trend_equity_2020_2025_v3_universe_expand_trades.parquet b/data/etf_trend_equity_2020_2025_v3_universe_expand_trades.parquet new file mode 100644 index 0000000..74f3339 Binary files /dev/null and 
b/data/etf_trend_equity_2020_2025_v3_universe_expand_trades.parquet differ diff --git a/data/etf_trend_equity_2020_2025_v3_universe_expand_weights.parquet b/data/etf_trend_equity_2020_2025_v3_universe_expand_weights.parquet new file mode 100644 index 0000000..178c5d5 Binary files /dev/null and b/data/etf_trend_equity_2020_2025_v3_universe_expand_weights.parquet differ diff --git a/data/etf_trend_equity_2020_2025_v4_vol25.parquet b/data/etf_trend_equity_2020_2025_v4_vol25.parquet new file mode 100644 index 0000000..4385555 Binary files /dev/null and b/data/etf_trend_equity_2020_2025_v4_vol25.parquet differ diff --git a/data/etf_trend_equity_2020_2025_v4_vol25_trades.parquet b/data/etf_trend_equity_2020_2025_v4_vol25_trades.parquet new file mode 100644 index 0000000..6c09873 Binary files /dev/null and b/data/etf_trend_equity_2020_2025_v4_vol25_trades.parquet differ diff --git a/data/etf_trend_equity_2020_2025_v4_vol25_weights.parquet b/data/etf_trend_equity_2020_2025_v4_vol25_weights.parquet new file mode 100644 index 0000000..255c9b7 Binary files /dev/null and b/data/etf_trend_equity_2020_2025_v4_vol25_weights.parquet differ diff --git a/data/etf_trend_equity_2020_2025_v5_concentrated.parquet b/data/etf_trend_equity_2020_2025_v5_concentrated.parquet new file mode 100644 index 0000000..bf06ba2 Binary files /dev/null and b/data/etf_trend_equity_2020_2025_v5_concentrated.parquet differ diff --git a/data/etf_trend_equity_2020_2025_v5_concentrated_trades.parquet b/data/etf_trend_equity_2020_2025_v5_concentrated_trades.parquet new file mode 100644 index 0000000..854ec2e Binary files /dev/null and b/data/etf_trend_equity_2020_2025_v5_concentrated_trades.parquet differ diff --git a/data/etf_trend_equity_2020_2025_v5_concentrated_weights.parquet b/data/etf_trend_equity_2020_2025_v5_concentrated_weights.parquet new file mode 100644 index 0000000..ff4c19b Binary files /dev/null and b/data/etf_trend_equity_2020_2025_v5_concentrated_weights.parquet differ diff --git 
a/data/etf_trend_equity_2020_2025_v6_more_concentrated.parquet b/data/etf_trend_equity_2020_2025_v6_more_concentrated.parquet new file mode 100644 index 0000000..ba16504 Binary files /dev/null and b/data/etf_trend_equity_2020_2025_v6_more_concentrated.parquet differ diff --git a/data/etf_trend_equity_2020_2025_v6_more_concentrated_trades.parquet b/data/etf_trend_equity_2020_2025_v6_more_concentrated_trades.parquet new file mode 100644 index 0000000..7c490bb Binary files /dev/null and b/data/etf_trend_equity_2020_2025_v6_more_concentrated_trades.parquet differ diff --git a/data/etf_trend_equity_2020_2025_v6_more_concentrated_weights.parquet b/data/etf_trend_equity_2020_2025_v6_more_concentrated_weights.parquet new file mode 100644 index 0000000..f6d49a6 Binary files /dev/null and b/data/etf_trend_equity_2020_2025_v6_more_concentrated_weights.parquet differ diff --git a/data/etf_trend_equity_2020_2025_v7_swing_trailing_anomaly.parquet b/data/etf_trend_equity_2020_2025_v7_swing_trailing_anomaly.parquet new file mode 100644 index 0000000..f12401b Binary files /dev/null and b/data/etf_trend_equity_2020_2025_v7_swing_trailing_anomaly.parquet differ diff --git a/data/etf_trend_equity_2020_2025_v7_swing_trailing_anomaly_trades.parquet b/data/etf_trend_equity_2020_2025_v7_swing_trailing_anomaly_trades.parquet new file mode 100644 index 0000000..499d302 Binary files /dev/null and b/data/etf_trend_equity_2020_2025_v7_swing_trailing_anomaly_trades.parquet differ diff --git a/data/etf_trend_equity_2020_2025_v7_swing_trailing_anomaly_weights.parquet b/data/etf_trend_equity_2020_2025_v7_swing_trailing_anomaly_weights.parquet new file mode 100644 index 0000000..1d865dc Binary files /dev/null and b/data/etf_trend_equity_2020_2025_v7_swing_trailing_anomaly_weights.parquet differ diff --git a/data/etf_trend_equity_2020_2025_v8_big_universe_macro.parquet b/data/etf_trend_equity_2020_2025_v8_big_universe_macro.parquet new file mode 100644 index 0000000..3d3faaa Binary files /dev/null 
and b/data/etf_trend_equity_2020_2025_v8_big_universe_macro.parquet differ diff --git a/data/etf_trend_equity_2020_2025_v8_big_universe_macro_trades.parquet b/data/etf_trend_equity_2020_2025_v8_big_universe_macro_trades.parquet new file mode 100644 index 0000000..3d7b5ff Binary files /dev/null and b/data/etf_trend_equity_2020_2025_v8_big_universe_macro_trades.parquet differ diff --git a/data/etf_trend_equity_2020_2025_v8_big_universe_macro_weights.parquet b/data/etf_trend_equity_2020_2025_v8_big_universe_macro_weights.parquet new file mode 100644 index 0000000..b81e7fd Binary files /dev/null and b/data/etf_trend_equity_2020_2025_v8_big_universe_macro_weights.parquet differ diff --git a/data/etf_trend_equity_2020_2025_v9_gridbest_nomacro.parquet b/data/etf_trend_equity_2020_2025_v9_gridbest_nomacro.parquet new file mode 100644 index 0000000..d8ec679 Binary files /dev/null and b/data/etf_trend_equity_2020_2025_v9_gridbest_nomacro.parquet differ diff --git a/data/etf_trend_equity_2020_2025_v9_gridbest_nomacro_trades.parquet b/data/etf_trend_equity_2020_2025_v9_gridbest_nomacro_trades.parquet new file mode 100644 index 0000000..c3218e9 Binary files /dev/null and b/data/etf_trend_equity_2020_2025_v9_gridbest_nomacro_trades.parquet differ diff --git a/data/etf_trend_equity_2020_2025_v9_gridbest_nomacro_weights.parquet b/data/etf_trend_equity_2020_2025_v9_gridbest_nomacro_weights.parquet new file mode 100644 index 0000000..106d9c2 Binary files /dev/null and b/data/etf_trend_equity_2020_2025_v9_gridbest_nomacro_weights.parquet differ diff --git a/data/etf_trend_equity_2024.parquet b/data/etf_trend_equity_2024.parquet new file mode 100644 index 0000000..78a2275 Binary files /dev/null and b/data/etf_trend_equity_2024.parquet differ diff --git a/data/etf_trend_equity_2024_daily.parquet b/data/etf_trend_equity_2024_daily.parquet new file mode 100644 index 0000000..45ca0de Binary files /dev/null and b/data/etf_trend_equity_2024_daily.parquet differ diff --git 
a/data/experiment_ledger.parquet b/data/experiment_ledger.parquet new file mode 100644 index 0000000..b1b87d1 Binary files /dev/null and b/data/experiment_ledger.parquet differ diff --git a/data/experiments.sqlite b/data/experiments.sqlite new file mode 100644 index 0000000..223cb4c Binary files /dev/null and b/data/experiments.sqlite differ diff --git a/data/experiments.sqlite-shm b/data/experiments.sqlite-shm new file mode 100644 index 0000000..e1b3b8b Binary files /dev/null and b/data/experiments.sqlite-shm differ diff --git a/data/experiments.sqlite-wal b/data/experiments.sqlite-wal new file mode 100644 index 0000000..676dbac Binary files /dev/null and b/data/experiments.sqlite-wal differ diff --git a/data/grid_search_results_v2.parquet b/data/grid_search_results_v2.parquet new file mode 100644 index 0000000..b405dba Binary files /dev/null and b/data/grid_search_results_v2.parquet differ diff --git a/data/grid_stage_a.parquet b/data/grid_stage_a.parquet new file mode 100644 index 0000000..b4ac5ba Binary files /dev/null and b/data/grid_stage_a.parquet differ diff --git a/data/opt_grid_20260306T085113Z.log b/data/opt_grid_20260306T085113Z.log new file mode 100644 index 0000000..ef1f79f --- /dev/null +++ b/data/opt_grid_20260306T085113Z.log @@ -0,0 +1,17 @@ +grid combos 13436928 > 128; sampling combos +progress valid=25 best_ann=0.2903 +progress valid=50 best_ann=0.2903 +progress valid=75 best_ann=0.2903 + ann_return ann_vol max_drawdown sharpe trades_per_year sma_fast sma_slow lazy_days min_hold_days replace_score_gap min_score macro_min_breadth macro_down_frac desired_positions_min atr_mult stop_loss_atr profit_tighten_atr atr_mult_profit bias_exit vol_ratio_exit + 0.200517 0.245594 -0.243440 0.816457 64.166667 3 30 4 2 1.6 0.2 0.15 0.75 2 3.2 3.2 6.0 1.5 0.25 3.0 + 0.197072 0.237816 -0.254923 0.828676 70.833333 5 20 4 3 1.6 0.2 0.15 0.85 2 4.0 2.0 8.0 2.0 0.18 4.0 + 0.195256 0.222875 -0.249820 0.876078 67.833333 5 30 4 5 0.8 0.4 0.10 0.85 2 3.2 3.2 6.0 2.0 0.12 
3.0 + 0.190109 0.240966 -0.224561 0.788943 64.333333 5 30 4 5 0.5 0.2 0.20 0.80 1 3.2 2.0 6.0 2.5 0.18 3.0 + 0.187824 0.248815 -0.209421 0.754876 50.666667 5 30 6 2 0.5 0.2 0.15 0.75 1 4.0 3.2 8.0 2.5 0.25 4.0 + 0.185986 0.245787 -0.248610 0.756697 60.000000 3 30 4 3 0.5 0.2 0.10 0.75 1 2.5 3.2 4.0 2.5 0.25 3.0 + 0.178070 0.236205 -0.249916 0.753881 78.500000 5 30 8 2 1.2 0.0 0.20 0.75 2 3.2 2.5 8.0 2.5 0.12 4.0 + 0.170204 0.240899 -0.220588 0.706536 59.000000 3 30 6 2 0.5 0.2 0.15 0.80 2 4.0 3.2 8.0 2.5 0.18 4.0 + 0.161948 0.218909 -0.279160 0.739797 73.666667 5 15 8 3 0.5 0.2 0.10 0.80 1 3.2 2.5 8.0 1.5 0.12 3.0 + 0.158855 0.268620 -0.341986 0.591376 71.166667 3 30 6 5 0.5 0.0 0.10 0.85 2 4.0 3.2 6.0 2.5 0.25 4.0 + 0.157707 0.243436 -0.255179 0.647837 65.333333 5 20 4 5 1.2 0.2 0.10 0.80 1 3.2 2.5 4.0 2.5 0.25 3.0 + 0.156077 0.238188 -0.224309 0.655271 51.000000 5 30 5 3 0.5 0.4 0.15 0.80 2 3.2 2.0 4.0 1.5 0.18 3.0 diff --git a/data/opt_state.json b/data/opt_state.json new file mode 100644 index 0000000..1b147b9 --- /dev/null +++ b/data/opt_state.json @@ -0,0 +1,678 @@ +{ + "best": { + "ann_return": 0.29919773587636556, + "ann_vol": 0.2580085666560993, + "max_drawdown": -0.19941147939677484, + "sharpe": 1.1596426419250159, + "trades_per_year": 71.16666666666667, + "max_positions": 3, + "desired_positions_min": 1, + "desired_positions_max": 3, + "rebalance_every": 1, + "replace_score_gap": 0.5, + "max_replaces_per_day": 1, + "sma_fast": 3, + "sma_slow": 30, + "atr_window": 14, + "atr_mult": 2.8, + "profit_tighten_atr": 4.0, + "atr_mult_profit": 1.5, + "stop_loss_atr": 3.6, + "macro_min_breadth": 0.15, + "macro_down_frac": 0.85, + "macro_scale_risk_off": 0.0, + "bias_window": 20, + "bias_exit": 0.25, + "vol_short": 5, + "vol_long": 20, + "vol_ratio_exit": 3.0, + "min_score": 0.0, + "score_vol_denom_floor": 0.02, + "trend_strength_weight": 0.6, + "w_r5": 0.25, + "w_r20": 0.45, + "w_r60": 0.2, + "w_r120": 0.1, + "min_history_days": 120, + "cooldown_days": 5, + 
"min_hold_days": 3, + "lazy_days": 8, + "rebalance_band": 0.06, + "vol_window": 20, + "max_weight_per_asset": 0.9, + "concentration_power": 2.2, + "port_vol_window": 60, + "target_ann_vol": 0.25, + "new_asset_days": 30, + "new_asset_max_w": 0.2, + "trial": 12, + "seed": 1772857400 + }, + "last_reported_ann_return": 0.26174076561443793, + "history": [ + { + "start": "20200101", + "end": "20251231", + "trials": 60, + "best_ann_return": 0.11864826513365356 + }, + { + "start": "20220101", + "end": "20251231", + "trials": 10, + "best_ann_return": 0.11864826513365356 + }, + { + "start": "20220101", + "end": "20251231", + "trials": 20, + "best_ann_return": 0.14307179229636358 + }, + { + "start": "20220101", + "end": "20251231", + "trials": 120, + "best_ann_return": 0.14307179229636358 + }, + { + "start": "20220101", + "end": "20251231", + "trials": 120, + "best_ann_return": 0.14307179229636358 + }, + { + "start": "20220101", + "end": "20251231", + "trials": 40, + "best_ann_return": 0.14307179229636358 + }, + { + "start": "20200101", + "end": "20251231", + "trials": 40, + "best_ann_return": 0.14307179229636358 + }, + { + "timestamp": "2026-03-05T13:27:54.064975+00:00", + "config": "configs/etf_universe_industry_only.json", + "start": "20200101", + "end": "20251231", + "trials": 240, + "jobs": 8, + "best_ann_return": 0.26174076561443793 + }, + { + "timestamp": "2026-03-06T01:04:17.263332+00:00", + "config": "configs/etf_universe_industry_profiled.json", + "start": "20200101", + "end": "20251231", + "trials": 480, + "jobs": 8, + "best_ann_return": 0.2903126188408862 + }, + { + "timestamp": "2026-03-06T02:18:52.288087+00:00", + "run_id": "20260306T020054Z_seed7", + "code_version": "nogit", + "config": "configs/etf_universe_industry_profiled.json", + "start": "20200101", + "end": "20251231", + "trials": 240, + "jobs": 8, + "best_ann_return": 0.2903126188408862, + "db": "data/experiments.sqlite" + }, + { + "timestamp": "2026-03-06T09:09:11.754945+00:00", + "run_id": 
"20260306T085114Z_seed20260306", + "code_version": "nogit", + "config": "configs/etf_universe_industry_profiled.json", + "start": "20200101", + "end": "20251231", + "trials": 100000, + "jobs": 8, + "best_ann_return": 0.2903126188408862, + "db": "data/experiments.sqlite" + }, + { + "timestamp": "2026-03-07T02:20:53.525889+00:00", + "run_id": "20260307T021714Z_seed1772849832", + "code_version": "nogit", + "config": "configs/etf_universe_industry_profiled.json", + "start": "20200101", + "end": "20251231", + "trials": 20, + "jobs": 6, + "best_ann_return": 0.2903126188408862, + "db": "data/experiments.sqlite" + }, + { + "timestamp": "2026-03-07T03:15:34.452214+00:00", + "run_id": "20260307T025817Z_bestlocal_seed1772852295_stops", + "code_version": "nogit", + "config": "configs/etf_universe_industry_profiled.json", + "start": "20200101", + "end": "20251231", + "trials": 20, + "jobs": 6, + "best_ann_return": 0.2941324263568994, + "db": "data/experiments.sqlite", + "base_from": "opt_state.best", + "tweaks": [ + "stops" + ] + }, + { + "timestamp": "2026-03-07T03:59:43.476374+00:00", + "run_id": "20260307T034217Z_bestlocal_seed1772854935_stops", + "code_version": "nogit", + "config": "configs/etf_universe_industry_profiled.json", + "start": "20200101", + "end": "20251231", + "trials": 20, + "jobs": 6, + "best_ann_return": 0.2941324263568994, + "db": "data/experiments.sqlite", + "base_from": "opt_state.best", + "tweaks": [ + "stops" + ] + }, + { + "timestamp": "2026-03-07T04:19:16.669791+00:00", + "run_id": "20260307T040158Z_bestlocal_seed1772856116_churn", + "code_version": "nogit", + "config": "configs/etf_universe_industry_profiled.json", + "start": "20200101", + "end": "20251231", + "trials": 20, + "jobs": 6, + "best_ann_return": 0.2941324263568994, + "db": "data/experiments.sqlite", + "base_from": "opt_state.best", + "tweaks": [ + "churn" + ] + }, + { + "timestamp": "2026-03-07T04:40:34.441399+00:00", + "run_id": "20260307T042322Z_bestlocal_seed1772857400_macro", + 
"code_version": "nogit", + "config": "configs/etf_universe_industry_profiled.json", + "start": "20200101", + "end": "20251231", + "trials": 20, + "jobs": 6, + "best_ann_return": 0.29919773587636556, + "db": "data/experiments.sqlite", + "base_from": "opt_state.best", + "tweaks": [ + "macro" + ] + }, + { + "timestamp": "2026-03-07T05:20:53.615457+00:00", + "run_id": "20260307T050352Z_bestlocal_seed1772859830_macro", + "code_version": "nogit", + "config": "configs/etf_universe_industry_profiled.json", + "start": "20200101", + "end": "20251231", + "trials": 20, + "jobs": 6, + "best_ann_return": 0.29919773587636556, + "db": "data/experiments.sqlite", + "base_from": "opt_state.best", + "tweaks": [ + "macro" + ] + }, + { + "timestamp": "2026-03-07T07:49:16.151072+00:00", + "run_id": "20260307T073200Z_bestlocal_seed1772868718_macro", + "code_version": "nogit", + "config": "configs/etf_universe_industry_profiled.json", + "start": "20200101", + "end": "20251231", + "trials": 20, + "jobs": 6, + "best_ann_return": 0.29919773587636556, + "db": "data/experiments.sqlite", + "base_from": "opt_state.best", + "tweaks": [ + "macro" + ] + }, + { + "timestamp": "2026-03-07T08:53:25.642488+00:00", + "run_id": "20260307T083612Z_bestlocal_seed1772872570_stops", + "code_version": "nogit", + "config": "/home/openclaw/projects/quant-factor-research/configs/etf_universe_industry_profiled.json", + "start": "20200101", + "end": "20251231", + "trials": 20, + "jobs": 6, + "best_ann_return": 0.29919773587636556, + "db": "/home/openclaw/projects/quant-factor-research/data/experiments.sqlite", + "base_from": "opt_state.best", + "tweaks": [ + "stops" + ] + }, + { + "timestamp": "2026-03-07T09:13:42.148802+00:00", + "run_id": "20260307T085632Z_bestlocal_seed1772873790_stops", + "code_version": "nogit", + "config": "configs/etf_universe_industry_profiled.json", + "start": "20200101", + "end": "20251231", + "trials": 20, + "jobs": 6, + "best_ann_return": 0.29919773587636556, + "db": 
"data/experiments.sqlite", + "base_from": "opt_state.best", + "tweaks": [ + "stops" + ] + }, + { + "timestamp": "2026-03-07T09:35:15.356660+00:00", + "run_id": "20260307T091729Z_bestlocal_seed1772875047_score", + "code_version": "nogit", + "config": "configs/etf_universe_industry_profiled.json", + "start": "20200101", + "end": "20251231", + "trials": 20, + "jobs": 6, + "best_ann_return": 0.29919773587636556, + "db": "data/experiments.sqlite", + "base_from": "opt_state.best", + "tweaks": [ + "score" + ] + }, + { + "timestamp": "2026-03-07T09:59:47.627321+00:00", + "run_id": "20260307T094156Z_bestlocal_seed1772876514_score", + "code_version": "nogit", + "config": "/home/openclaw/projects/quant-factor-research/configs/etf_universe_industry_profiled.json", + "start": "20200101", + "end": "20251231", + "trials": 20, + "jobs": 6, + "best_ann_return": 0.29919773587636556, + "db": "/home/openclaw/projects/quant-factor-research/data/experiments.sqlite", + "base_from": "opt_state.best", + "tweaks": [ + "score" + ] + }, + { + "timestamp": "2026-03-07T10:25:19.359544+00:00", + "run_id": "20260307T100722Z_bestlocal_seed1772878040_switches", + "code_version": "nogit", + "config": "/home/openclaw/projects/quant-factor-research/configs/etf_universe_industry_profiled.json", + "start": "20200101", + "end": "20251231", + "trials": 20, + "jobs": 6, + "best_ann_return": 0.29919773587636556, + "db": "/home/openclaw/projects/quant-factor-research/data/experiments.sqlite", + "base_from": "opt_state.best", + "tweaks": [ + "switches" + ] + }, + { + "timestamp": "2026-03-07T10:58:27.176353+00:00", + "run_id": "20260307T104037Z_bestlocal_seed1772880035_positions", + "code_version": "nogit", + "config": "/home/openclaw/projects/quant-factor-research/configs/etf_universe_industry_profiled.json", + "start": "20200101", + "end": "20251231", + "trials": 20, + "jobs": 6, + "best_ann_return": 0.29919773587636556, + "db": "/home/openclaw/projects/quant-factor-research/data/experiments.sqlite", + 
"base_from": "opt_state.best", + "tweaks": [ + "positions" + ] + }, + { + "timestamp": "2026-03-07T13:25:50.785672+00:00", + "run_id": "20260307T130802Z_bestlocal_seed1772888881_stops", + "code_version": "nogit", + "config": "/home/openclaw/projects/quant-factor-research/configs/etf_universe_industry_profiled.json", + "start": "20200101", + "end": "20251231", + "trials": 20, + "jobs": 6, + "best_ann_return": 0.29919773587636556, + "db": "/home/openclaw/projects/quant-factor-research/data/experiments.sqlite", + "base_from": "opt_state.best", + "tweaks": [ + "stops" + ] + }, + { + "timestamp": "2026-03-08T00:23:31.144317+00:00", + "run_id": "20260308T000519Z_bestlocal_seed1772928317_exits", + "code_version": "nogit", + "config": "/home/openclaw/projects/quant-factor-research/configs/etf_universe_industry_profiled.json", + "start": "20200101", + "end": "20251231", + "trials": 20, + "jobs": 6, + "best_ann_return": 0.29919773587636556, + "db": "/home/openclaw/projects/quant-factor-research/data/experiments.sqlite", + "base_from": "opt_state.best", + "tweaks": [ + "exits" + ] + }, + { + "timestamp": "2026-03-08T00:38:33.229561+00:00", + "run_id": "20260308T003740Z_bestlocal_seed1772930258_macro", + "code_version": "nogit", + "config": "configs/etf_universe_industry_profiled.json", + "start": "20200101", + "end": "20251231", + "trials": 1, + "jobs": 1, + "best_ann_return": 0.29919773587636556, + "db": "data/experiments.sqlite", + "base_from": "opt_state.best", + "tweaks": [ + "macro" + ] + }, + { + "timestamp": "2026-03-08T01:01:47.797863+00:00", + "run_id": "20260308T004337Z_bestlocal_seed1772930615_exits", + "code_version": "nogit", + "config": "/home/openclaw/projects/quant-factor-research/configs/etf_universe_industry_profiled.json", + "start": "20200101", + "end": "20251231", + "trials": 20, + "jobs": 6, + "best_ann_return": 0.29919773587636556, + "db": "/home/openclaw/projects/quant-factor-research/data/experiments.sqlite", + "base_from": "opt_state.best", + 
"tweaks": [ + "exits" + ] + }, + { + "timestamp": "2026-03-08T01:19:15.300848+00:00", + "run_id": "20260308T011822Z_bestlocal_seed1772932700_macro", + "code_version": "nogit", + "config": "configs/etf_universe_industry_profiled.json", + "start": "20200101", + "end": "20251231", + "trials": 1, + "jobs": 1, + "best_ann_return": 0.29919773587636556, + "db": "data/experiments.sqlite", + "base_from": "opt_state.best", + "tweaks": [ + "macro" + ] + }, + { + "timestamp": "2026-03-08T01:37:54.016182+00:00", + "run_id": "20260308T011935Z_bestlocal_seed1772932773_exits", + "code_version": "nogit", + "config": "/home/openclaw/projects/quant-factor-research/configs/etf_universe_industry_profiled.json", + "start": "20200101", + "end": "20251231", + "trials": 20, + "jobs": 6, + "best_ann_return": 0.29919773587636556, + "db": "/home/openclaw/projects/quant-factor-research/data/experiments.sqlite", + "base_from": "opt_state.best", + "tweaks": [ + "exits" + ] + }, + { + "timestamp": "2026-03-08T02:58:41.399930+00:00", + "run_id": "20260308T024027Z_bestlocal_seed1772937625_exits", + "code_version": "nogit", + "config": "/home/openclaw/projects/quant-factor-research/configs/etf_universe_industry_profiled.json", + "start": "20200101", + "end": "20251231", + "trials": 20, + "jobs": 6, + "best_ann_return": 0.29919773587636556, + "db": "/home/openclaw/projects/quant-factor-research/data/experiments.sqlite", + "base_from": "opt_state.best", + "tweaks": [ + "exits" + ] + }, + { + "timestamp": "2026-03-08T10:23:23.738860+00:00", + "run_id": "20260308T100515Z_bestlocal_seed1772964314_switches", + "code_version": "nogit", + "config": "/home/openclaw/projects/quant-factor-research/configs/etf_universe_industry_profiled.json", + "start": "20200101", + "end": "20251231", + "trials": 20, + "jobs": 6, + "best_ann_return": 0.29919773587636556, + "db": "/home/openclaw/projects/quant-factor-research/data/experiments.sqlite", + "base_from": "opt_state.best", + "tweaks": [ + "switches" + ] + }, + { + 
"timestamp": "2026-03-08T11:18:06.874222+00:00", + "run_id": "20260308T110022Z_bestlocal_seed1772967620_switches2", + "code_version": "nogit", + "config": "/home/openclaw/projects/quant-factor-research/configs/etf_universe_industry_profiled.json", + "start": "20200101", + "end": "20251231", + "trials": 20, + "jobs": 6, + "best_ann_return": 0.29919773587636556, + "db": "/home/openclaw/projects/quant-factor-research/data/experiments.sqlite", + "base_from": "opt_state.best", + "tweaks": [ + "switches2" + ] + }, + { + "timestamp": "2026-03-08T12:22:32.264081+00:00", + "run_id": "20260308T120437Z_bestlocal_seed1772971475_signal1", + "code_version": "nogit", + "config": "/home/openclaw/projects/quant-factor-research/configs/etf_universe_industry_profiled.json", + "start": "20200101", + "end": "20251231", + "trials": 20, + "jobs": 6, + "best_ann_return": 0.29919773587636556, + "db": "/home/openclaw/projects/quant-factor-research/data/experiments.sqlite", + "base_from": "opt_state.best", + "tweaks": [ + "signal1" + ] + }, + { + "timestamp": "2026-03-08T13:10:31.892686+00:00", + "run_id": "20260308T125232Z_bestlocal_seed1772974350_orth_ma", + "code_version": "nogit", + "config": "/home/openclaw/projects/quant-factor-research/configs/etf_universe_industry_profiled.json", + "start": "20200101", + "end": "20251231", + "trials": 20, + "jobs": 6, + "best_ann_return": 0.29919773587636556, + "db": "/home/openclaw/projects/quant-factor-research/data/experiments.sqlite", + "base_from": "opt_state.best", + "tweaks": [ + "orth_ma" + ] + }, + { + "timestamp": "2026-03-08T13:30:15.331445+00:00", + "run_id": "20260308T131211Z_bestlocal_seed1772975529_orth_weights", + "code_version": "nogit", + "config": "/home/openclaw/projects/quant-factor-research/configs/etf_universe_industry_profiled.json", + "start": "20200101", + "end": "20251231", + "trials": 20, + "jobs": 6, + "best_ann_return": 0.29919773587636556, + "db": "/home/openclaw/projects/quant-factor-research/data/experiments.sqlite", 
+ "base_from": "opt_state.best", + "tweaks": [ + "orth_weights" + ] + }, + { + "timestamp": "2026-03-08T13:51:18.164438+00:00", + "run_id": "20260308T133320Z_bestlocal_seed1772976798_orth_mech", + "code_version": "nogit", + "config": "/home/openclaw/projects/quant-factor-research/configs/etf_universe_industry_profiled.json", + "start": "20200101", + "end": "20251231", + "trials": 20, + "jobs": 6, + "best_ann_return": 0.29919773587636556, + "db": "/home/openclaw/projects/quant-factor-research/data/experiments.sqlite", + "base_from": "opt_state.best", + "tweaks": [ + "orth_mech" + ] + }, + { + "timestamp": "2026-03-08T14:19:04.111061+00:00", + "run_id": "20260308T140111Z_bestlocal_seed1772978469_asym_fast", + "code_version": "nogit", + "config": "configs/etf_universe_industry_profiled.json", + "start": "20200101", + "end": "20251231", + "trials": 20, + "jobs": 6, + "best_ann_return": 0.29919773587636556, + "db": "data/experiments.sqlite", + "base_from": "opt_state.best", + "tweaks": [ + "asym_fast" + ] + }, + { + "timestamp": "2026-03-09T01:44:48.718246+00:00", + "run_id": "20260309T012652Z_bestlocal_seed1773019610_asym_fast", + "code_version": "nogit", + "config": "configs/etf_universe_industry_profiled.json", + "start": "20200101", + "end": "20251231", + "trials": 20, + "jobs": 6, + "best_ann_return": 0.29919773587636556, + "db": "data/experiments.sqlite", + "base_from": "opt_state.best", + "tweaks": [ + "asym_fast" + ] + }, + { + "timestamp": "2026-03-09T02:04:28.023489+00:00", + "run_id": "20260309T014623Z_bestlocal_seed1773020781_asym_fast", + "code_version": "nogit", + "config": "configs/etf_universe_industry_profiled.json", + "start": "20200101", + "end": "20251231", + "trials": 20, + "jobs": 6, + "best_ann_return": 0.29919773587636556, + "db": "data/experiments.sqlite", + "base_from": "opt_state.best", + "tweaks": [ + "asym_fast" + ] + }, + { + "timestamp": "2026-03-09T05:59:47.135954+00:00", + "run_id": "20260309T054128Z_bestlocal_seed1773034886_macro", + 
"code_version": "nogit", + "config": "configs/etf_universe_industry_profiled.json", + "start": "20200101", + "end": "20251231", + "trials": 20, + "jobs": 6, + "best_ann_return": 0.29919773587636556, + "db": "data/experiments.sqlite", + "base_from": "opt_state.best", + "tweaks": [ + "macro" + ] + }, + { + "timestamp": "2026-03-09T06:51:14.103824+00:00", + "run_id": "20260309T063300Z_bestlocal_seed3511452665_macro", + "code_version": "nogit", + "config": "configs/etf_universe_industry_profiled.json", + "start": "20200101", + "end": "20251231", + "trials": 20, + "jobs": 6, + "best_ann_return": 0.29919773587636556, + "db": "data/experiments.sqlite", + "base_from": "opt_state.best", + "tweaks": [ + "macro" + ] + }, + { + "timestamp": "2026-03-09T07:41:55.288441+00:00", + "run_id": "20260309T072400Z_bestlocal_seed3514512955_macro", + "code_version": "nogit", + "config": "configs/etf_universe_industry_profiled.json", + "start": "20200101", + "end": "20251231", + "trials": 20, + "jobs": 6, + "best_ann_return": 0.29919773587636556, + "db": "data/experiments.sqlite", + "base_from": "opt_state.best", + "tweaks": [ + "macro" + ] + }, + { + "timestamp": "2026-03-09T08:11:58.360040+00:00", + "run_id": "20260309T075340Z_bestlocal_seed3516292556_exits", + "code_version": "nogit", + "config": "configs/etf_universe_industry_profiled.json", + "start": "20200101", + "end": "20251231", + "trials": 20, + "jobs": 6, + "best_ann_return": 0.29919773587636556, + "db": "data/experiments.sqlite", + "base_from": "opt_state.best", + "tweaks": [ + "exits" + ] + }, + { + "timestamp": "2026-03-09T08:33:46.207270+00:00", + "run_id": "20260309T081519Z_bestlocal_seed3517591585_exits", + "code_version": "nogit", + "config": "configs/etf_universe_industry_profiled.json", + "start": "20200101", + "end": "20251231", + "trials": 20, + "jobs": 6, + "best_ann_return": 0.29919773587636556, + "db": "data/experiments.sqlite", + "base_from": "opt_state.best", + "tweaks": [ + "exits" + ] + } + ] +} diff --git 
a/data/tune_results_2020_2025.parquet b/data/tune_results_2020_2025.parquet new file mode 100644 index 0000000..3450d6a Binary files /dev/null and b/data/tune_results_2020_2025.parquet differ diff --git a/data/tune_results_fast_2020_2025.parquet b/data/tune_results_fast_2020_2025.parquet new file mode 100644 index 0000000..578a261 Binary files /dev/null and b/data/tune_results_fast_2020_2025.parquet differ diff --git a/data/universe_profile_extended.parquet b/data/universe_profile_extended.parquet new file mode 100644 index 0000000..ad81624 Binary files /dev/null and b/data/universe_profile_extended.parquet differ diff --git a/docs/ETF_TREND_SYSTEM.md b/docs/ETF_TREND_SYSTEM.md new file mode 100644 index 0000000..9b96f79 --- /dev/null +++ b/docs/ETF_TREND_SYSTEM.md @@ -0,0 +1,63 @@ +# ETF Trend System (K<=4, no leverage) - v2 + +This is a daily-signal / daily-rebalance trend-following system on a configurable ETF universe. + +It is designed for: + +- K<=4 holdings +- no leverage (net exposure <= 100%) +- portfolio vol cap (de-risk only), remainder parked in a rates ETF +- practical execution hygiene (cooldown / new listing protection / turnover band) + +## Signals + +- Trend filter (entry universe): MA(fast) > MA(slow) + - default: MA5 > MA20 +- Ranking score (higher is better): + + `score = (0.5*R20 + 0.3*R60 + 0.2*R120) / max(vol20, floor) + 0.5*trend_strength` + + where `trend_strength = ma_fast/ma_slow - 1`. + +## Entry + +On each rebalance day (daily): + +- Candidate must satisfy: + - `trend_ok == True` (MA cross) + - `score >= min_score` + - `min_history_days` protection (skip too-new series) + - `cooldown_days` protection (after exit, avoid immediate re-entry) + +## Position Sizing + +- Risk parity on `vol20` across selected holdings. +- Per-asset cap: `max_weight_per_asset` (default 0.50) +- Portfolio vol cap (no leverage): + + `scale = min(1, target_ann_vol / port_vol(port_vol_window))` + + Remaining weight (1 - sum(weights)) is parked in `rates_fallback`. 
+ +## Exits (checked daily) + +A position exits if any of the following triggers fires: + +- Trend break: MA(fast) < MA(slow) +- Chandelier stop: close < highest_close - atr_mult*ATR +- Stop loss from entry: close < entry_price - stop_loss_atr*ATR +- Take profit from entry: close > entry_price + take_profit_atr*ATR + +## Trading Hygiene + +- `rebalance_band`: ignore small weight changes to reduce churn. +- `min_hold_days`: do not rebalance-sell a very fresh position (risk exits still apply). +- `new_asset_days/new_asset_max_w`: cap weight of a newly-eligible asset for its first N tradable days after it passes the history gate. + +## Outputs + +Backtest runner writes 3 artifacts: + +- equity curve parquet: `data/etf_trend_equity_*.parquet` +- weights parquet: `data/etf_trend_equity_*_weights.parquet` +- trades parquet: `data/etf_trend_equity_*_trades.parquet` diff --git a/docs/FACTOR_PIPELINE.md b/docs/FACTOR_PIPELINE.md new file mode 100644 index 0000000..a469d92 --- /dev/null +++ b/docs/FACTOR_PIPELINE.md @@ -0,0 +1,18 @@ +# Factor Pipeline (Draft) + +1) Load universe + prices + fundamentals +2) Compute raw factor values +3) Clean: + - missing handling + - winsorize (per-date cross section) + - z-score (per-date cross section) +4) Neutralize (optional): + - industry + - size +5) Evaluate: + - IC / Rank IC + - decay + - turnover +6) Backtest: + - long-short / top-k + - transaction costs diff --git a/docs/TUSHARE.md b/docs/TUSHARE.md new file mode 100644 index 0000000..d755677 --- /dev/null +++ b/docs/TUSHARE.md @@ -0,0 +1,28 @@ +# Tushare Integration + +## Setup + +1) Add token + +- Create `/home/openclaw/projects/quant-factor-research/.env`: + + - `TUSHARE_TOKEN=...` + - `TUSHARE_TIMEOUT=30` + +Template: `configs/tushare.env.example` + +2) Install dependency into conda env + +- `conda activate qfr` +- Prefer conda-forge where possible, but `tushare` is usually installed with pip: + - `pip install tushare` + +## Download daily bars + +Example: + +- `python scripts/tushare_download_daily.py --ts-code
000001.SZ --start 20250101 --end 20250131 --out data/raw/000001SZ_202501.parquet` + +Notes: + +- The Tushare API rate limits depend on your account's points (积分). Cache results locally. diff --git a/docs/dev-flow-checklist.md b/docs/dev-flow-checklist.md new file mode 100644 index 0000000..674cf8d --- /dev/null +++ b/docs/dev-flow-checklist.md @@ -0,0 +1,69 @@ +# QFR 策略开发最小闭环（Checklist） + +目标：让每次改动都能被复现、复算、对比、落库，避免只看日志导致的错觉与过拟合。 + +## 0) 约定 + +- 唯一入口：所有优化结果必须可用 scripts/run_etf_trend_backtest.py 复算。 +- 固定数据窗：同一次实验必须固定 start/end 与 configs 下的 universe json。 +- 落库优先：优化与复算都要写入 data/experiments.sqlite（或输出可追溯 artifacts）。 + +## 1) 提出假设（写清楚再改） + +- 本次改动想提升什么？（ann_return / max_drawdown / ann_vol / sharpe / trades_per_year） +- 风险约束是什么？（例如：max_trades_per_year <= 80，回撤不恶化超过阈值） +- 预期影响：趋势/均值回归/风险控制/换仓逻辑/过滤条件 哪一块在起作用？ + +## 2) 实现改动 + 基线自检 + +- 运行一次基线回测（固定 config + 时间窗）： + - python scripts/run_etf_trend_backtest.py --config CONFIG_JSON --start YYYYMMDD --end YYYYMMDD +- 确认输出 artifacts： + - data/etf_trend_equity.parquet + - data/etf_trend_equity_weights.parquet + - data/etf_trend_equity_trades.parquet（如有） + +## 3) 搜索/优化（iterate_optimize） + +- 固定参数：seed、start/end、config、rawdir +- 记录 run_id（建议用时间戳） +- 让优化写库：data/experiments.sqlite + +## 4) Top-N 复算（必须做） + +目的：避免优化器算出来的 top config 因入口不同/代码变更/数据差异而不可复现。 + +- 复算命令： + - python scripts/verify_topn.py --db data/experiments.sqlite --topn 10 --config CONFIG_JSON --rawdir data/raw + +输出： +- 每个 trial 的原始指标 vs 复算指标差异 +- 标记不一致（超过容忍阈值）的 trial + +## 5) 更新 best 与汇报规则 + +- 只有在满足： + - ann_return 相比 last_reported_ann_return 提升 >= 5pp + - 且 Top-N 复算一致 + - 且风险约束不恶化 + +才更新 data/opt_state.json 的 last_reported_ann_return 并对外汇报。 + +## 6) 借鉴四大流派（落成 模块 + 指标） + +- 趋势：多周期一致性、风险调整动量 +- 均值回归：偏离/回归信号（用于降低回撤/提高夏普） +- 风险/宏观：PCA/absorption ratio/相关性升高时降风险 +- 相对价值/结构：强弱腿替换、组内中性、主题子宇宙 + +要求：每个模块都要 +- 可开关（参数化） +- 可记录原因（trades/日志中写入 reason 字段） +- 可对比（A/B vs baseline） + +## 7) 迭代准则（用户确认） + +- 当已有一个还可以的策略（例如年化 25%+）后： + - 必须以该基础策略为主框架逐步叠加技巧与改进 + - 不要换一套完全不同的思路/框架 + - 每次微调的因子不超过 4 个（单次改动可归因、可回滚、可复现） diff --git a/pyproject.toml
b/pyproject.toml new file mode 100644 index 0000000..8faa1c5 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,15 @@ +[build-system] +requires = ["setuptools>=68", "wheel"] +build-backend = "setuptools.build_meta" + +[project] +name = "qfr" +version = "0.1.0" +description = "Quant factor research toolkit" +requires-python = ">=3.11" + +[tool.ruff] +line-length = 100 + +[tool.pytest.ini_options] +pythonpath = ["src"] diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..286ef86 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,25 @@ +# Core +numpy +pandas +scipy +statsmodels +scikit-learn + +# Data / IO +pyarrow +pydantic +python-dotenv + +# Viz +matplotlib +seaborn + +# Notebooks +jupyter + +# Dev +pytest +ruff + +# Data sources +tushare diff --git a/scripts/analyze_drawdown.py b/scripts/analyze_drawdown.py new file mode 100644 index 0000000..243bc79 --- /dev/null +++ b/scripts/analyze_drawdown.py @@ -0,0 +1,45 @@ +from __future__ import annotations + +import argparse +from pathlib import Path + +import pandas as pd + + +def main() -> None: + ap = argparse.ArgumentParser() + ap.add_argument("--equity", required=True) + ap.add_argument("--trades", required=True) + ap.add_argument("--top", type=int, default=3) + args = ap.parse_args() + + eq = pd.read_parquet(args.equity) + eq = eq.copy() + eq.index = eq.index.astype(str) + s = eq["equity"].astype(float) + + peak = s.cummax() + dd = s / peak - 1.0 + + # find worst drawdowns by trough + worst = dd.nsmallest(args.top) + + tr = pd.read_parquet(args.trades) + tr = tr.copy() + tr["trade_date"] = tr["trade_date"].astype(str) + + for d, v in worst.items(): + # drawdown start = last peak before d + peak_date = (s.loc[:d]).idxmax() + print("---") + print("trough", d, "dd", float(v)) + print("peak", peak_date, "peak_equity", float(s.loc[peak_date]), "trough_equity", float(s.loc[d])) + w = tr[(tr["trade_date"] >= peak_date) & (tr["trade_date"] <= d)] + print("trades in window", len(w)) + if not w.empty: 
+ cols = [c for c in ["trade_date", "ts_code", "side", "reason", "weight_before", "weight_after", "price"] if c in w.columns] + print(w[cols].tail(25).to_string(index=False)) + + +if __name__ == "__main__": + main() diff --git a/scripts/auto_tune_etf_trend.py b/scripts/auto_tune_etf_trend.py new file mode 100644 index 0000000..3dc62b4 --- /dev/null +++ b/scripts/auto_tune_etf_trend.py @@ -0,0 +1,164 @@ +from __future__ import annotations + +import argparse +import itertools +import json +from dataclasses import replace +from pathlib import Path + +import numpy as np +import pandas as pd + +from qfr.strategy.etf_trend import Constraints, TrendParams, UniverseAsset, run_backtest + + +def load_universe(config_path: Path) -> tuple[list[UniverseAsset], Constraints, str, str]: + conf = json.loads(config_path.read_text(encoding="utf-8")) + universe = [UniverseAsset(**a) for a in conf["assets"]] + + cons = conf.get("constraints", {}) + constraints = Constraints( + max_positions=int(cons.get("max_positions", 4)), + must_commodity=int(cons.get("must_include", {}).get("commodity", 0)), + must_rates=int(cons.get("must_include", {}).get("rates", 0)), + must_equity=int(cons.get("must_include", {}).get("equity", 0)), + ) + + risk_proxy = cons.get("risk_proxy", "510300.SH") + rates_fallback = cons.get("rates_fallback", "511010.SH") + + return universe, constraints, risk_proxy, rates_fallback + + +def load_prices(raw_dir: Path, universe: list[UniverseAsset], start: str, end: str) -> dict[str, pd.DataFrame]: + out: dict[str, pd.DataFrame] = {} + for a in universe: + fn = raw_dir / f"{a.ts_code.replace('.', '')}.parquet" + df = pd.read_parquet(fn) + df = df.copy() + df["trade_date"] = df["trade_date"].astype(str) + df = df[(df["trade_date"] >= start) & (df["trade_date"] <= end)] + out[a.ts_code] = df + return out + + +def perf_stats(equity: pd.Series) -> dict[str, float]: + r = equity.pct_change().dropna() + if r.empty: + return {} + ann_ret = float((equity.iloc[-1] / equity.iloc[0]) 
** (252 / len(r)) - 1) + ann_vol = float(r.std(ddof=1) * (252 ** 0.5)) + dd = float((equity / equity.cummax() - 1.0).min()) + calmar = float(ann_ret / abs(dd)) if dd < 0 else float("nan") + return {"ann_return": ann_ret, "ann_vol": ann_vol, "max_drawdown": dd, "calmar": calmar} + + +def main() -> None: + p = argparse.ArgumentParser() + p.add_argument("--config", default="configs/etf_universe.json") + p.add_argument("--rawdir", default="data/raw") + p.add_argument("--start", default="20200101") + p.add_argument("--end", default="20251231") + p.add_argument("--out", default="data/tune_results.parquet") + args = p.parse_args() + + config_path = Path(args.config) + raw_dir = Path(args.rawdir) + + universe, constraints, risk_proxy, rates_fallback = load_universe(config_path) + prices = load_prices(raw_dir, universe, args.start, args.end) + + base = TrendParams() + + # small grid to keep runtime reasonable + fast_list = [5, 10] + slow_list = [20, 40] + atr_mult_list = [2.5, 3.0] + vol_window_list = [10, 20] + port_vol_window_list = [40, 60] + max_positions_list = [3, 4] + + rows = [] + + for sma_fast, sma_slow, atr_mult, vol_window, port_vol_window, max_positions in itertools.product( + fast_list, + slow_list, + atr_mult_list, + vol_window_list, + port_vol_window_list, + max_positions_list, + ): + if sma_fast >= sma_slow: + continue + + params = replace( + base, + sma_fast=sma_fast, + sma_slow=sma_slow, + atr_mult=atr_mult, + vol_window=vol_window, + port_vol_window=port_vol_window, + max_positions=max_positions, + rebalance_every=1, + ) + + cons = replace(constraints, max_positions=max_positions) + + equity, _weights = run_backtest( + prices, + universe, + cons, + params, + rates_fallback=rates_fallback, + risk_proxy=risk_proxy, + ) + + st = perf_stats(equity["equity"]) + if not st: + continue + + row = { + "sma_fast": sma_fast, + "sma_slow": sma_slow, + "atr_mult": atr_mult, + "vol_window": vol_window, + "port_vol_window": port_vol_window, + "max_positions": 
max_positions, + **st, + } + rows.append(row) + + df = pd.DataFrame(rows) + if df.empty: + print("no results") + return + + # filter by vol constraint first, then sort by ann_return + filt = df[df["ann_vol"] <= 0.18].copy() + if filt.empty: + filt = df.copy() + + filt = filt.sort_values(["ann_return", "calmar"], ascending=False) + + out = Path(args.out) + out.parent.mkdir(parents=True, exist_ok=True) + filt.to_parquet(out, index=False) + + print("top10") + cols = [ + "ann_return", + "ann_vol", + "max_drawdown", + "calmar", + "sma_fast", + "sma_slow", + "atr_mult", + "vol_window", + "port_vol_window", + "max_positions", + ] + print(filt[cols].head(10).to_string(index=False)) + + +if __name__ == "__main__": + main() diff --git a/scripts/auto_tune_etf_trend_fast.py b/scripts/auto_tune_etf_trend_fast.py new file mode 100644 index 0000000..b3d4bc4 --- /dev/null +++ b/scripts/auto_tune_etf_trend_fast.py @@ -0,0 +1,229 @@ +from __future__ import annotations + +import argparse +import itertools +import json +from dataclasses import replace +from pathlib import Path + +import numpy as np +import pandas as pd + +from qfr.strategy.etf_trend import Constraints, TrendParams, UniverseAsset, compute_features, portfolio_vol, risk_parity_weights, select_portfolio + + +def load_universe(config_path: Path) -> tuple[list[UniverseAsset], Constraints, str, str]: + conf = json.loads(config_path.read_text(encoding="utf-8")) + universe = [UniverseAsset(**a) for a in conf["assets"]] + + cons = conf.get("constraints", {}) + constraints = Constraints( + max_positions=int(cons.get("max_positions", 4)), + must_commodity=int(cons.get("must_include", {}).get("commodity", 0)), + must_rates=int(cons.get("must_include", {}).get("rates", 0)), + must_equity=int(cons.get("must_include", {}).get("equity", 0)), + ) + + risk_proxy = cons.get("risk_proxy", "510300.SH") + rates_fallback = cons.get("rates_fallback", "511010.SH") + return universe, constraints, risk_proxy, rates_fallback + + +def 
load_prices(raw_dir: Path, universe: list[UniverseAsset], start: str, end: str) -> dict[str, pd.DataFrame]: + out: dict[str, pd.DataFrame] = {} + for a in universe: + fn = raw_dir / f"{a.ts_code.replace('.', '')}.parquet" + df = pd.read_parquet(fn) + df = df.copy() + df["trade_date"] = df["trade_date"].astype(str) + df = df[(df["trade_date"] >= start) & (df["trade_date"] <= end)] + out[a.ts_code] = df + return out + + +def perf_stats(equity: pd.Series) -> dict[str, float]: + r = equity.pct_change().dropna() + if r.empty: + return {} + ann_ret = float((equity.iloc[-1] / equity.iloc[0]) ** (252 / len(r)) - 1) + ann_vol = float(r.std(ddof=1) * (252 ** 0.5)) + dd = float((equity / equity.cummax() - 1.0).min()) + calmar = float(ann_ret / abs(dd)) if dd < 0 else float("nan") + return {"ann_return": ann_ret, "ann_vol": ann_vol, "max_drawdown": dd, "calmar": calmar} + + +def run_backtest_cached( + feats: dict[str, pd.DataFrame], + universe: list[UniverseAsset], + constraints: Constraints, + params: TrendParams, + rates_fallback: str, + risk_proxy: str, +) -> pd.DataFrame: + # align dates intersection + dates = None + for f in feats.values(): + d = set(f["trade_date"].astype(str)) + dates = d if dates is None else dates.intersection(d) + if not dates: + raise RuntimeError("No overlapping trade_date") + all_dates = sorted(dates) + + close_px = pd.DataFrame(index=all_dates) + ret1 = pd.DataFrame(index=all_dates) + for ts, f in feats.items(): + g = f.set_index("trade_date").reindex(all_dates) + close_px[ts] = g["close"].astype(float) + ret1[ts] = close_px[ts].pct_change().fillna(0.0) + + if risk_proxy not in close_px.columns: + raise RuntimeError("risk_proxy missing") + + weights = pd.DataFrame(0.0, index=all_dates, columns=close_px.columns) + + in_pos: set[str] = set() + highest_close: dict[str, float] = {} + + atr_map = {ts: feats[ts].set_index("trade_date").reindex(all_dates)["atr"].astype(float) for ts in close_px.columns} + mf_map = {ts: 
feats[ts].set_index("trade_date").reindex(all_dates)["ma_fast"].astype(float) for ts in close_px.columns} + ms_map = {ts: feats[ts].set_index("trade_date").reindex(all_dates)["ma_slow"].astype(float) for ts in close_px.columns} + + last_reb = -10**9 + + for i, d in enumerate(all_dates): + if i > 0: + weights.loc[d] = weights.iloc[i - 1] + + for ts in list(in_pos): + c = float(close_px.loc[d, ts]) + if np.isfinite(c): + highest_close[ts] = max(highest_close.get(ts, c), c) + + # exits + for ts in list(in_pos): + c = float(close_px.loc[d, ts]) + mf = float(mf_map[ts].loc[d]) + ms = float(ms_map[ts].loc[d]) + atr = float(atr_map[ts].loc[d]) + h = highest_close.get(ts, c) + trend_break = (np.isfinite(mf) and np.isfinite(ms) and (mf < ms)) + chand_break = np.isfinite(atr) and c < (h - params.atr_mult * atr) + if trend_break or chand_break: + weights.loc[d, ts] = 0.0 + in_pos.remove(ts) + highest_close.pop(ts, None) + + if (i - last_reb) >= params.rebalance_every: + rows = [] + for ts in close_px.columns: + f = feats[ts].set_index("trade_date").reindex([d]).iloc[0] + rows.append((ts, bool(f["trend_ok"]) if pd.notna(f["trend_ok"]) else False, + float(f["score_raw"]) if pd.notna(f["score_raw"]) else float("nan"), + float(f["vol"]) if pd.notna(f["vol"]) else float("nan"))) + snap = pd.DataFrame(rows, columns=["ts_code", "trend_ok", "score_raw", "vol"]).set_index("ts_code") + + picks = select_portfolio(snap, universe, constraints) + vol = snap.loc[picks, "vol"].copy() + w = risk_parity_weights(vol, max_w=0.50) + + trailing = ret1[picks].iloc[max(0, i - params.port_vol_window + 1) : i + 1] + pvol = portfolio_vol(trailing, w) + scale = 1.0 + if np.isfinite(pvol) and pvol > 0: + scale = min(1.0, params.target_ann_vol / pvol) + + w_exec = w * scale + weights.loc[d] = 0.0 + for ts, wi in w_exec.items(): + weights.loc[d, ts] = float(wi) + + rem = 1.0 - float(w_exec.sum()) + if rem > 1e-12 and rates_fallback in weights.columns: + weights.loc[d, rates_fallback] += rem + + in_pos = 
{ts for ts in close_px.columns if weights.loc[d, ts] > 1e-12} + for ts in in_pos: + c = float(close_px.loc[d, ts]) + highest_close[ts] = max(highest_close.get(ts, c), c) + + last_reb = i + + w_lag = weights.shift(1).fillna(0.0) + port_ret = (ret1 * w_lag).sum(axis=1) + equity = (1.0 + port_ret).cumprod().to_frame("equity") + return equity + + +def main() -> None: + ap = argparse.ArgumentParser() + ap.add_argument("--config", default="configs/etf_universe.json") + ap.add_argument("--rawdir", default="data/raw") + ap.add_argument("--start", default="20200101") + ap.add_argument("--end", default="20251231") + ap.add_argument("--out", default="data/tune_results_fast.parquet") + args = ap.parse_args() + + universe, constraints, risk_proxy, rates_fallback = load_universe(Path(args.config)) + prices = load_prices(Path(args.rawdir), universe, args.start, args.end) + + base = TrendParams(rebalance_every=1) + + # grid (keep small) + fast_list = [3, 5, 8] + slow_list = [15, 20, 30] + atr_mult_list = [2.0, 2.5, 3.0] + vol_window_list = [10, 20] + port_vol_window_list = [40, 60] + max_positions_list = [3, 4] + + rows = [] + + for sma_fast, sma_slow in itertools.product(fast_list, slow_list): + if sma_fast >= sma_slow: + continue + for atr_mult, vol_window, port_vol_window, max_positions in itertools.product( + atr_mult_list, vol_window_list, port_vol_window_list, max_positions_list + ): + params = replace( + base, + max_positions=max_positions, + sma_fast=sma_fast, + sma_slow=sma_slow, + atr_mult=atr_mult, + vol_window=vol_window, + port_vol_window=port_vol_window, + ) + cons = replace(constraints, max_positions=max_positions) + + feats = {ts: compute_features(df, params) for ts, df in prices.items()} + equity = run_backtest_cached(feats, universe, cons, params, rates_fallback, risk_proxy) + st = perf_stats(equity["equity"]) + if not st: + continue + rows.append({ + "sma_fast": sma_fast, + "sma_slow": sma_slow, + "atr_mult": atr_mult, + "vol_window": vol_window, + 
"port_vol_window": port_vol_window, + "max_positions": max_positions, + **st, + }) + + df = pd.DataFrame(rows) + if df.empty: + print("no results") + return + + filt = df[df["ann_vol"] <= 0.18].sort_values(["ann_return", "calmar"], ascending=False) + out = Path(args.out) + out.parent.mkdir(parents=True, exist_ok=True) + filt.to_parquet(out, index=False) + + cols = ["ann_return", "ann_vol", "max_drawdown", "calmar", "sma_fast", "sma_slow", "atr_mult", "vol_window", "port_vol_window", "max_positions"] + print("top10") + print(filt[cols].head(10).to_string(index=False)) + + +if __name__ == "__main__": + main() diff --git a/scripts/auto_tune_etf_trend_small.py b/scripts/auto_tune_etf_trend_small.py new file mode 100644 index 0000000..611aad6 --- /dev/null +++ b/scripts/auto_tune_etf_trend_small.py @@ -0,0 +1,95 @@ +from __future__ import annotations + +import argparse +import json +from dataclasses import replace +from pathlib import Path + +import pandas as pd + +from qfr.strategy.etf_trend import Constraints, TrendParams, UniverseAsset, run_backtest + + +def load_universe(config_path: Path): + conf = json.loads(config_path.read_text(encoding="utf-8")) + universe = [UniverseAsset(**a) for a in conf["assets"]] + cons = conf.get("constraints", {}) + constraints = Constraints( + max_positions=int(cons.get("max_positions", 4)), + must_commodity=int(cons.get("must_include", {}).get("commodity", 0)), + must_rates=int(cons.get("must_include", {}).get("rates", 0)), + must_equity=int(cons.get("must_include", {}).get("equity", 0)), + ) + return universe, constraints, cons.get("risk_proxy", "510300.SH"), cons.get("rates_fallback", "511010.SH") + + +def load_prices(raw_dir: Path, universe: list[UniverseAsset], start: str, end: str): + out = {} + for a in universe: + fn = raw_dir / f"{a.ts_code.replace('.', '')}.parquet" + df = pd.read_parquet(fn) + df = df.copy() + df["trade_date"] = df["trade_date"].astype(str) + df = df[(df["trade_date"] >= start) & (df["trade_date"] <= end)] + 
out[a.ts_code] = df + return out + + +def perf_stats(equity: pd.Series): + r = equity.pct_change().dropna() + ann_ret = float((equity.iloc[-1] / equity.iloc[0]) ** (252 / len(r)) - 1) + ann_vol = float(r.std(ddof=1) * (252 ** 0.5)) + dd = float((equity / equity.cummax() - 1.0).min()) + return ann_ret, ann_vol, dd + + +def main() -> None: + ap = argparse.ArgumentParser() + ap.add_argument("--config", default="configs/etf_universe.json") + ap.add_argument("--rawdir", default="data/raw") + ap.add_argument("--start", default="20200101") + ap.add_argument("--end", default="20251231") + args = ap.parse_args() + + universe, constraints, risk_proxy, rates_fallback = load_universe(Path(args.config)) + prices = load_prices(Path(args.rawdir), universe, args.start, args.end) + + base = TrendParams(rebalance_every=1, max_positions=4) + + # A very small candidate set (fast to run) + candidates = [ + (5, 20, 3.0), + (5, 20, 2.5), + (3, 15, 2.5), + (8, 30, 3.0), + (10, 40, 3.0), + (5, 30, 3.0), + ] + + rows = [] + for sma_fast, sma_slow, atr_mult in candidates: + params = replace(base, sma_fast=sma_fast, sma_slow=sma_slow, atr_mult=atr_mult) + equity, _w = run_backtest( + prices, + universe, + constraints, + params, + rates_fallback=rates_fallback, + risk_proxy=risk_proxy, + ) + ann_ret, ann_vol, dd = perf_stats(equity["equity"]) + rows.append({ + "ann_return": ann_ret, + "ann_vol": ann_vol, + "max_drawdown": dd, + "sma_fast": sma_fast, + "sma_slow": sma_slow, + "atr_mult": atr_mult, + }) + + df = pd.DataFrame(rows).sort_values(["ann_return"], ascending=False) + print(df.to_string(index=False)) + + +if __name__ == "__main__": + main() diff --git a/scripts/expand_etf_universe.py b/scripts/expand_etf_universe.py new file mode 100644 index 0000000..624bafc --- /dev/null +++ b/scripts/expand_etf_universe.py @@ -0,0 +1,148 @@ +from __future__ import annotations + +import argparse +import json +import math +from collections import defaultdict +from datetime import date, timedelta +from 
pathlib import Path + +import pandas as pd + +from qfr.data.tushare_client import load_tushare_config, pro_api + + +def median_amount(cfg, ts_code: str, start: str, end: str) -> float: + api = pro_api(cfg) + df = api.fund_daily(ts_code=ts_code, start_date=start, end_date=end, fields="trade_date,amount") + if df is None or df.empty or "amount" not in df.columns: + return 0.0 + amt = pd.to_numeric(df["amount"], errors="coerce").dropna() + if amt.empty: + return 0.0 + return float(amt.median()) + + +def classify_by_keyword(kw: str) -> str: + # very rough tagging for universe constraints / reporting + equity_kws = { + "半导体", + "芯片", + "通信", + "5G", + "通信设备", + "军工", + "机器人", + "工业母机", + "智能制造", + "消费电子", + "AI", + "算力", + "软件", + "创新药", + "医药", + "新能源", + "光伏", + "锂电", + "电池", + "新材料", + "稀土", + } + commodity_kws = {"黄金", "白银", "有色", "稀土", "矿业", "原油", "油", "煤", "化工", "豆粕", "农业"} + rates_kws = {"国债", "政金债", "债", "短债", "中债"} + + if kw in rates_kws: + return "rates_cn" + if kw in commodity_kws: + return "commodity_cn" + if kw in equity_kws: + return "equity_cn_sector" + return "equity_cn_sector" + + +def main() -> None: + ap = argparse.ArgumentParser() + ap.add_argument("--config", default="configs/etf_universe.json") + ap.add_argument("--out", default=None) + ap.add_argument("--per_keyword", type=int, default=2) + ap.add_argument("--min_median_amount", type=float, default=0.0) + ap.add_argument( + "--keywords", + default=( + "半导体,芯片,通信,5G,通信设备,军工,机器人,工业母机,智能制造,消费电子,AI,算力,软件,创新药,医药,新能源,光伏,锂电,电池," + "矿业,有色,稀土,新材料,黄金,白银,原油,煤,化工,豆粕,农业,国债,政金债" + ), + ) + args = ap.parse_args() + + cfg = load_tushare_config() + api = pro_api(cfg) + + conf_path = Path(args.config) + conf = json.loads(conf_path.read_text(encoding="utf-8")) + + assets = conf.get("assets", []) + have = {a["ts_code"] for a in assets} + + kw_list = [k.strip() for k in str(args.keywords).split(",") if k.strip()] + + fb = api.fund_basic(market="E", status="L", fields="ts_code,name") + if fb is None or fb.empty: + 
raise RuntimeError("fund_basic returned empty") + + fb = fb.dropna(subset=["ts_code", "name"]).copy() + + end = date.today().strftime("%Y%m%d") + start = (date.today() - timedelta(days=180)).strftime("%Y%m%d") + + buckets: dict[str, list[tuple[str, str]]] = defaultdict(list) + for _, r in fb.iterrows(): + ts_code = str(r["ts_code"]).strip() + name = str(r["name"]).strip() + for kw in kw_list: + if kw in name: + buckets[kw].append((ts_code, name)) + break + + chosen: list[tuple[str, str, str, float, str]] = [] + + for kw in kw_list: + cands = buckets.get(kw, []) + if not cands: + continue + + scored: list[tuple[float, str, str]] = [] + for ts_code, name in cands: + if ts_code in have: + continue + try: + m = median_amount(cfg, ts_code, start, end) + except Exception: + m = 0.0 + if not math.isfinite(m) or m <= 0: + continue + if m < float(args.min_median_amount): + continue + scored.append((m, ts_code, name)) + + scored.sort(reverse=True) + for m, ts_code, name in scored[: int(args.per_keyword)]: + cls = classify_by_keyword(kw) + chosen.append((kw, ts_code, name, m, cls)) + + for kw, ts_code, name, m, cls in chosen: + assets.append({"ts_code": ts_code, "asset_class": cls, "name": name}) + have.add(ts_code) + + conf["assets"] = assets + + out_path = Path(args.out) if args.out else conf_path + out_path.write_text(json.dumps(conf, ensure_ascii=True, indent=2) + "\n", encoding="utf-8") + + print(f"added {len(chosen)} ETFs") + for kw, ts_code, name, m, cls in chosen[:80]: + print(f"{kw}\t{ts_code}\t{m:.0f}\t{cls}\t{name}") + + +if __name__ == "__main__": + main() diff --git a/scripts/filter_universe_by_profile.py b/scripts/filter_universe_by_profile.py new file mode 100644 index 0000000..b32ca5c --- /dev/null +++ b/scripts/filter_universe_by_profile.py @@ -0,0 +1,128 @@ +from __future__ import annotations + +import argparse +import json +from pathlib import Path + +import numpy as np +import pandas as pd + + +def load_prices(raw_dir: Path, ts_code: str) -> pd.DataFrame: 
+ fn = raw_dir / f"{ts_code.replace('.', '')}.parquet" + df = pd.read_parquet(fn) + df = df.copy() + df["trade_date"] = df["trade_date"].astype(str) + df = df.sort_values("trade_date").reset_index(drop=True) + return df + + +def ann_vol(ret1: pd.Series) -> float: + r = ret1.dropna() + if len(r) < 50: + return float("nan") + return float(r.std(ddof=1) * np.sqrt(252.0)) + + +def max_drawdown(close: pd.Series) -> float: + c = close.astype(float) + if c.isna().all() or len(c) < 50: + return float("nan") + eq = c / float(c.iloc[0]) + dd = eq / eq.cummax() - 1.0 + return float(dd.min()) + + +def bias_stats(close: pd.Series, ma_n: int = 20) -> tuple[float, float]: + c = close.astype(float) + ma = c.rolling(ma_n, min_periods=ma_n).mean() + b = (c / ma - 1.0).dropna() + if len(b) < 50: + return float("nan"), float("nan") + return float(b.mean()), float(b.std(ddof=1)) + + +def main() -> None: + ap = argparse.ArgumentParser() + ap.add_argument("--config", default="configs/etf_universe_industry_only.json") + ap.add_argument("--rawdir", default="data/raw") + ap.add_argument("--start", default="20200101") + ap.add_argument("--end", default="20251231") + ap.add_argument("--window", type=int, default=504, help="profile window in trading days") + ap.add_argument("--out", default="data/universe_profile.parquet") + + # filters (keep high vol, avoid pathological drawdowns) + ap.add_argument("--min_ann_vol", type=float, default=0.18) + ap.add_argument("--max_dd_floor", type=float, default=-0.65, help="drop assets with max_dd < floor") + ap.add_argument("--min_bias_std", type=float, default=0.02) + ap.add_argument("--max_bias_std", type=float, default=0.20) + + ap.add_argument("--top", type=int, default=40, help="how many to keep after scoring") + ap.add_argument("--out_config", default="configs/etf_universe_industry_profiled.json") + args = ap.parse_args() + + conf = json.loads(Path(args.config).read_text(encoding="utf-8")) + assets = conf["assets"] + + raw = Path(args.rawdir) + + rows 
= [] + for a in assets: + ts = a["ts_code"] + df = load_prices(raw, ts) + df = df[(df["trade_date"] >= args.start) & (df["trade_date"] <= args.end)] + if len(df) < int(args.window) + 50: + continue + + tail = df.tail(int(args.window)) + close = tail["close"].astype(float) + ret1 = close.pct_change() + + v = ann_vol(ret1) + dd = max_drawdown(close) + bmu, bsd = bias_stats(close, 20) + + rows.append( + { + "ts_code": ts, + "name": a.get("name"), + "asset_class": a.get("asset_class"), + "ann_vol": v, + "max_dd": dd, + "bias20_mean": bmu, + "bias20_std": bsd, + } + ) + + prof = pd.DataFrame(rows) + if prof.empty: + raise SystemExit("no assets profiled") + + prof.to_parquet(args.out, index=False) + + # filter + f = prof.copy() + f = f[np.isfinite(f["ann_vol"]) & np.isfinite(f["max_dd"]) & np.isfinite(f["bias20_std"])].copy() + f = f[(f["ann_vol"] >= float(args.min_ann_vol))] + f = f[(f["max_dd"] >= float(args.max_dd_floor))] + f = f[(f["bias20_std"] >= float(args.min_bias_std)) & (f["bias20_std"] <= float(args.max_bias_std))] + + # score: prefer high vol and stable (less extreme dd). still keep high beta. 
def load_universe(config_path: Path) -> tuple[list[UniverseAsset], Constraints, str, str]:
    """Read the universe config and build the objects the backtest needs.

    Args:
        config_path: JSON file with an ``assets`` list and an optional
            ``constraints`` mapping.

    Returns:
        ``(universe, constraints, risk_proxy, rates_fallback)``. Missing
        constraint entries fall back to 4 max positions, zero required
        holdings per class, ``510300.SH`` as risk proxy and ``511010.SH``
        as rates fallback.
    """
    raw_text = config_path.read_text(encoding="utf-8")
    conf = json.loads(raw_text)

    universe = []
    for spec in conf["assets"]:
        universe.append(UniverseAsset(**spec))

    cons = conf.get("constraints", {})
    required = cons.get("must_include", {})
    constraints = Constraints(
        max_positions=int(cons.get("max_positions", 4)),
        must_commodity=int(required.get("commodity", 0)),
        must_rates=int(required.get("rates", 0)),
        must_equity=int(required.get("equity", 0)),
    )

    return (
        universe,
        constraints,
        cons.get("risk_proxy", "510300.SH"),
        cons.get("rates_fallback", "511010.SH"),
    )
def perf_stats(equity: pd.Series) -> dict[str, float]:
    """Summarise an equity curve with annualised performance figures.

    Gaps in the curve are forward-filled explicitly and any leading NaNs
    are dropped. Without this, a NaN in the first or last row makes the
    annualised return NaN, and interior gaps are handled differently
    depending on the pandas version's implicit ``pct_change`` padding.

    Args:
        equity: Equity values in chronological order, one per trading day.

    Returns:
        A dict with ``ann_return``, ``ann_vol``, ``max_drawdown`` and
        ``sharpe`` (annualised return divided by annualised volatility,
        NaN when volatility is not positive). An empty dict is returned
        when fewer than two valid points exist.
    """
    eq = equity.astype(float).ffill().dropna()
    r = eq.pct_change().dropna()
    if r.empty:
        return {}
    # Geometric annualisation over the number of daily returns observed.
    ann_ret = float((eq.iloc[-1] / eq.iloc[0]) ** (252 / len(r)) - 1)
    ann_vol = float(r.std(ddof=1) * (252 ** 0.5))
    dd = float((eq / eq.cummax() - 1.0).min())
    sharpe = float(ann_ret / ann_vol) if ann_vol > 0 else float("nan")
    return {"ann_return": ann_ret, "ann_vol": ann_vol, "max_drawdown": dd, "sharpe": sharpe}
+ grid = { + "sma_fast": [3, 5, 8], + "sma_slow": [15, 20, 30, 40], + "lazy_days": [2, 5], + "rebalance_band": [0.03, 0.06], + "atr_mult": [2.5, 3.2, 4.0], + "profit_tighten_atr": [3.0, 4.0], + "atr_mult_profit": [1.5, 2.0], + "stop_loss_atr": [2.5, 3.2], + "bias_exit": [0.12, 0.18], + "vol_ratio_exit": [2.0, 3.0], + "max_weight_per_asset": [0.7, 0.9], + "concentration_power": [1.6, 2.2], + } + + keys = list(grid.keys()) + combos = list(itertools.product(*(grid[k] for k in keys))) + + random.seed(int(args.seed)) + if int(args.max_combos) > 0 and len(combos) > int(args.max_combos): + combos = random.sample(combos, int(args.max_combos)) + + rows = [] + + for vals in combos: + kw = dict(zip(keys, vals)) + if int(kw["sma_fast"]) >= int(kw["sma_slow"]): + continue + + params = replace(base, **kw, rebalance_every=1, max_positions=constraints.max_positions) + + try: + equity, _w, _tr = run_backtest( + prices, + universe, + constraints, + params, + rates_fallback=rates_fallback, + risk_proxy=risk_proxy, + ) + except Exception: + continue + + st = perf_stats(equity["equity"]) + if not st: + continue + + row = {**st, **asdict(params)} + rows.append(row) + + df = pd.DataFrame(rows) + if df.empty: + print("no results") + return + + df = df[df["ann_vol"] <= 0.25].copy() + df = df.sort_values(["ann_return", "sharpe"], ascending=False) + + out = Path(args.out) + out.parent.mkdir(parents=True, exist_ok=True) + df.to_parquet(out, index=False) + + cols = [ + "ann_return", + "ann_vol", + "max_drawdown", + "sharpe", + "sma_fast", + "sma_slow", + "lazy_days", + "rebalance_band", + "atr_mult", + "profit_tighten_atr", + "atr_mult_profit", + "stop_loss_atr", + "bias_exit", + "vol_ratio_exit", + "max_weight_per_asset", + "concentration_power", + ] + + print("top10") + print(df[cols].head(10).to_string(index=False)) + + +if __name__ == "__main__": + main() diff --git a/scripts/grid_search_stage_a.py b/scripts/grid_search_stage_a.py new file mode 100644 index 0000000..2bf5135 --- /dev/null 
def main() -> None:
    """Run the stage-A grid search over the core trend parameters.

    Every combination of fast/slow SMA, lazy days, rebalance band and ATR
    multiple is backtested (combinations with ``sma_fast >= sma_slow`` are
    skipped). Results with annualised volatility above 0.25 are dropped,
    the rest are sorted by annual return then Sharpe, written to ``--out``
    as parquet and the top ten rows are printed.

    Combinations whose backtest raises are still skipped, but the number
    of failures and the first error are reported on stderr so that a
    systematically broken run is not mistaken for "no results".
    """
    import sys  # local import: only used for the failure summary

    ap = argparse.ArgumentParser()
    ap.add_argument("--config", default="configs/etf_universe.json")
    ap.add_argument("--rawdir", default="data/raw")
    ap.add_argument("--start", default="20200101")
    ap.add_argument("--end", default="20251231")
    ap.add_argument("--out", default="data/grid_stage_a.parquet")
    args = ap.parse_args()

    universe, constraints, risk_proxy, rates_fallback = load_universe(Path(args.config))
    prices = load_prices(Path(args.rawdir), universe, args.start, args.end)

    base = TrendParams(target_ann_vol=0.25)

    sma_fast_list = [3, 5, 8]
    sma_slow_list = [15, 20, 30, 40]
    lazy_days_list = [1, 2, 5, 10]
    band_list = [0.03, 0.05, 0.08]
    atr_mult_list = [2.5, 3.0, 3.2, 4.0]

    rows = []
    failed = 0
    first_error = None

    for sma_fast, sma_slow, lazy_days, band, atr_mult in itertools.product(
        sma_fast_list, sma_slow_list, lazy_days_list, band_list, atr_mult_list
    ):
        if sma_fast >= sma_slow:
            continue

        params = replace(
            base,
            rebalance_every=1,
            max_positions=constraints.max_positions,
            sma_fast=sma_fast,
            sma_slow=sma_slow,
            lazy_days=lazy_days,
            rebalance_band=band,
            atr_mult=float(atr_mult),
        )

        try:
            equity, _w, _tr = run_backtest(
                prices,
                universe,
                constraints,
                params,
                rates_fallback=rates_fallback,
                risk_proxy=risk_proxy,
            )
        except Exception as exc:
            # Best effort: one bad combo must not abort the whole grid,
            # but remember it so the failure is visible afterwards.
            failed += 1
            if first_error is None:
                first_error = exc
            continue

        st = perf_stats(equity["equity"])
        if not st:
            continue

        rows.append({**st, **asdict(params)})

    if failed:
        print(
            f"warning: {failed} parameter combos failed in run_backtest; first error: {first_error!r}",
            file=sys.stderr,
        )

    df = pd.DataFrame(rows)
    if df.empty:
        print("no results")
        return

    df = df[df["ann_vol"] <= 0.25].copy()
    df = df.sort_values(["ann_return", "sharpe"], ascending=False)

    out = Path(args.out)
    out.parent.mkdir(parents=True, exist_ok=True)
    df.to_parquet(out, index=False)

    cols = ["ann_return", "ann_vol", "max_drawdown", "sharpe", "sma_fast", "sma_slow", "lazy_days", "rebalance_band", "atr_mult"]
    print("top10")
    print(df[cols].head(10).to_string(index=False))
def load_prices(raw_dir: Path, universe: list[UniverseAsset], start: str, end: str) -> dict[str, pd.DataFrame]:
    """Load the per-asset price frames, restricted to a date window.

    Args:
        raw_dir: Directory holding one parquet file per asset, named after
            the ``ts_code`` with the dot removed.
        universe: Assets to load; only ``ts_code`` is read from each.
        start: Inclusive lower bound, compared as a string against
            ``trade_date``.
        end: Inclusive upper bound, compared the same way.

    Returns:
        Mapping from ``ts_code`` to its filtered price frame, whose
        ``trade_date`` column has been converted to ``str``.
    """

    def _read_one(ts_code: str) -> pd.DataFrame:
        # File name is the code without its exchange separator.
        stem = ts_code.replace(".", "")
        frame = pd.read_parquet(raw_dir / (stem + ".parquet")).copy()
        dates = frame["trade_date"].astype(str)
        frame["trade_date"] = dates
        in_window = (dates >= start) & (dates <= end)
        return frame[in_window]

    return {asset.ts_code: _read_one(asset.ts_code) for asset in universe}
def ensure_db(db_path: Path, param_cols: list[str]) -> None:
    """Create the ``trials`` table if needed and add missing parameter columns.

    The connection is closed explicitly: ``sqlite3``'s own context manager
    only commits or rolls back and would leave the handle open. Missing
    columns are found through ``PRAGMA table_info`` and added with quoted
    identifiers, so a parameter named after an SQL keyword is created
    rather than silently skipped.

    Args:
        db_path: SQLite file to create or upgrade; parent directories are
            created when absent.
        param_cols: Names of parameter columns that must exist. Each
            missing one is added with type ``REAL``.

    Raises:
        sqlite3.OperationalError: If a column cannot be added for a reason
            other than it already existing.
    """
    from contextlib import closing  # local import keeps the block self-contained

    db_path.parent.mkdir(parents=True, exist_ok=True)
    with closing(sqlite3.connect(str(db_path))) as con:
        con.execute("PRAGMA journal_mode=WAL")
        con.execute("PRAGMA synchronous=NORMAL")
        con.execute(
            """
            CREATE TABLE IF NOT EXISTS trials (
              id INTEGER PRIMARY KEY AUTOINCREMENT,
              run_id TEXT NOT NULL,
              ts_utc TEXT NOT NULL,
              code_version TEXT,
              config_path TEXT,
              start TEXT,
              end TEXT,
              seed INTEGER,
              trial INTEGER,
              jobs INTEGER,
              ann_return REAL,
              ann_vol REAL,
              max_drawdown REAL,
              sharpe REAL,
              trades_per_year REAL
            )
            """
        )
        # SQLite column names are case-insensitive, so compare lowercased.
        existing = {str(row[1]).lower() for row in con.execute("PRAGMA table_info(trials)")}
        for c in param_cols:
            if c.lower() in existing:
                continue
            quoted = '"' + c.replace('"', '""') + '"'
            try:
                con.execute(f"ALTER TABLE trials ADD COLUMN {quoted} REAL")
            except sqlite3.OperationalError as exc:
                # Another process may have added the column in the meantime;
                # anything else is a real error and must surface.
                if "duplicate column name" not in str(exc).lower():
                    raise
            existing.add(c.lower())
        con.commit()
def main() -> None:
    """Run a local random search around the best parameters saved in the state file.

    The stored best row is turned into a ``TrendParams`` base. Each trial
    then re-samples the parameters of the enabled ``--tweak`` groups,
    backtests the result and keeps it unless it trades more often than
    ``--max_trades_per_year``. A trial with a higher annual return than the
    current best replaces ``state["best"]`` immediately. All kept trials
    are written to SQLite at the end and a history entry is appended to
    the state file.

    Two defects of the earlier version are handled here:

    * When no trial survives the filters the final report is skipped; an
      empty frame has no ``ann_return`` column, so sorting by it raised
      ``KeyError``.
    * A stored best ``ann_return`` of ``0.0`` is kept as the bar to beat
      and not treated as missing; a non-finite stored value falls back
      to ``-inf`` so new trials can still win.

    Raises:
        SystemExit: If the state file has no ``best`` entry to start from.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("--config", default="configs/etf_universe_industry_profiled.json")
    ap.add_argument("--rawdir", default="data/raw")
    ap.add_argument("--start", default="20200101")
    ap.add_argument("--end", default="20251231")
    ap.add_argument("--trials", type=int, default=20)
    ap.add_argument("--seed", type=int, default=1)
    ap.add_argument("--jobs", type=int, default=1)
    ap.add_argument("--state", default="data/opt_state.json")
    ap.add_argument("--db", default="data/experiments.sqlite")
    ap.add_argument("--max_trades_per_year", type=float, default=80.0)
    ap.add_argument("--progress_every", type=int, default=5)
    ap.add_argument(
        "--tweak",
        action="append",
        default=[],
        help=(
            "Enable a tweak group. Repeatable. Options: macro, churn, stops, score, switches, switches2, signal1, orth_ma, orth_weights, orth_mech, asym_fast, positions, exits. "
            "(Each group adjusts <=4 params around current best.)"
        ),
    )
    args = ap.parse_args()

    rng = random.Random(int(args.seed))
    np.random.seed(int(args.seed))

    config_path = Path(args.config)
    universe, constraints, risk_proxy, rates_fallback = load_universe(config_path)
    prices = load_prices(Path(args.rawdir), universe, str(args.start), str(args.end))

    state_path = Path(args.state)
    state = load_state(state_path)
    best_row = state.get("best")
    if not best_row:
        raise SystemExit("opt_state.json missing best")

    tp_fields = {f.name for f in fields(TrendParams)}

    defaults = TrendParams(max_positions=constraints.max_positions)
    best_params = {k: best_row[k] for k in best_row.keys() if k in tp_fields}

    # JSON round-trips may turn ints into floats; coerce back to the
    # type of the corresponding dataclass default.
    typed: dict[str, Any] = {}
    for k, v in best_params.items():
        t = type(getattr(defaults, k))
        if t is int:
            typed[k] = int(v)
        elif t is float:
            typed[k] = float(v)
        else:
            typed[k] = v

    base = replace(defaults, **typed)

    tweaks = set(args.tweak or [])

    def sample_params() -> TrendParams:
        """Return ``base`` with the parameters of every enabled tweak group re-sampled."""
        p = base

        if "macro" in tweaks:
            p = replace(
                p,
                macro_min_breadth=float(rng.choice([0.10, 0.12, 0.15, 0.18, 0.20])),
                macro_down_frac=float(rng.choice([0.75, 0.78, 0.80, 0.82, 0.85])),
            )

        if "churn" in tweaks:
            p = replace(
                p,
                lazy_days=int(rng.choice([6, 8, 10])),
                min_hold_days=int(rng.choice([2, 3, 4, 5])),
                replace_score_gap=float(rng.choice([0.5, 0.8, 1.2, 1.6])),
            )

        if "switches" in tweaks:
            # switch/constraint knobs (exactly 4 factors)
            p = replace(
                p,
                desired_positions_min=int(rng.choice([1, 2, 3])),
                replace_score_gap=float(rng.choice([0.0, 0.3, 0.5, 0.8, 1.2])),
                lazy_days=int(rng.choice([4, 6, 8, 10, 12])),
                min_hold_days=int(rng.choice([1, 2, 3, 4, 5])),
            )

        if "switches2" in tweaks:
            # route D churn control without forcing higher min holdings (desired_positions_min fixed)
            # exactly 4 factors: replace_score_gap, lazy_days, min_hold_days, cooldown_days
            p = replace(
                p,
                desired_positions_min=int(1),
                replace_score_gap=float(rng.choice([0.5, 0.8, 1.0, 1.2, 1.6])),
                lazy_days=int(rng.choice([8, 10, 12, 14, 16])),
                min_hold_days=int(rng.choice([3, 5, 7, 10])),
                cooldown_days=int(rng.choice([0, 2, 4, 6, 8, 10])),
            )

        if "signal1" in tweaks:
            # route D: improve signal quality (exactly 4 factors)
            p = replace(
                p,
                min_score=float(rng.choice([0.0, 0.05, 0.10, 0.15, 0.20, 0.25, 0.30])),
                trend_strength_weight=float(rng.choice([0.0, 0.2, 0.4, 0.6, 0.8, 1.0])),
                score_vol_denom_floor=float(rng.choice([0.01, 0.02, 0.03, 0.04, 0.05])),
                macro_min_breadth=float(rng.choice([0.10, 0.15, 0.20, 0.25, 0.30])),
            )

        if "orth_ma" in tweaks:
            # route R: orthogonal to score/stops/exits; explore timing knobs (exactly 4 factors)
            p = replace(
                p,
                sma_fast=int(rng.choice([3, 5, 7, 9, 12])),
                sma_slow=int(rng.choice([20, 30, 40, 60, 90])),
                rebalance_every=int(rng.choice([1, 2, 3, 5])),
                max_replaces_per_day=int(rng.choice([0, 1, 2])),
            )
            if p.sma_fast >= p.sma_slow:
                p = replace(p, sma_fast=max(3, int(p.sma_slow // 6)))

        if "orth_weights" in tweaks:
            # route R: orthogonal portfolio weight shape (exactly 4 factors)
            max_positions = int(rng.choice([2, 3, 4, 5]))
            desired_min = int(rng.choice([1, 2, 3]))
            desired_max = int(rng.choice([2, 3, 4, 5]))
            # keep min <= max <= max_positions
            desired_min = min(desired_min, desired_max)
            desired_max = min(desired_max, max_positions)
            desired_min = min(desired_min, desired_max)
            p = replace(
                p,
                max_positions=max_positions,
                desired_positions_min=desired_min,
                desired_positions_max=desired_max,
                max_weight_per_asset=float(rng.choice([0.35, 0.45, 0.60, 0.75, 0.90, 1.00])),
            )
            # concentration_power exists in TrendParams; adjust it separately (still counts as one factor)
            p = replace(p, concentration_power=float(rng.choice([1.2, 1.6, 2.0, 2.2, 2.6, 3.0])))

        if "orth_mech" in tweaks:
            # route R: mechanism/turnover knobs (exactly 4 factors)
            p = replace(
                p,
                rebalance_every=int(rng.choice([1, 2, 3, 5])),
                replace_score_gap=float(rng.choice([0.0, 0.3, 0.5, 0.8, 1.2])),
                max_replaces_per_day=int(rng.choice([0, 1, 2, 3])),
                cooldown_days=int(rng.choice([0, 2, 4, 6, 8, 10])),
            )

        if "asym_fast" in tweaks:
            # asymmetric bull/bear risk controls (fast-run) (exactly 4 factors)
            p = replace(
                p,
                regime_confirm_days=int(rng.choice([2, 3, 4, 5])),
                bull_atr_mult=float(rng.choice([3.0, 3.2, 3.4, 3.6])),
                bear_atr_mult=float(rng.choice([2.0, 2.2, 2.4, 2.6, 2.8])),
                bear_stop_loss_atr=float(rng.choice([2.0, 2.2, 2.4, 2.6, 2.8])),
            )

        if "positions" in tweaks:
            # concentration/positioning knobs (exactly 4 factors)
            max_positions = int(rng.choice([2, 3, 4]))
            desired_min = int(rng.choice([1, 2, 3]))
            desired_max = int(rng.choice([2, 3, 4]))
            # keep consistent
            desired_min = min(desired_min, desired_max)
            desired_max = min(desired_max, max_positions)
            desired_min = min(desired_min, desired_max)
            p = replace(
                p,
                max_positions=max_positions,
                desired_positions_min=desired_min,
                desired_positions_max=desired_max,
                max_weight_per_asset=float(rng.choice([0.45, 0.60, 0.75, 0.90, 1.00])),
            )

        if "stops" in tweaks:
            # risk-control fine search (route D: prefer higher sharpe / lower drawdown)
            p = replace(
                p,
                atr_mult=float(rng.choice([3.0, 3.2, 3.4, 3.6])),
                stop_loss_atr=float(rng.choice([2.4, 2.6, 2.8, 3.0, 3.2])),
                profit_tighten_atr=float(rng.choice([4.0, 6.0, 8.0])),
                atr_mult_profit=float(rng.choice([1.3, 1.5, 1.8, 2.0])),
            )

        if "exits" in tweaks:
            # anomaly exits fine search (route D) - exactly 4 factors
            p = replace(
                p,
                bias_window=int(rng.choice([10, 15, 20, 30])),
                bias_exit=float(rng.choice([0.12, 0.16, 0.20, 0.25, 0.30])),
                vol_short=int(rng.choice([3, 5, 8, 10])),
                vol_ratio_exit=float(rng.choice([2.0, 2.5, 3.0, 3.5, 4.0])),
            )

        if "score" in tweaks:
            # aggressive weight search for higher ann_return
            p = replace(
                p,
                min_score=float(rng.choice([-0.10, 0.00, 0.05, 0.10, 0.20, 0.30, 0.40])),
                trend_strength_weight=float(rng.choice([0.00, 0.20, 0.40, 0.60, 0.80, 1.00])),
                w_r20=float(rng.choice([0.20, 0.35, 0.50, 0.65, 0.80])),
                w_r60=float(rng.choice([0.00, 0.10, 0.20, 0.35, 0.50])),
            )
            # Split whatever weight is left between the 5- and 120-day terms.
            remain = 1.0 - (p.w_r20 + p.w_r60)
            w_r5 = float(max(0.0, min(0.6, remain * 0.6)))
            w_r120 = float(max(0.0, remain - w_r5))
            p = replace(p, w_r5=w_r5, w_r120=w_r120)

        return p

    param_cols = sorted(asdict(base).keys())
    db_path = Path(args.db)
    ensure_db(db_path, param_cols=param_cols)

    run_id = datetime.now(timezone.utc).strftime("%Y%m%dT%H%M%SZ") + f"_bestlocal_seed{int(args.seed)}" + ("_" + "-".join(sorted(tweaks)) if tweaks else "")
    code_version = infer_code_version(Path("."))

    # 0.0 is a legitimate stored return; only a missing or non-finite
    # value means "nothing to beat yet".
    prior_best = best_row.get("ann_return")
    if prior_best is None or not np.isfinite(float(prior_best)):
        best_ann = float("-inf")
    else:
        best_ann = float(prior_best)

    rows_for_db: list[dict[str, Any]] = []
    valid = 0
    for t in range(int(args.trials)):
        p = sample_params()

        equity, _w, tr = run_backtest(
            prices,
            universe,
            constraints,
            p,
            rates_fallback=rates_fallback,
            risk_proxy=risk_proxy,
        )
        st = perf_stats(equity["equity"])
        if not st:
            continue

        tpy = trades_per_year(tr, str(args.start), str(args.end))
        if tpy > float(args.max_trades_per_year):
            continue

        valid += 1
        row = {**st, "trades_per_year": float(tpy), **asdict(p)}
        row["trial"] = int(t)
        row["seed"] = int(args.seed)

        if float(row["ann_return"]) > best_ann:
            best_ann = float(row["ann_return"])
            state["best"] = row
            save_state(state_path, state)

        db_row = {
            "run_id": run_id,
            "ts_utc": datetime.now(timezone.utc).isoformat(),
            "code_version": code_version,
            "config_path": str(config_path),
            "start": str(args.start),
            "end": str(args.end),
            "seed": int(args.seed),
            "trial": int(t),
            "jobs": int(args.jobs),
            "ann_return": float(row["ann_return"]),
            "ann_vol": float(row["ann_vol"]),
            "max_drawdown": float(row["max_drawdown"]),
            "sharpe": float(row["sharpe"]),
            "trades_per_year": float(row["trades_per_year"]),
        }
        for c in param_cols:
            db_row[c] = row.get(c)
        rows_for_db.append(db_row)

        if int(args.progress_every) > 0 and valid % int(args.progress_every) == 0:
            print(f"progress valid={valid} best_ann={best_ann:.4f}", flush=True)

    if rows_for_db:
        insert_rows(db_path, param_cols=param_cols, rows=rows_for_db)

    state.setdefault("history", []).append(
        {
            "timestamp": datetime.now(timezone.utc).isoformat(),
            "run_id": run_id,
            "code_version": code_version,
            "config": str(args.config),
            "start": str(args.start),
            "end": str(args.end),
            "trials": int(args.trials),
            "jobs": int(args.jobs),
            "best_ann_return": float(best_ann) if np.isfinite(best_ann) else None,
            "db": str(args.db),
            "base_from": "opt_state.best",
            "tweaks": sorted(tweaks),
        }
    )
    save_state(state_path, state)

    if not rows_for_db:
        # Nothing passed the filters: an empty frame has no "ann_return"
        # column, so the report below would raise KeyError.
        print("run_id", run_id)
        print("no valid trials")
        return

    df = pd.DataFrame(rows_for_db).sort_values(["ann_return"], ascending=False)
    view_cols = [
        "ann_return",
        "ann_vol",
        "max_drawdown",
        "sharpe",
        "trades_per_year",
        "atr_mult",
        "stop_loss_atr",
        "profit_tighten_atr",
        "atr_mult_profit",
    ]
    view_cols = [c for c in view_cols if c in df.columns]
    print("run_id", run_id)
    print(df[view_cols].head(8).to_string(index=False))
def trades_per_year(trades: pd.DataFrame, start: str, end: str) -> float:
    """Return the average number of trade rows per calendar year.

    The span is counted in whole calendar years, both endpoints included,
    using the first four characters of ``start`` and ``end``; it is never
    taken to be less than one year.

    Args:
        trades: Trade log with one row per trade; may be ``None`` or empty.
        start: Window start whose first four characters are the year.
        end: Window end in the same format.

    Returns:
        ``len(trades)`` divided by the year span, or ``0.0`` when there
        are no trades.
    """
    has_trades = trades is not None and not trades.empty
    if not has_trades:
        return 0.0
    first_year, last_year = int(start[:4]), int(end[:4])
    span = last_year - first_year + 1
    if span < 1:
        span = 1
    return float(len(trades) / span)
def infer_code_version(repo_dir: Path) -> str:
    """Return the checked-out git commit hash without invoking ``git``.

    ``.git/HEAD`` either holds a hash directly (detached HEAD) or a
    ``ref: <name>`` pointer. The pointer is resolved from the loose ref
    file first and then from ``.git/packed-refs``, because refs are moved
    there by ``git gc`` / ``git pack-refs`` and the loose file then no
    longer exists.

    Args:
        repo_dir: Directory expected to contain a ``.git`` directory.

    Returns:
        The commit hash when it can be resolved; the raw ``HEAD`` text when
        the ref cannot be found; ``"unknown"`` when reading fails; and
        ``"nogit"`` when there is no ``.git/HEAD``.
    """
    git_dir = repo_dir / ".git"
    head = git_dir / "HEAD"
    if not head.exists():
        return "nogit"
    try:
        txt = head.read_text(encoding="utf-8").strip()
        if not txt.startswith("ref:"):
            return txt  # detached HEAD: the file already holds the hash

        ref = txt[len("ref:"):].strip()
        ref_path = git_dir / ref
        if ref_path.exists():
            return ref_path.read_text(encoding="utf-8").strip()

        packed = git_dir / "packed-refs"
        if packed.exists():
            for line in packed.read_text(encoding="utf-8").splitlines():
                # "#" starts the header, "^" marks a peeled tag target.
                if not line or line.startswith(("#", "^")):
                    continue
                sha, _, name = line.strip().partition(" ")
                if name == ref:
                    return sha
        return txt
    except (OSError, ValueError):
        return "unknown"
def reservoir_sample_product(rng, iterables, k: int):
    """Pick up to ``k`` combinations from a cartesian product in one pass.

    The product is streamed and never materialised. The first ``k``
    combinations fill the reservoir; for each later one an index is drawn
    with ``rng.randrange`` over the number of combinations seen so far,
    and the combination overwrites that slot when the index falls inside
    the reservoir.

    Args:
        rng: Object providing ``randrange(n)``, e.g. ``random.Random``.
        iterables: Sequences whose cartesian product is sampled.
        k: Maximum number of combinations to return.

    Returns:
        A list of at most ``k`` tuples; the whole product in order when it
        has no more than ``k`` members.
    """
    import itertools

    reservoir = []
    for seen, combo in enumerate(itertools.product(*iterables), start=1):
        if len(reservoir) < k:
            reservoir.append(combo)
            continue
        slot = rng.randrange(seen)
        if slot < k:
            reservoir[slot] = combo
    return reservoir
ap.add_argument("--start", default="20200101") + ap.add_argument("--end", default="20251231") + ap.add_argument("--trials", type=int, default=240) + ap.add_argument("--mode", choices=["random", "grid"], default="random") + ap.add_argument("--max_grid", type=int, default=MAX_GRID_COMBOS) + ap.add_argument("--seed", type=int, default=1) + ap.add_argument("--jobs", type=int, default=1, help="Parallel workers (processes), up to 8") + ap.add_argument("--state", default="data/opt_state.json") + ap.add_argument("--db", default="data/experiments.sqlite") + ap.add_argument("--baseline", type=float, default=None) + ap.add_argument("--report_step", type=float, default=0.05) + ap.add_argument("--max_trades_per_year", type=float, default=80.0) + ap.add_argument("--progress_every", type=int, default=25) + args = ap.parse_args() + jobs = max(1, min(8, int(args.jobs))) + + random.seed(args.seed) + np.random.seed(args.seed) + + config_path = Path(args.config) + universe, constraints, risk_proxy, rates_fallback = load_universe(config_path) + prices = load_prices(Path(args.rawdir), universe, args.start, args.end) + _init_globals(prices, universe, constraints, risk_proxy, rates_fallback) + + state_path = Path(args.state) + state = load_state(state_path) + + best = state.get("best") + best_ann = float(best["ann_return"]) if best else float("-inf") + + baseline = args.baseline + if baseline is None: + baseline = best_ann if np.isfinite(best_ann) else 0.0 + + last_rep = state.get("last_reported_ann_return") + if last_rep is None: + last_rep = baseline + + params0 = TrendParams(max_positions=constraints.max_positions) + params0_dict = asdict(params0) + + # Parameter columns to persist as structured fields in SQLite + param_cols = sorted(params0_dict.keys()) + + db_path = Path(args.db) + ensure_db(db_path, param_cols=param_cols) + + run_id = datetime.now(timezone.utc).strftime("%Y%m%dT%H%M%SZ") + f"_seed{int(args.seed)}" + code_version = infer_code_version(Path(".")) + + tasks: 
list[dict[str, Any]] = [] + + rng = random.Random(int(args.seed)) + + if str(args.mode) == "grid": + grids = { + "sma_fast": [3, 5], + "sma_slow": [15, 20, 30], + "lazy_days": [4, 5, 6, 8], + "min_hold_days": [2, 3, 5], + "replace_score_gap": [0.5, 0.8, 1.2, 1.6], + "min_score": [0.0, 0.2, 0.4, 0.6], + "desired_positions_min": [1, 2], + "macro_min_breadth": [0.10, 0.15, 0.20, 0.30], + "macro_down_frac": [0.75, 0.80, 0.85], + "atr_mult": [2.5, 3.2, 4.0], + "stop_loss_atr": [2.0, 2.5, 3.2], + "profit_tighten_atr": [4.0, 6.0, 8.0], + "atr_mult_profit": [1.5, 2.0, 2.5], + "bias_exit": [0.12, 0.18, 0.25], + "vol_ratio_exit": [3.0, 4.0], + } + + keys = list(grids.keys()) + iters = [list(grids[k]) for k in keys] + total = 1 + for xs in iters: + total *= max(1, len(xs)) + max_grid = max(1, int(args.max_grid)) + if total > max_grid: + print(f"grid combos {total} > {max_grid}; sampling combos", flush=True) + combos = reservoir_sample_product(rng, iters, max_grid) + else: + import itertools + combos = list(itertools.product(*iters)) + + for t, combo in enumerate(combos): + vals = dict(zip(keys, combo)) + sma_fast = int(vals["sma_fast"]) + sma_slow = int(vals["sma_slow"]) + if sma_fast >= sma_slow: + continue + p = replace( + params0, + sma_fast=sma_fast, + sma_slow=sma_slow, + lazy_days=int(vals["lazy_days"]), + min_hold_days=int(vals["min_hold_days"]), + replace_score_gap=float(vals["replace_score_gap"]), + min_score=float(vals["min_score"]), + desired_positions_min=int(vals["desired_positions_min"]), + desired_positions_max=int(3), + macro_min_breadth=float(vals["macro_min_breadth"]), + macro_down_frac=float(vals["macro_down_frac"]), + atr_mult=float(vals["atr_mult"]), + stop_loss_atr=float(vals["stop_loss_atr"]), + profit_tighten_atr=float(vals["profit_tighten_atr"]), + atr_mult_profit=float(vals["atr_mult_profit"]), + bias_exit=float(vals["bias_exit"]), + vol_ratio_exit=float(vals["vol_ratio_exit"]), + rebalance_every=1, + ) + + tasks.append({ + "trial": int(t), + "seed": 
int(args.seed), + "start": str(args.start), + "end": str(args.end), + "max_trades_per_year": float(args.max_trades_per_year), + "params": {k: asdict(p)[k] for k in param_cols}, + }) + else: + for t in range(int(args.trials)): + sma_fast = rng.choice([3, 5]) + sma_slow = rng.choice([15, 20, 30]) + if sma_fast >= sma_slow: + continue + lazy_days = rng.choice([4, 5, 6, 8]) + min_hold = rng.choice([2, 3, 5]) + replace_gap = rng.choice([0.5, 0.8, 1.2, 1.6]) + min_score = rng.choice([0.0, 0.2, 0.4, 0.6]) + dmin = rng.choice([1, 2]) + dmax = 3 + macro_min_breadth = rng.choice([0.10, 0.15, 0.20, 0.30]) + macro_down_frac = rng.choice([0.75, 0.80, 0.85]) + atr_mult = rng.choice([2.5, 3.2, 4.0]) + stop_loss_atr = rng.choice([2.0, 2.5, 3.2]) + profit_tighten_atr = rng.choice([4.0, 6.0, 8.0]) + atr_mult_profit = rng.choice([1.5, 2.0, 2.5]) + bias_exit = rng.choice([0.12, 0.18, 0.25]) + vol_ratio_exit = rng.choice([3.0, 4.0]) + p = replace(params0, sma_fast=int(sma_fast), sma_slow=int(sma_slow), lazy_days=int(lazy_days), min_hold_days=int(min_hold), replace_score_gap=float(replace_gap), min_score=float(min_score), desired_positions_min=int(dmin), desired_positions_max=int(dmax), macro_min_breadth=float(macro_min_breadth), macro_down_frac=float(macro_down_frac), atr_mult=float(atr_mult), stop_loss_atr=float(stop_loss_atr), profit_tighten_atr=float(profit_tighten_atr), atr_mult_profit=float(atr_mult_profit), bias_exit=float(bias_exit), vol_ratio_exit=float(vol_ratio_exit), rebalance_every=1) + tasks.append({"trial": int(t), "seed": int(args.seed), "start": str(args.start), "end": str(args.end), "max_trades_per_year": float(args.max_trades_per_year), "params": {k: asdict(p)[k] for k in param_cols}}) + + + results: list[dict[str, Any]] = [] + rows_for_db: list[dict[str, Any]] = [] + + def record_row(row: dict[str, Any]) -> None: + nonlocal best_ann + results.append(row) + + if float(row["ann_return"]) > best_ann: + best_ann = float(row["ann_return"]) + state["best"] = row + 
save_state(state_path, state) + + db_row = { + "run_id": run_id, + "ts_utc": datetime.now(timezone.utc).isoformat(), + "code_version": code_version, + "config_path": str(config_path), + "start": str(args.start), + "end": str(args.end), + "seed": int(args.seed), + "trial": int(row.get("trial", -1)), + "jobs": int(jobs), + "ann_return": float(row["ann_return"]), + "ann_vol": float(row["ann_vol"]), + "max_drawdown": float(row["max_drawdown"]), + "sharpe": float(row["sharpe"]), + "trades_per_year": float(row["trades_per_year"]), + } + for c in param_cols: + db_row[c] = row.get(c) + rows_for_db.append(db_row) + + if len(rows_for_db) >= 200: + insert_rows(db_path, param_cols=param_cols, rows=rows_for_db) + rows_for_db.clear() + + if jobs == 1: + for task in tasks: + row = _eval_one(task) + if row is None: + continue + record_row(row) + if int(args.progress_every) > 0 and (len(results) % int(args.progress_every) == 0): + print(f"progress valid={len(results)} best_ann={best_ann:.4f}", flush=True) + else: + import multiprocessing as mp + from concurrent.futures import ProcessPoolExecutor, as_completed + + ctx = mp.get_context("fork") + with ProcessPoolExecutor(max_workers=jobs, mp_context=ctx) as ex: + futs = [ex.submit(_eval_one, task) for task in tasks] + for fut in as_completed(futs): + row = fut.result() + if row is None: + continue + record_row(row) + if int(args.progress_every) > 0 and (len(results) % int(args.progress_every) == 0): + print(f"progress valid={len(results)} best_ann={best_ann:.4f}", flush=True) + + if rows_for_db: + insert_rows(db_path, param_cols=param_cols, rows=rows_for_db) + rows_for_db.clear() + + state["history"].append( + { + "timestamp": datetime.now(timezone.utc).isoformat(), + "run_id": run_id, + "code_version": code_version, + "config": str(args.config), + "start": str(args.start), + "end": str(args.end), + "trials": int(args.trials), + "jobs": int(jobs), + "best_ann_return": float(best_ann) if np.isfinite(best_ann) else None, + "db": 
def fetch_top(con: sqlite3.Connection, run_id: str, limit: int) -> list[dict[str, Any]]:
    """Return the best trials of one run as dictionaries.

    Column names are taken from ``PRAGMA table_info(trials)`` (second field
    of each info row) and zipped with every ``SELECT *`` result row.

    Args:
        con: Open connection to the experiments database.
        run_id: Run identifier to filter on.
        limit: Maximum number of rows to return.

    Returns:
        Rows for ``run_id`` ordered by ``ann_return`` descending, one dict
        per row keyed by column name; empty when the run has no rows.
    """
    column_names = [info[1] for info in con.execute("PRAGMA table_info(trials)")]
    query = "SELECT * FROM trials WHERE run_id = ? ORDER BY ann_return DESC LIMIT ?"
    cursor = con.execute(query, [run_id, int(limit)])
    return [dict(zip(column_names, record)) for record in cursor]
def perf_stats(equity: pd.Series) -> dict[str, float]:
    """Summarise an equity curve with annualised performance statistics.

    Annualisation uses 252 periods per year. The returned keys match the
    ``perf_stats`` in ``scripts/verify_topn.py`` (including ``sharpe``), so
    numbers printed by this script can be compared with stored trials.

    Args:
        equity: Equity curve values in chronological order.

    Returns:
        ``{}`` when no period return can be computed; otherwise a dict with
        ``ann_return`` (compound growth annualised over the number of
        returns), ``ann_vol`` (sample std of returns times sqrt(252)),
        ``max_drawdown`` (most negative equity / running-peak - 1) and
        ``sharpe`` (``ann_return / ann_vol``, NaN unless ``ann_vol > 0``).
    """
    r = equity.pct_change().dropna()
    if r.empty:
        return {}
    ann_ret = float((equity.iloc[-1] / equity.iloc[0]) ** (252 / len(r)) - 1)
    ann_vol = float(r.std(ddof=1) * (252**0.5))
    dd = float((equity / equity.cummax() - 1.0).min())
    # No risk-free rate is subtracted; this mirrors the sibling implementation.
    sharpe = float(ann_ret / ann_vol) if ann_vol > 0 else float("nan")
    return {"ann_return": ann_ret, "ann_vol": ann_vol, "max_drawdown": dd, "sharpe": sharpe}
def main() -> None:
    """Run one ETF trend backtest from the command line.

    Steps: parse arguments, load the universe and constraints from the JSON
    config, apply any TrendParams overrides given on the CLI, load and
    date-filter prices, run the backtest, write equity / weights / trades
    parquet files next to ``--out`` and print summary statistics.

    Raises:
        RuntimeError: If no rates fallback is configured and the universe
            contains no asset whose ``asset_class`` starts with ``"rates"``.
    """
    # Local import: the file-level dataclasses import only brings in `fields`.
    from dataclasses import replace

    p = argparse.ArgumentParser()
    p.add_argument("--config", default="configs/etf_universe.json")
    p.add_argument("--rawdir", default="data/raw")
    p.add_argument("--out", default="data/etf_trend_equity.parquet")
    p.add_argument("--start", default="20200101", help="Filter start trade_date YYYYMMDD (inclusive)")
    p.add_argument("--end", default="20251231", help="Filter end trade_date YYYYMMDD (inclusive)")
    add_trendparams_args(p)
    args = p.parse_args()

    conf = json.loads(Path(args.config).read_text(encoding="utf-8"))
    universe = [UniverseAsset(**a) for a in conf["assets"]]

    cons = conf.get("constraints", {})
    must_include = cons.get("must_include", {})
    constraints = Constraints(
        max_positions=int(cons.get("max_positions", 4)),
        must_commodity=int(must_include.get("commodity", 1)),
        must_rates=int(must_include.get("rates", 1)),
        must_equity=int(must_include.get("equity", 1)),
    )

    params = TrendParams(max_positions=constraints.max_positions)

    # Apply CLI overrides. add_trendparams_args() skips flags whose field is
    # missing from TrendParams, so every value is read with getattr(): a flag
    # that was never registered must behave like "not given", not crash.
    override_names = (
        "sma_fast",
        "sma_slow",
        "lazy_days",
        "min_hold_days",
        "replace_score_gap",
        "min_score",
        "macro_down_frac",
        "desired_positions_min",
        "desired_positions_max",
        "rebalance_band",
        "atr_mult",
        "profit_tighten_atr",
        "atr_mult_profit",
        "stop_loss_atr",
        "bias_exit",
        "vol_ratio_exit",
        "max_weight_per_asset",
        "concentration_power",
        "macro_min_breadth",
        "macro_scale_risk_off",
    )
    known_fields = {f.name for f in fields(TrendParams)}
    overrides = {}
    for name in override_names:
        value = getattr(args, name, None)
        if value is not None and name in known_fields:
            overrides[name] = value
    if overrides:
        # TrendParams is a frozen dataclass; replace() builds the updated copy.
        params = replace(params, **overrides)

    risk_proxy = cons.get("risk_proxy", "510300.SH")

    # Fall back to the first rates asset in the universe when none is configured.
    rates_fallback = cons.get("rates_fallback")
    if rates_fallback is None:
        for a in universe:
            if a.asset_class.startswith("rates"):
                rates_fallback = a.ts_code
                break
    if not rates_fallback:
        raise RuntimeError("universe must include a rates asset for fallback")

    # trade_date is compared as a string, so YYYYMMDD bounds order correctly.
    start = str(args.start)
    end = str(args.end)
    prices = {}
    for code, df in load_prices(Path(args.rawdir), universe).items():
        d = df.copy()
        d["trade_date"] = d["trade_date"].astype(str)
        prices[code] = d[(d["trade_date"] >= start) & (d["trade_date"] <= end)]

    equity, weights, trades = run_backtest(
        prices,
        universe,
        constraints,
        params,
        rates_fallback=rates_fallback,
        risk_proxy=risk_proxy,
    )

    out = Path(args.out)
    out.parent.mkdir(parents=True, exist_ok=True)
    equity.to_parquet(out)

    weights_path = out.with_name(out.stem + "_weights" + out.suffix)
    trades_path = out.with_name(out.stem + "_trades" + out.suffix)
    weights.to_parquet(weights_path)
    if trades is not None and not trades.empty:
        trades.to_parquet(trades_path, index=False)
        print(f"wrote trades -> {trades_path}")

    st = perf_stats(equity["equity"])
    print("perf", st)
    print("last equity", float(equity["equity"].iloc[-1]))
    print("last weights", weights.iloc[-1].sort_values(ascending=False).head(10).to_dict())
def main() -> None:
    """Smoke-test the factor pipeline on a tiny synthetic panel.

    Builds a 3-date x 4-asset MultiIndex, draws a random factor and random
    forward returns from a generator seeded with 42, winsorizes and z-scores
    the factor per date, then prints the mean information coefficient.
    """
    asset_names = ["A", "B", "C", "D"]
    trading_days = pd.to_datetime(["2026-01-01", "2026-01-02", "2026-01-03"])
    panel_index = pd.MultiIndex.from_product([trading_days, asset_names], names=["date", "asset"])
    n_obs = len(panel_index)

    # Draw order matters for reproducibility: factor first, then returns.
    gen = np.random.default_rng(42)
    raw_factor = pd.Series(gen.normal(size=n_obs), index=panel_index)
    forward_returns = pd.Series(gen.normal(scale=0.01, size=n_obs), index=panel_index)

    clipped = winsorize_by_date(raw_factor)
    standardized = zscore_by_date(clipped)
    ic_by_date = information_coefficient(standardized, forward_returns)

    print("IC mean:", float(ic_by_date.mean()))
000001.SZ") + p.add_argument("--start", dest="start_date", default=None, help="YYYYMMDD") + p.add_argument("--end", dest="end_date", default=None, help="YYYYMMDD") + p.add_argument("--trade-date", default=None, help="YYYYMMDD") + p.add_argument("--out", default="data/raw/tushare_daily.parquet") + args = p.parse_args() + + cfg = load_tushare_config(args.env) + df = fetch_daily( + cfg, + ts_code=args.ts_code, + trade_date=args.trade_date, + start_date=args.start_date, + end_date=args.end_date, + ) + + out = Path(args.out) + out.parent.mkdir(parents=True, exist_ok=True) + df.to_parquet(out, index=False) + print(f"wrote {len(df)} rows -> {out}") + + +if __name__ == "__main__": + main() diff --git a/scripts/tushare_download_universe.py b/scripts/tushare_download_universe.py new file mode 100644 index 0000000..63a449c --- /dev/null +++ b/scripts/tushare_download_universe.py @@ -0,0 +1,47 @@ +from __future__ import annotations + +import argparse +import json +from pathlib import Path + +import pandas as pd + +from qfr.data.tushare_client import fetch_fund_daily, load_tushare_config + + +def main() -> None: + p = argparse.ArgumentParser() + p.add_argument("--env", default=None, help="Path to .env") + p.add_argument("--config", default="configs/etf_universe.json") + p.add_argument("--start", dest="start_date", default=None, help="YYYYMMDD") + p.add_argument("--end", dest="end_date", default=None, help="YYYYMMDD") + p.add_argument("--outdir", default="data/raw") + args = p.parse_args() + + cfg = load_tushare_config(args.env) + + conf = json.loads(Path(args.config).read_text(encoding="utf-8")) + assets = conf["assets"] + outdir = Path(args.outdir) + outdir.mkdir(parents=True, exist_ok=True) + + for a in assets: + ts_code = a["ts_code"] + df = fetch_fund_daily(cfg, ts_code=ts_code, start_date=args.start_date, end_date=args.end_date) + if df is None or df.empty: + print(f"skip {ts_code}: empty") + continue + + # standardize columns expected by backtest + # fund_daily provides: 
def load_universe(config_path: Path) -> tuple[list[UniverseAsset], Constraints, str, str]:
    """Load the asset universe and portfolio constraints from a JSON config.

    Args:
        config_path: Path to a JSON file with an ``assets`` list and an
            optional ``constraints`` object.

    Returns:
        ``(universe, constraints, risk_proxy, rates_fallback)``.
        ``risk_proxy`` defaults to the first asset's ``ts_code`` (or
        ``"510300.SH"`` for an empty universe) and ``rates_fallback``
        defaults to ``"511010.SH"``; both defaults also apply when the
        config value is present but null or empty.

    Raises:
        KeyError: If the config has no ``assets`` key.
    """
    conf = json.loads(config_path.read_text(encoding="utf-8"))
    universe = [UniverseAsset(**a) for a in conf["assets"]]

    cons = conf.get("constraints", {})
    must_include = cons.get("must_include", {})
    constraints = Constraints(
        max_positions=int(cons.get("max_positions", 3)),
        must_commodity=int(must_include.get("commodity", 0)),
        must_rates=int(must_include.get("rates", 0)),
        must_equity=int(must_include.get("equity", 0)),
    )

    risk_proxy = cons.get("risk_proxy") or (universe[0].ts_code if universe else "510300.SH")
    # `or` instead of a .get() default: an explicit JSON null would otherwise
    # be stringified to "None" by the str() call below.
    rates_fallback = cons.get("rates_fallback") or "511010.SH"
    return universe, constraints, str(risk_proxy), str(rates_fallback)
def perf_stats(equity: pd.Series) -> dict[str, float]:
    """Compute annualised return, volatility, max drawdown and Sharpe ratio.

    Annualisation assumes 252 periods per year.

    Args:
        equity: Equity curve values in chronological order.

    Returns:
        ``{}`` when the curve yields no period returns; otherwise a dict
        with ``ann_return``, ``ann_vol``, ``max_drawdown`` and ``sharpe``
        (``ann_return / ann_vol``, NaN unless ``ann_vol`` is positive).
    """
    returns = equity.pct_change().dropna()
    n_periods = len(returns)
    if n_periods == 0:
        return {}

    total_growth = equity.iloc[-1] / equity.iloc[0]
    annual_return = float(total_growth ** (252 / n_periods) - 1)
    annual_vol = float(returns.std(ddof=1) * (252**0.5))

    running_peak = equity.cummax()
    worst_drawdown = float((equity / running_peak - 1.0).min())

    if annual_vol > 0:
        sharpe_ratio = float(annual_return / annual_vol)
    else:
        sharpe_ratio = float("nan")

    return {
        "ann_return": annual_return,
        "ann_vol": annual_vol,
        "max_drawdown": worst_drawdown,
        "sharpe": sharpe_ratio,
    }
def main() -> None:
    """Re-run the top-N stored trials and compare the statistics.

    For every selected row of the ``trials`` table the stored parameters are
    coerced back to the TrendParams field types, the backtest is re-run, and
    the recomputed ``ann_return`` / ``ann_vol`` / ``max_drawdown`` /
    ``sharpe`` are diffed against the stored values. A row is reported as
    ``MISMATCH`` when any absolute difference exceeds ``--tol`` or when the
    re-run produced no statistics at all.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("--db", default="data/experiments.sqlite")
    ap.add_argument("--run_id", default=None)
    ap.add_argument("--topn", type=int, default=10)
    ap.add_argument("--config", default="configs/etf_universe_industry_profiled.json")
    ap.add_argument("--rawdir", default="data/raw")
    ap.add_argument("--start", default=None)
    ap.add_argument("--end", default=None)
    ap.add_argument("--tol", type=float, default=1e-6)
    args = ap.parse_args()

    db_path = Path(args.db)
    cols, rows = fetch_topn(db_path, args.run_id, args.topn)
    if not rows:
        print("no trials found")
        return

    config_path = Path(args.config)
    universe, constraints, risk_proxy, rates_fallback = load_universe(config_path)

    tp_fields = {f.name for f in fields(TrendParams)}
    # Coerce param types: sqlite stores numerics as REAL, so ints may come back as floats.
    _defaults = TrendParams()
    _field_types = {name: type(getattr(_defaults, name)) for name in tp_fields}

    def _coerce(name: str, v):
        """Convert a stored value to the type of the matching TrendParams default."""
        if v is None:
            return None
        t = _field_types.get(name)
        if t is int:
            return int(round(float(v)))
        if t is bool:
            return bool(int(round(float(v))))
        return float(v)

    metric_keys = ["ann_return", "ann_vol", "max_drawdown", "sharpe"]
    tol = float(args.tol)
    raw_dir = Path(args.rawdir)
    nan = float("nan")

    # Prices depend only on the (start, end) window, so each window is read
    # from disk once. NOTE(review): assumes run_backtest does not mutate its
    # price frames - the optimizer script reuses one price dict across all
    # trials, which relies on the same property.
    prices_by_window: dict[tuple[str, str], dict[str, pd.DataFrame]] = {}

    mismatches = 0
    for idx, row in enumerate(rows, start=1):
        start = str(args.start or row.get("start") or "20200101")
        end = str(args.end or row.get("end") or "20251231")

        window = (start, end)
        if window not in prices_by_window:
            prices_by_window[window] = load_prices(raw_dir, universe, start, end)
        prices = prices_by_window[window]

        params_dict: dict[str, Any] = {}
        for k in cols:
            if k in tp_fields and row.get(k) is not None:
                params_dict[k] = _coerce(k, row[k])
        params_dict.setdefault("max_positions", constraints.max_positions)

        tp = TrendParams(**params_dict)
        equity, _weights, _trades = run_backtest(
            prices,
            universe,
            constraints,
            tp,
            rates_fallback=rates_fallback,
            risk_proxy=risk_proxy,
        )

        st = perf_stats(equity["equity"])
        # perf_stats returns {} for a curve without returns; use NaN for the
        # missing metrics instead of raising KeyError, and flag the row.
        diffs = {k: float(st.get(k, nan) - float(row.get(k) or 0.0)) for k in metric_keys}
        bad = (not st) or any(abs(v) > tol for v in diffs.values())
        if bad:
            mismatches += 1

        tag = "MISMATCH" if bad else "OK"
        print(f"[{idx}] {tag} id={row.get('id')} run_id={row.get('run_id')} start={start} end={end}")
        print(" orig:", {k: row.get(k) for k in metric_keys})
        print(" re :", st)
        print(" diff:", diffs)

    print(f"done. mismatches={mismatches}/{len(rows)}")
def fetch_stock_daily(
    cfg: TushareConfig,
    ts_code: str | None = None,
    trade_date: str | None = None,
    start_date: str | None = None,
    end_date: str | None = None,
    fields: str | None = None,
) -> pd.DataFrame:
    """Fetch stock daily bars via the Tushare Pro `daily` endpoint.

    All filter arguments are forwarded unchanged to ``api.daily``; ``None``
    values are passed through as-is.

    Args:
        cfg: Tushare credentials/timeout used to build the API client.
        ts_code: Security code filter, e.g. ``"000001.SZ"``.
        trade_date: Single trade date filter (YYYYMMDD).
        start_date: Range start (YYYYMMDD).
        end_date: Range end (YYYYMMDD).
        fields: Field selection forwarded to the API.

    Returns:
        Whatever ``api.daily`` returns (annotated as a DataFrame).
    """
    api = pro_api(cfg)
    return api.daily(
        ts_code=ts_code,
        trade_date=trade_date,
        start_date=start_date,
        end_date=end_date,
        fields=fields,
    )


# Backward-compatible alias: scripts/tushare_download_daily.py imports
# `fetch_daily` from this module, which otherwise fails with ImportError.
fetch_daily = fetch_stock_daily
+ """ + if isinstance(x.index, pd.MultiIndex): + level = 0 + def _clip(s: pd.Series) -> pd.Series: + lo = s.quantile(lower_q) + hi = s.quantile(upper_q) + return s.clip(lo, hi) + out = x.groupby(level=level).apply(_clip) + # groupby(...).apply adds the group key as an extra outer index level + return out.droplevel(0) + + # single-index (treated as one cross section) + lo = x.quantile(lower_q) + hi = x.quantile(upper_q) + return x.clip(lo, hi) + + +def zscore_by_date(x: pd.Series) -> pd.Series: + """Z-score cross-sectional values per date.""" + if isinstance(x.index, pd.MultiIndex): + level = 0 + def _z(s: pd.Series) -> pd.Series: + std = s.std(ddof=0) + if std == 0 or pd.isna(std): + return s * 0 + return (s - s.mean()) / std + out = x.groupby(level=level).apply(_z) + return out.droplevel(0) + + std = x.std(ddof=0) + if std == 0 or pd.isna(std): + return x * 0 + return (x - x.mean()) / std diff --git a/src/qfr/metrics.py b/src/qfr/metrics.py new file mode 100644 index 0000000..5a5c21e --- /dev/null +++ b/src/qfr/metrics.py @@ -0,0 +1,24 @@ +from __future__ import annotations + +import pandas as pd + + +def information_coefficient(factor: pd.Series, forward_return: pd.Series, method: str = "spearman") -> pd.Series: + """Compute per-date IC between factor and next-period return. + + Inputs should be aligned Series with a MultiIndex (date, asset). + Returns a Series indexed by date. 
+ """ + if not isinstance(factor.index, pd.MultiIndex) or not isinstance(forward_return.index, pd.MultiIndex): + raise ValueError("factor and forward_return must be MultiIndex (date, asset)") + + df = pd.concat({"factor": factor, "ret": forward_return}, axis=1).dropna() + if df.empty: + return pd.Series(dtype=float) + + def _ic(g: pd.DataFrame) -> float: + if g["factor"].nunique() < 2 or g["ret"].nunique() < 2: + return float("nan") + return g["factor"].corr(g["ret"], method=method) + + return df.groupby(level=0).apply(_ic) diff --git a/src/qfr/strategy/etf_trend.py b/src/qfr/strategy/etf_trend.py new file mode 100644 index 0000000..0565ce2 --- /dev/null +++ b/src/qfr/strategy/etf_trend.py @@ -0,0 +1,584 @@ +from __future__ import annotations + +from dataclasses import dataclass + +import numpy as np +import pandas as pd + + +@dataclass(frozen=True) +class TrendParams: + # portfolio construction + max_positions: int = 3 + desired_positions_min: int = 2 + desired_positions_max: int = 3 + + # signal update frequency (days) + rebalance_every: int = 1 + + # event-driven replacement (avoid frequent weight tweaking) + replace_score_gap: float = 0.80 # replace weakest if candidate_score >= weakest_score + gap + max_replaces_per_day: int = 1 + + # trend filter: MA cross + sma_fast: int = 5 + sma_slow: int = 20 + + # exits (risk control) + atr_window: int = 14 + + # base chandelier stop (acts as trailing stop) + atr_mult: float = 3.2 + + # profit-protection: once a position is "in profit enough", tighten the trailing stop + profit_tighten_atr: float = 4.0 # activate when (highest_close-entry) >= this*ATR + atr_mult_profit: float = 2.0 # chandelier multiplier after activation + + # hard stop-loss from entry + stop_loss_atr: float = 3.2 # close < entry - stop_loss_atr*ATR + + # asymmetric risk control (fast-run style) + regime_confirm_days: int = 3 # require N consecutive days to switch regimes + bull_atr_mult: float = 3.2 # risk-on trailing stop multiplier + bear_atr_mult: 
float = 2.6 # risk-off trailing stop multiplier (tighter) + bear_stop_loss_atr: float = 2.6 # risk-off hard stop (tighter) + + asym_enabled: bool = False # default off: preserve baseline behavior + + # macro regime / market consistency (coarse, pandas/numpy native) + macro_min_breadth: float = 0.15 # min fraction of equity assets trend_ok + macro_down_frac: float = 0.80 # down-day breadth threshold to treat as consistent down + macro_scale_risk_off: float = 0.00 # compatibility (unused; event-driven allows empty) + + # anomaly exits + bias_window: int = 20 # compute bias vs this MA + bias_exit: float = 0.18 # abs(close/MA - 1) >= this => exit + + vol_short: int = 5 + vol_long: int = 20 + vol_ratio_exit: float = 3.0 # volume ratio short/long >= this => exit (blow-off) + + # ranking / entry filters + min_score: float = 0.3 + score_vol_denom_floor: float = 0.02 + trend_strength_weight: float = 0.60 + + # momentum score weights (focus on strong short/mid-term) + w_r5: float = 0.25 + w_r20: float = 0.45 + w_r60: float = 0.20 + w_r120: float = 0.10 + + # trading hygiene + min_history_days: int = 120 # new listing protection: require enough history + cooldown_days: int = 5 # after an exit, avoid immediate re-entry + min_hold_days: int = 3 # do not switch too fast (hard stops still apply) + + # churn controls + lazy_days: int = 5 # minimum days between switches (event-driven) + + # keep these fields for compatibility with older scripts + rebalance_band: float = 0.06 + vol_window: int = 20 + max_weight_per_asset: float = 0.90 + concentration_power: float = 2.2 + port_vol_window: int = 60 + target_ann_vol: float = 0.25 + new_asset_days: int = 30 + new_asset_max_w: float = 0.20 + + +@dataclass +class UniverseAsset: + ts_code: str + asset_class: str + name: str | None = None + + +def classify_asset(asset_class: str) -> str: + if asset_class.startswith("equity"): + return "equity" + if asset_class.startswith("rates"): + return "rates" + if asset_class.startswith("commodity"): + 
return "commodity" + return "other" + + +@dataclass +class Constraints: + max_positions: int = 3 + + # structural constraints (set to 0 for no restrictions) + must_commodity: int = 0 + must_rates: int = 0 + must_equity: int = 0 + + +def _sma(s: pd.Series, n: int) -> pd.Series: + return s.rolling(n, min_periods=n).mean() + + +def _true_range(high: pd.Series, low: pd.Series, close: pd.Series) -> pd.Series: + prev_close = close.shift(1) + tr1 = high - low + tr2 = (high - prev_close).abs() + tr3 = (low - prev_close).abs() + return pd.concat([tr1, tr2, tr3], axis=1).max(axis=1) + + +def _atr(high: pd.Series, low: pd.Series, close: pd.Series, n: int) -> pd.Series: + tr = _true_range(high, low, close) + return tr.rolling(n, min_periods=n).mean() + + +def compute_features(df: pd.DataFrame, params: TrendParams) -> pd.DataFrame: + out = df.copy() + out = out.sort_values("trade_date").reset_index(drop=True) + + close = out["close"].astype(float) + out["ret1"] = close.pct_change() + + for n in (5, 10, 20, 60, 120, 240): + out[f"r{n}"] = close / close.shift(n) - 1.0 + + out["ma_fast"] = _sma(close, params.sma_fast) + out["ma_slow"] = _sma(close, params.sma_slow) + out["trend_ok"] = out["ma_fast"] > out["ma_slow"] + + out["trend_strength"] = (out["ma_fast"] / out["ma_slow"] - 1.0).replace([np.inf, -np.inf], np.nan) + + out["ma_bias"] = _sma(close, params.bias_window) + out["bias"] = (close / out["ma_bias"] - 1.0).replace([np.inf, -np.inf], np.nan) + + if {"high", "low"}.issubset(out.columns): + high = out["high"].astype(float) + low = out["low"].astype(float) + out["atr"] = _atr(high, low, close, params.atr_window) + else: + out["atr"] = close.diff().abs().rolling(params.atr_window, min_periods=params.atr_window).mean() + + # volume/amount anomaly detection (Tushare fund_daily provides vol, amount) + if "amount" in out.columns: + amt = pd.to_numeric(out["amount"], errors="coerce") + out["amt_s"] = amt.rolling(params.vol_short, min_periods=params.vol_short).mean() + out["amt_l"] 
= amt.rolling(params.vol_long, min_periods=params.vol_long).mean() + out["vol_ratio"] = (out["amt_s"] / out["amt_l"]).replace([np.inf, -np.inf], np.nan) + elif "vol" in out.columns: + v = pd.to_numeric(out["vol"], errors="coerce") + out["amt_s"] = v.rolling(params.vol_short, min_periods=params.vol_short).mean() + out["amt_l"] = v.rolling(params.vol_long, min_periods=params.vol_long).mean() + out["vol_ratio"] = (out["amt_s"] / out["amt_l"]).replace([np.inf, -np.inf], np.nan) + else: + out["vol_ratio"] = np.nan + + # vol proxy for score normalization + ret1 = out["ret1"] + vol = ret1.rolling(params.vol_window, min_periods=params.vol_window).std() * np.sqrt(252.0) + out["vol"] = vol + + # additional scoring signals (no new hyper-params) + vol20 = ret1.rolling(20, min_periods=20).std() * np.sqrt(252.0) + out["vol20"] = vol20 + out["dd20"] = close / close.rolling(20, min_periods=20).max() - 1.0 + out["dd60"] = close / close.rolling(60, min_periods=60).max() - 1.0 + + # momentum score + score_raw = params.w_r5 * out["r5"] + params.w_r20 * out["r20"] + params.w_r60 * out["r60"] + params.w_r120 * out["r120"] + + # Add: (a) prefer closer to 20/60-day highs (less negative drawdown) + # and (b) modestly penalize high 20-day realized volatility. 
+ score_raw = score_raw + 0.20 * params.w_r20 * out["dd20"] + 0.20 * params.w_r60 * out["dd60"] - 0.05 * out["vol20"] + + denom = vol.clip(lower=params.score_vol_denom_floor).replace([np.inf, -np.inf], np.nan) + score = score_raw / denom + params.trend_strength_weight * out["trend_strength"] + + out["score_raw"] = score_raw + out["score"] = score + + return out + + +def run_backtest( + prices: dict[str, pd.DataFrame], + universe: list[UniverseAsset], + constraints: Constraints, + params: TrendParams, + rates_fallback: str, + risk_proxy: str, +) -> tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]: + feats: dict[str, pd.DataFrame] = {ts: compute_features(df, params) for ts, df in prices.items()} + + meta = {a.ts_code: a for a in universe} + + if risk_proxy not in feats: + eq = [a.ts_code for a in universe if classify_asset(a.asset_class) == "equity" and a.ts_code in feats] + if not eq: + raise RuntimeError(f"risk_proxy {risk_proxy} not in feats and no equity assets") + risk_proxy = eq[0] + + all_dates = feats[risk_proxy]["trade_date"].astype(str).sort_values().tolist() + if not all_dates: + raise RuntimeError("risk_proxy has no trade_date") + + close_px = pd.DataFrame(index=all_dates) + ret1 = pd.DataFrame(index=all_dates) + for ts, f in feats.items(): + g = f.set_index("trade_date").reindex(all_dates) + close_px[ts] = g["close"].astype(float) + ret1[ts] = close_px[ts].pct_change().fillna(0.0) + + weights = pd.DataFrame(0.0, index=all_dates, columns=close_px.columns) + + # state + in_pos: list[str] = [] + highest_close: dict[str, float] = {} + entry_price: dict[str, float] = {} + entry_idx: dict[str, int] = {} + cooldown_until: dict[str, int] = {} + last_switch_idx = -10**9 + + # regime state (macro risk-off) with confirmation to avoid daily flip-flops + risk_off_state = False + risk_off_streak = 0 + risk_on_streak = 0 + + # first valid index per asset + first_valid_idx: dict[str, int] = {} + for ts, f in feats.items(): + ok = f[["ma_fast", "ma_slow", "vol", 
"score"]].notna().all(axis=1) + valid_dates = f.loc[ok, "trade_date"].astype(str).tolist() + if not valid_dates: + continue + d0 = valid_dates[0] + try: + first_valid_idx[ts] = all_dates.index(d0) + except ValueError: + pass + + atr_map = {ts: feats[ts].set_index("trade_date").reindex(all_dates)["atr"].astype(float) for ts in close_px.columns} + mf_map = {ts: feats[ts].set_index("trade_date").reindex(all_dates)["ma_fast"].astype(float) for ts in close_px.columns} + ms_map = {ts: feats[ts].set_index("trade_date").reindex(all_dates)["ma_slow"].astype(float) for ts in close_px.columns} + bias_map = {ts: feats[ts].set_index("trade_date").reindex(all_dates)["bias"].astype(float) for ts in close_px.columns} + vr_map = {ts: feats[ts].set_index("trade_date").reindex(all_dates)["vol_ratio"].astype(float) for ts in close_px.columns} + + trades: list[dict[str, object]] = [] + + def can_trade(ts: str, i: int) -> bool: + fv = first_valid_idx.get(ts) + if fv is None: + return False + if i < fv + params.min_history_days: + return False + cu = cooldown_until.get(ts) + if cu is not None and i < cu: + return False + return True + + def current_holdings_from_weights(d: str) -> list[str]: + return [ts for ts in weights.columns if ts != rates_fallback and float(weights.loc[d, ts]) > 1e-12] + + def set_portfolio(d: str, new_hold: list[str], reason: str) -> None: + # equal weight among holdings; remainder to rates_fallback + w = pd.Series(0.0, index=weights.columns) + k = len(new_hold) + if k > 0: + w_per = 1.0 / k + for ts in new_hold: + if ts in w.index: + w.loc[ts] = w_per + rem = 1.0 - float(w.sum()) + if rem > 1e-12 and rates_fallback in w.index: + w.loc[rates_fallback] += rem + + prev = pd.Series(0.0, index=weights.columns) + if d != all_dates[0]: + prev = weights.loc[all_dates[all_dates.index(d) - 1]].copy() + + weights.loc[d] = w + + for ts in weights.columns: + if ts == rates_fallback: + continue + w0 = float(prev.loc[ts]) + w1 = float(w.loc[ts]) + if abs(w1 - w0) <= 1e-12: + 
continue + side = "BUY" if w1 > w0 else "SELL" + trades.append( + { + "trade_date": d, + "ts_code": ts, + "side": side, + "reason": reason, + "weight_before": w0, + "weight_after": w1, + "price": float(close_px.loc[d, ts]), + } + ) + + for i, d in enumerate(all_dates): + if i > 0: + weights.loc[d] = weights.iloc[i - 1] + + # update trailing highest close + for ts in list(in_pos): + c = float(close_px.loc[d, ts]) + if np.isfinite(c): + highest_close[ts] = max(highest_close.get(ts, c), c) + + allow_ts = {ts for ts in close_px.columns if can_trade(ts, i)} + + # snapshot (vectorized via reindex on date) + snap_rows = [] + for ts, f in feats.items(): + g = f.set_index("trade_date") + if d not in g.index: + continue + r = g.loc[d] + snap_rows.append( + ( + ts, + bool(r.get("trend_ok")) if pd.notna(r.get("trend_ok")) else False, + float(r.get("score")) if pd.notna(r.get("score")) else float("nan"), + ) + ) + snap = pd.DataFrame(snap_rows, columns=["ts_code", "trend_ok", "score"]).set_index("ts_code") + snap = snap[snap.index.isin(list(allow_ts))] + + # consistency-aware timing: only go risk-off when proxy trend down AND the market is consistently down + proxy_mf = float(mf_map[risk_proxy].loc[d]) + proxy_ms = float(ms_map[risk_proxy].loc[d]) + proxy_down = np.isfinite(proxy_mf) and np.isfinite(proxy_ms) and (proxy_mf < proxy_ms) + + eq_all = [ts for ts, a in meta.items() if classify_asset(a.asset_class) == "equity" and ts in snap.index] + breadth = 1.0 + down_frac = 0.0 + if eq_all: + ok = snap.loc[eq_all, "trend_ok"].fillna(False) + breadth = float(ok.mean()) + rets = ret1.loc[d, eq_all].fillna(0.0) + down_frac = float((rets < 0).mean()) + + consistent_down = proxy_down and (breadth < params.macro_min_breadth) and (down_frac >= params.macro_down_frac) + + # Confirm regime shifts to avoid whipsaw switching + if consistent_down: + risk_off_streak += 1 + risk_on_streak = 0 + else: + risk_on_streak += 1 + risk_off_streak = 0 + + if (not risk_off_state) and risk_off_streak >= 
int(params.regime_confirm_days): + risk_off_state = True + elif risk_off_state and risk_on_streak >= int(params.regime_confirm_days): + risk_off_state = False + + if not params.asym_enabled: + # Preserve baseline: use raw consistent_down + risk_off = consistent_down + else: + risk_off = risk_off_state + + # DAILY EXITS (hard) + sold_any = False + for ts in list(in_pos): + c = float(close_px.loc[d, ts]) + mf = float(mf_map[ts].loc[d]) + ms = float(ms_map[ts].loc[d]) + atr = float(atr_map[ts].loc[d]) + h = highest_close.get(ts, c) + epx = entry_price.get(ts, c) + e_i = entry_idx.get(ts, i) + + trend_break = np.isfinite(mf) and np.isfinite(ms) and (mf < ms) + if trend_break and (i - e_i) < params.min_hold_days: + trend_break = False + + if params.asym_enabled: + base_mult = params.bear_atr_mult if risk_off else params.bull_atr_mult + use_mult = base_mult + else: + use_mult = params.atr_mult + if np.isfinite(atr) and (h - epx) >= params.profit_tighten_atr * atr: + use_mult = min(use_mult, params.atr_mult_profit) + + chand_break = np.isfinite(atr) and c < (h - use_mult * atr) + if params.asym_enabled: + sl_atr = params.bear_stop_loss_atr if risk_off else params.stop_loss_atr + else: + sl_atr = params.stop_loss_atr + stop_loss = np.isfinite(atr) and c < (epx - sl_atr * atr) + + bias = float(bias_map[ts].loc[d]) + vr = float(vr_map[ts].loc[d]) + bias_exit = np.isfinite(bias) and abs(bias) >= params.bias_exit + vr_exit = np.isfinite(vr) and vr >= params.vol_ratio_exit + + if trend_break or chand_break or stop_loss or bias_exit or vr_exit: + in_pos.remove(ts) + sold_any = True + highest_close.pop(ts, None) + entry_price.pop(ts, None) + entry_idx.pop(ts, None) + cooldown_until[ts] = i + params.cooldown_days + + if risk_off and in_pos: + for ts in list(in_pos): + cooldown_until[ts] = i + params.cooldown_days + in_pos = [] + sold_any = True + + if sold_any: + set_portfolio(d, in_pos, reason="exit") + + # ENTRY / SWITCH: do not churn - only allow switching every lazy_days + if 
risk_off: + continue + if (i - last_switch_idx) < int(params.lazy_days): + continue + + cand = snap.copy() + cand = cand[cand["trend_ok"].fillna(False)] + cand = cand[cand["score"].fillna(float("-inf")) >= params.min_score] + if cand.empty: + continue + cand = cand.sort_values("score", ascending=False) + + # Diversification constraint: avoid holding multiple highly-correlated assets. + # We gate additions using both 60d and 120d return correlations with a fallback ladder + # to ensure we can still reach desired_positions_min in highly synchronous regimes. + def _build_corr(window: int) -> pd.DataFrame | None: + if i < window: + return None + sub = ret1.iloc[max(0, i - window + 1) : i + 1] + # corr() handles NaNs pairwise; we treat missing pairs as uncorrelated later. + return sub.corr() + + corr60 = _build_corr(60) + corr120 = _build_corr(120) + + def _max_corr(a: str, b: str, use_short: bool, use_long: bool) -> float: + v = float("-inf") + if use_short and corr60 is not None and a in corr60.index and b in corr60.columns: + x = corr60.at[a, b] + if pd.notna(x): + v = max(v, float(x)) + if use_long and corr120 is not None and a in corr120.index and b in corr120.columns: + x = corr120.at[a, b] + if pd.notna(x): + v = max(v, float(x)) + return v + + def _passes_corr(ts: str, held: list[str], thr: float, use_short: bool, use_long: bool, max_buddies: int) -> bool: + if not held: + return True + buddies = 0 + for h in held: + if _max_corr(ts, h, use_short=use_short, use_long=use_long) >= thr: + buddies += 1 + if buddies > max_buddies: + return False + return True + + # Fallback ladder (route D): + # L0-L2 relax threshold; L3 ignore short-term corr; L4 allow one correlated buddy; L5 disable gating. 
+ _policies = [ + dict(thr=0.95, use_short=True, use_long=True, max_buddies=0), + dict(thr=0.93, use_short=True, use_long=True, max_buddies=0), + dict(thr=0.90, use_short=True, use_long=True, max_buddies=0), + dict(thr=0.90, use_short=False, use_long=True, max_buddies=0), + dict(thr=0.90, use_short=False, use_long=True, max_buddies=1), + dict(thr=-1.0, use_short=False, use_long=False, max_buddies=10**9), + ] + _policy_idx = 0 + + changed = False + + # fill up to desired min (do not apply correlation gating here; keep baseline behavior) + while len(in_pos) < int(params.desired_positions_min): + nxt = None + for ts in cand.index.tolist(): + if ts in in_pos: + continue + nxt = ts + break + if nxt is None: + break + in_pos.append(nxt) + c = float(close_px.loc[d, nxt]) + entry_price[nxt] = c + entry_idx[nxt] = i + highest_close[nxt] = c + changed = True + + desired_max = min(int(params.max_positions), max(int(params.desired_positions_min), int(params.desired_positions_max))) + + # add up to desired max (no corr gating; corr is applied replacement-only) + while len(in_pos) < desired_max: + nxt = None + for ts in cand.index.tolist(): + if ts in in_pos: + continue + nxt = ts + break + if nxt is None: + break + + in_pos.append(nxt) + c = float(close_px.loc[d, nxt]) + entry_price[nxt] = c + entry_idx[nxt] = i + highest_close[nxt] = c + changed = True + + # replacement (single) + if len(in_pos) == desired_max and int(params.max_replaces_per_day) > 0: + held_scores = {ts: float(cand.loc[ts, "score"]) if ts in cand.index else float("-inf") for ts in in_pos} + weak = min(held_scores.items(), key=lambda x: x[1])[0] + weak_score = float(held_scores.get(weak, float("-inf"))) + + best = None + best_score = float("-inf") + pol_rep = _policies[_policy_idx] + held_after = [x for x in in_pos if x != weak] + for ts in cand.index.tolist(): + if ts in in_pos: + continue + if not _passes_corr(ts, held_after, thr=float(pol_rep["thr"]), use_short=bool(pol_rep["use_short"]), 
use_long=bool(pol_rep["use_long"]), max_buddies=int(pol_rep["max_buddies"])): + continue + sc = float(cand.loc[ts, "score"]) if pd.notna(cand.loc[ts, "score"]) else float("-inf") + if sc > best_score: + best_score = sc + best = ts + + if best is not None and np.isfinite(best_score) and best_score >= (weak_score + float(params.replace_score_gap)): + in_pos = [ts for ts in in_pos if ts != weak] + cooldown_until[weak] = i + params.cooldown_days + highest_close.pop(weak, None) + entry_price.pop(weak, None) + entry_idx.pop(weak, None) + + in_pos.append(best) + c = float(close_px.loc[d, best]) + entry_price[best] = c + entry_idx[best] = i + highest_close[best] = c + changed = True + + cur_hold = current_holdings_from_weights(d) + if set(cur_hold) != set(in_pos): + set_portfolio(d, in_pos, reason="switch") + last_switch_idx = i + + # apply 1-day lag + w_lag = weights.shift(1).fillna(0.0) + port_ret = (ret1 * w_lag).sum(axis=1) + equity = (1.0 + port_ret).cumprod().to_frame("equity") + + trades_df = pd.DataFrame(trades) + if not trades_df.empty: + trades_df = trades_df.sort_values(["trade_date", "ts_code"]).reset_index(drop=True) + + return equity, weights, trades_df diff --git a/web/db.sqlite3 b/web/db.sqlite3 new file mode 100644 index 0000000..e69de29 diff --git a/web/django.log b/web/django.log new file mode 100644 index 0000000..ee6b12b --- /dev/null +++ b/web/django.log @@ -0,0 +1,23 @@ +[06/Mar/2026 16:51:04] "GET /lab/top-trials?limit=3 HTTP/1.1" 200 5202 +[06/Mar/2026 16:51:04] "GET /lab/best-artifacts?limit=1 HTTP/1.1" 200 5103 +[06/Mar/2026 16:52:31] "GET /lab/top-trials HTTP/1.1" 200 22279 +Not Found: /favicon.ico +[06/Mar/2026 16:52:32] "GET /favicon.ico HTTP/1.1" 404 2643 +[06/Mar/2026 16:53:04] "GET / HTTP/1.1" 200 395 +[06/Mar/2026 16:53:06] "GET /lab/best-artifacts HTTP/1.1" 200 5103 +[06/Mar/2026 16:53:09] "GET / HTTP/1.1" 200 395 +[06/Mar/2026 16:53:11] "GET /lab/top-trials HTTP/1.1" 200 22279 +[06/Mar/2026 16:53:40] "GET / HTTP/1.1" 200 395 
+[06/Mar/2026 16:53:42] "GET /lab/top-trials HTTP/1.1" 200 22279 +[06/Mar/2026 16:53:43] "GET / HTTP/1.1" 200 395 +[06/Mar/2026 16:53:44] "GET /lab/best-artifacts HTTP/1.1" 200 5103 +[06/Mar/2026 16:54:01] "GET / HTTP/1.1" 200 395 +[07/Mar/2026 08:00:05] "GET / HTTP/1.1" 200 395 +[07/Mar/2026 08:06:46] "GET / HTTP/1.1" 200 395 +[07/Mar/2026 08:07:44] "GET / HTTP/1.1" 200 395 +[07/Mar/2026 08:08:43] "GET /lab/top-trials HTTP/1.1" 200 22445 +[07/Mar/2026 08:09:07] "GET / HTTP/1.1" 200 395 +[07/Mar/2026 08:24:56] "GET / HTTP/1.1" 200 395 +[09/Mar/2026 16:39:05] "GET / HTTP/1.1" 200 395 +[09/Mar/2026 16:39:08] "GET / HTTP/1.1" 200 395 +[11/Mar/2026 15:40:28] "GET / HTTP/1.1" 200 395 diff --git a/web/django.pid b/web/django.pid new file mode 100644 index 0000000..df7fda0 --- /dev/null +++ b/web/django.pid @@ -0,0 +1 @@ +1503125 diff --git a/web/lab/__init__.py b/web/lab/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/web/lab/admin.py b/web/lab/admin.py new file mode 100644 index 0000000..8c38f3f --- /dev/null +++ b/web/lab/admin.py @@ -0,0 +1,3 @@ +from django.contrib import admin + +# Register your models here. diff --git a/web/lab/apps.py b/web/lab/apps.py new file mode 100644 index 0000000..f835608 --- /dev/null +++ b/web/lab/apps.py @@ -0,0 +1,6 @@ +from django.apps import AppConfig + + +class LabConfig(AppConfig): + default_auto_field = 'django.db.models.BigAutoField' + name = 'lab' diff --git a/web/lab/migrations/__init__.py b/web/lab/migrations/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/web/lab/models.py b/web/lab/models.py new file mode 100644 index 0000000..71a8362 --- /dev/null +++ b/web/lab/models.py @@ -0,0 +1,3 @@ +from django.db import models + +# Create your models here. 
diff --git a/web/lab/templates/lab/best_artifacts.html b/web/lab/templates/lab/best_artifacts.html new file mode 100644 index 0000000..e098def --- /dev/null +++ b/web/lab/templates/lab/best_artifacts.html @@ -0,0 +1,43 @@ + + + + + Best Artifacts + + + +

Best Artifacts

+

limit={{ limit }}

+

Home

+ + + + + + + + + {% for r in rows %} + + + + + + + + + + + + + + {% endfor %} + +
idts_utcann_returnann_volmax_ddtpyequityweightstradesconfigparams
{{ r.id }}{{ r.ts_utc }}{{ r.ann_return }}{{ r.ann_vol }}{{ r.max_drawdown }}{{ r.trades_per_year }}{{ r.out_equity }}{{ r.out_weights }}{{ r.out_trades }}{{ r.config_path }}
{{ r.params_json }}
+ + diff --git a/web/lab/templates/lab/index.html b/web/lab/templates/lab/index.html new file mode 100644 index 0000000..b714296 --- /dev/null +++ b/web/lab/templates/lab/index.html @@ -0,0 +1,15 @@ + + + + + QFR Lab + + +

QFR Lab

+

Experiments DB: {{ db_path }}

+ + + diff --git a/web/lab/templates/lab/top_trials.html b/web/lab/templates/lab/top_trials.html new file mode 100644 index 0000000..799431c --- /dev/null +++ b/web/lab/templates/lab/top_trials.html @@ -0,0 +1,42 @@ + + + + + Top Trials + + + +

Top Trials (ann_return)

+

Summary: {{ summary }}

+

limit={{ limit }}

+

Home

+ + + + + + + + + + {% for r in rows %} + + + + + + + + + + + + {% endfor %} + +
idann_returnann_volmax_drawdownsharpetpyts_utcrun_idconfig
{{ r.id }}{{ r.ann_return }}{{ r.ann_vol }}{{ r.max_drawdown }}{{ r.sharpe }}{{ r.trades_per_year }}{{ r.ts_utc }}{{ r.run_id }}{{ r.config_path }}
+ + diff --git a/web/lab/tests.py b/web/lab/tests.py new file mode 100644 index 0000000..7ce503c --- /dev/null +++ b/web/lab/tests.py @@ -0,0 +1,3 @@ +from django.test import TestCase + +# Create your tests here. diff --git a/web/lab/urls.py b/web/lab/urls.py new file mode 100644 index 0000000..8ab9ef0 --- /dev/null +++ b/web/lab/urls.py @@ -0,0 +1,11 @@ +from __future__ import annotations + +from django.urls import path + +from . import views + +urlpatterns = [ + path("", views.index, name="index"), + path("lab/top-trials", views.top_trials, name="top_trials"), + path("lab/best-artifacts", views.best_artifacts, name="best_artifacts"), +] diff --git a/web/lab/views.py b/web/lab/views.py new file mode 100644 index 0000000..7e5bd4a --- /dev/null +++ b/web/lab/views.py @@ -0,0 +1,113 @@ +from __future__ import annotations + +import sqlite3 +from dataclasses import dataclass +from pathlib import Path +from typing import Any + +from django.conf import settings +from django.http import HttpRequest, HttpResponse +from django.shortcuts import render + + +@dataclass +class TrialRow: + ann_return: float | None + ann_vol: float | None + max_drawdown: float | None + sharpe: float | None + trades_per_year: float | None + ts_utc: str | None + run_id: str | None + config_path: str | None + + +def _db_path() -> Path: + return Path(getattr(settings, "QFR_EXPERIMENTS_DB")) + + +def _connect() -> sqlite3.Connection: + con = sqlite3.connect(str(_db_path())) + con.row_factory = sqlite3.Row + return con + + +def index(request: HttpRequest) -> HttpResponse: + return render( + request, + "lab/index.html", + { + "db_path": str(_db_path()), + }, + ) + + +def top_trials(request: HttpRequest) -> HttpResponse: + limit = int(request.GET.get("limit", "50")) + limit = max(10, min(500, limit)) + + rows: list[dict[str, Any]] = [] + summary: dict[str, Any] = {} + with _connect() as con: + cur = con.execute( + """ + SELECT ann_return, ann_vol, max_drawdown, sharpe, trades_per_year, ts_utc, run_id, 
config_path + FROM trials + WHERE ann_return IS NOT NULL + ORDER BY ann_return DESC + LIMIT ? + """, + (limit,), + ) + rows = [dict(r) for r in cur.fetchall()] + + cur2 = con.execute( + """ + SELECT + COUNT(*) AS n, + MAX(ann_return) AS best_ann, + MIN(max_drawdown) AS worst_dd, + MIN(trades_per_year) AS min_tpy, + MAX(trades_per_year) AS max_tpy + FROM trials + """ + ) + summary = dict(cur2.fetchone()) + + return render( + request, + "lab/top_trials.html", + { + "rows": rows, + "summary": summary, + "limit": limit, + }, + ) + + +def best_artifacts(request: HttpRequest) -> HttpResponse: + limit = int(request.GET.get("limit", "50")) + limit = max(10, min(500, limit)) + + rows: list[dict[str, Any]] = [] + with _connect() as con: + cur = con.execute( + """ + SELECT id, ts_utc, config_path, out_equity, out_weights, out_trades, + ann_return, ann_vol, max_drawdown, trades_per_year, params_json + FROM best_artifacts + ORDER BY id DESC + LIMIT ? + """, + (limit,), + ) + rows = [dict(r) for r in cur.fetchall()] + + return render( + request, + "lab/best_artifacts.html", + { + "rows": rows, + "limit": limit, + }, + ) diff --git a/web/manage.py b/web/manage.py new file mode 100755 index 0000000..b1d9cbf --- /dev/null +++ b/web/manage.py @@ -0,0 +1,22 @@ +#!/usr/bin/env python +"""Django's command-line utility for administrative tasks.""" +import os +import sys + + +def main(): + """Run administrative tasks.""" + os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'qfrweb.settings') + try: + from django.core.management import execute_from_command_line + except ImportError as exc: + raise ImportError( + "Couldn't import Django. Are you sure it's installed and " + "available on your PYTHONPATH environment variable? Did you " + "forget to activate a virtual environment?" 
+ ) from exc + execute_from_command_line(sys.argv) + + +if __name__ == '__main__': + main() diff --git a/web/qfrweb/__init__.py b/web/qfrweb/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/web/qfrweb/asgi.py b/web/qfrweb/asgi.py new file mode 100644 index 0000000..059ce64 --- /dev/null +++ b/web/qfrweb/asgi.py @@ -0,0 +1,16 @@ +""" +ASGI config for qfrweb project. + +It exposes the ASGI callable as a module-level variable named ``application``. + +For more information on this file, see +https://docs.djangoproject.com/en/5.0/howto/deployment/asgi/ +""" + +import os + +from django.core.asgi import get_asgi_application + +os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'qfrweb.settings') + +application = get_asgi_application() diff --git a/web/qfrweb/settings.py b/web/qfrweb/settings.py new file mode 100644 index 0000000..5104f21 --- /dev/null +++ b/web/qfrweb/settings.py @@ -0,0 +1,127 @@ +""" +Django settings for qfrweb project. + +Generated by 'django-admin startproject' using Django 5.0.3. + +For more information on this file, see +https://docs.djangoproject.com/en/5.0/topics/settings/ + +For the full list of settings and their values, see +https://docs.djangoproject.com/en/5.0/ref/settings/ +""" + +from pathlib import Path + +# Build paths inside the project like this: BASE_DIR / 'subdir'. +BASE_DIR = Path(__file__).resolve().parent.parent + + +# Quick-start development settings - unsuitable for production +# See https://docs.djangoproject.com/en/5.0/howto/deployment/checklist/ + +# SECURITY WARNING: keep the secret key used in production secret! +SECRET_KEY = 'django-insecure-mi&&j81fxs8ni#1x%9y64h0^k+&@7xd*=q-5=4_i@+dasay7q1' + +# SECURITY WARNING: don't run with debug turned on in production! 
+DEBUG = True + +ALLOWED_HOSTS = ["*"] + + +# Application definition + +INSTALLED_APPS = [ + "lab", + 'django.contrib.admin', + 'django.contrib.auth', + 'django.contrib.contenttypes', + 'django.contrib.sessions', + 'django.contrib.messages', + 'django.contrib.staticfiles', +] + +MIDDLEWARE = [ + 'django.middleware.security.SecurityMiddleware', + 'django.contrib.sessions.middleware.SessionMiddleware', + 'django.middleware.common.CommonMiddleware', + 'django.middleware.csrf.CsrfViewMiddleware', + 'django.contrib.auth.middleware.AuthenticationMiddleware', + 'django.contrib.messages.middleware.MessageMiddleware', + 'django.middleware.clickjacking.XFrameOptionsMiddleware', +] + +ROOT_URLCONF = 'qfrweb.urls' + +TEMPLATES = [ + { + 'BACKEND': 'django.template.backends.django.DjangoTemplates', + 'DIRS': [], + 'APP_DIRS': True, + 'OPTIONS': { + 'context_processors': [ + 'django.template.context_processors.debug', + 'django.template.context_processors.request', + 'django.contrib.auth.context_processors.auth', + 'django.contrib.messages.context_processors.messages', + ], + }, + }, +] + +WSGI_APPLICATION = 'qfrweb.wsgi.application' + + +# Database +# https://docs.djangoproject.com/en/5.0/ref/settings/#databases + +DATABASES = { + 'default': { + 'ENGINE': 'django.db.backends.sqlite3', + 'NAME': BASE_DIR / 'db.sqlite3', + } +} + + +# Password validation +# https://docs.djangoproject.com/en/5.0/ref/settings/#auth-password-validators + +AUTH_PASSWORD_VALIDATORS = [ + { + 'NAME': 'django.contrib.auth.password_validation.UserAttributeSimilarityValidator', + }, + { + 'NAME': 'django.contrib.auth.password_validation.MinimumLengthValidator', + }, + { + 'NAME': 'django.contrib.auth.password_validation.CommonPasswordValidator', + }, + { + 'NAME': 'django.contrib.auth.password_validation.NumericPasswordValidator', + }, +] + + +# Internationalization +# https://docs.djangoproject.com/en/5.0/topics/i18n/ + +LANGUAGE_CODE = 'en-us' + +TIME_ZONE = 'Asia/Shanghai' + +USE_I18N = True + +USE_TZ 
= True + + +# Static files (CSS, JavaScript, Images) +# https://docs.djangoproject.com/en/5.0/howto/static-files/ + +STATIC_URL = 'static/' + +# Default primary key field type +# https://docs.djangoproject.com/en/5.0/ref/settings/#default-auto-field + +DEFAULT_AUTO_FIELD = 'django.db.models.BigAutoField' + +# QFR +QFR_EXPERIMENTS_DB = BASE_DIR.parent / 'data' / 'experiments.sqlite' diff --git a/web/qfrweb/urls.py b/web/qfrweb/urls.py new file mode 100644 index 0000000..14722cf --- /dev/null +++ b/web/qfrweb/urls.py @@ -0,0 +1,23 @@ +""" +URL configuration for qfrweb project. + +The `urlpatterns` list routes URLs to views. For more information please see: + https://docs.djangoproject.com/en/5.0/topics/http/urls/ +Examples: +Function views + 1. Add an import: from my_app import views + 2. Add a URL to urlpatterns: path('', views.home, name='home') +Class-based views + 1. Add an import: from other_app.views import Home + 2. Add a URL to urlpatterns: path('', Home.as_view(), name='home') +Including another URLconf + 1. Import the include() function: from django.urls import include, path + 2. Add a URL to urlpatterns: path('blog/', include('blog.urls')) +""" +from django.contrib import admin +from django.urls import path, include + +urlpatterns = [ + path("", include("lab.urls")), + path('admin/', admin.site.urls), +] diff --git a/web/qfrweb/wsgi.py b/web/qfrweb/wsgi.py new file mode 100644 index 0000000..3a3d3a1 --- /dev/null +++ b/web/qfrweb/wsgi.py @@ -0,0 +1,16 @@ +""" +WSGI config for qfrweb project. + +It exposes the WSGI callable as a module-level variable named ``application``. + +For more information on this file, see +https://docs.djangoproject.com/en/5.0/howto/deployment/wsgi/ +""" + +import os + +from django.core.wsgi import get_wsgi_application + +os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'qfrweb.settings') + +application = get_wsgi_application()