initial
This commit is contained in:
commit
e86c3abc53
2
.gitignore
vendored
Normal file
2
.gitignore
vendored
Normal file
@ -0,0 +1,2 @@
|
||||
.history/
|
||||
daily_data/
|
||||
285
notebooks/stocks_cointegration.ipynb
Normal file
285
notebooks/stocks_cointegration.ipynb
Normal file
@ -0,0 +1,285 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "8dd25d03",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Cointegration Analysis of Two Stocks"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "5a1ced6f",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## --- Cell 1: Input symbols ---"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "cebf9c69",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"\n",
|
||||
"symbol_a = \"AAPL\"\n",
|
||||
"symbol_b = \"MSFT\"\n",
|
||||
"\n",
|
||||
"API_KEY = \"hV11XYhaase6vxw2qmhh\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "d20bf0f1",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# --- Cell 2: Imports ---"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "bff2de78",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"\n",
|
||||
"import yfinance as yf\n",
|
||||
"import pandas as pd\n",
|
||||
"import numpy as np\n",
|
||||
"import statsmodels.api as sm\n",
|
||||
"from statsmodels.tsa.stattools import coint\n",
|
||||
"from statsmodels.tsa.vector_ar.vecm import coint_johansen\n",
|
||||
"import plotly.graph_objects as go\n",
|
||||
"import plotly.subplots as sp\n",
|
||||
"import quandl\n",
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "4c27469d",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## --- Cell 3: Load Data ---"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "f79c2d26",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Error downloading AAPL from Nasdaq Data Link (EOD): (Status 403) Something went wrong. Please try again. If you continue to have problems, please contact us at connect@quandl.com.\n",
|
||||
"Error downloading MSFT from Nasdaq Data Link (EOD): (Status 403) Something went wrong. Please try again. If you continue to have problems, please contact us at connect@quandl.com.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"ename": "ValueError",
|
||||
"evalue": "All objects passed were None",
|
||||
"output_type": "error",
|
||||
"traceback": [
|
||||
"\u001b[31m---------------------------------------------------------------------------\u001b[39m",
|
||||
"\u001b[31mValueError\u001b[39m Traceback (most recent call last)",
|
||||
"\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[3]\u001b[39m\u001b[32m, line 16\u001b[39m\n\u001b[32m 13\u001b[39m daily_b = get_nasdaq_close(symbol_b)\n\u001b[32m 15\u001b[39m \u001b[38;5;66;03m# Filter to the last 180 days of overlap\u001b[39;00m\n\u001b[32m---> \u001b[39m\u001b[32m16\u001b[39m daily = \u001b[43mpd\u001b[49m\u001b[43m.\u001b[49m\u001b[43mconcat\u001b[49m\u001b[43m(\u001b[49m\u001b[43m[\u001b[49m\u001b[43mdaily_a\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdaily_b\u001b[49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43maxis\u001b[49m\u001b[43m=\u001b[49m\u001b[32;43m1\u001b[39;49m\u001b[43m)\u001b[49m.dropna().last(\u001b[33m\"\u001b[39m\u001b[33m180D\u001b[39m\u001b[33m\"\u001b[39m)\n\u001b[32m 17\u001b[39m daily.columns = [symbol_a, symbol_b]\n\u001b[32m 20\u001b[39m intraday = pd.concat([intraday_a, intraday_b], axis=\u001b[32m1\u001b[39m).dropna()\n",
|
||||
"\u001b[36mFile \u001b[39m\u001b[32m~/.pyenv/python3.12-venv/lib/python3.12/site-packages/pandas/core/reshape/concat.py:382\u001b[39m, in \u001b[36mconcat\u001b[39m\u001b[34m(objs, axis, join, ignore_index, keys, levels, names, verify_integrity, sort, copy)\u001b[39m\n\u001b[32m 379\u001b[39m \u001b[38;5;28;01melif\u001b[39;00m copy \u001b[38;5;129;01mand\u001b[39;00m using_copy_on_write():\n\u001b[32m 380\u001b[39m copy = \u001b[38;5;28;01mFalse\u001b[39;00m\n\u001b[32m--> \u001b[39m\u001b[32m382\u001b[39m op = \u001b[43m_Concatenator\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 383\u001b[39m \u001b[43m \u001b[49m\u001b[43mobjs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 384\u001b[39m \u001b[43m \u001b[49m\u001b[43maxis\u001b[49m\u001b[43m=\u001b[49m\u001b[43maxis\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 385\u001b[39m \u001b[43m \u001b[49m\u001b[43mignore_index\u001b[49m\u001b[43m=\u001b[49m\u001b[43mignore_index\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 386\u001b[39m \u001b[43m \u001b[49m\u001b[43mjoin\u001b[49m\u001b[43m=\u001b[49m\u001b[43mjoin\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 387\u001b[39m \u001b[43m \u001b[49m\u001b[43mkeys\u001b[49m\u001b[43m=\u001b[49m\u001b[43mkeys\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 388\u001b[39m \u001b[43m \u001b[49m\u001b[43mlevels\u001b[49m\u001b[43m=\u001b[49m\u001b[43mlevels\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 389\u001b[39m \u001b[43m \u001b[49m\u001b[43mnames\u001b[49m\u001b[43m=\u001b[49m\u001b[43mnames\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 390\u001b[39m \u001b[43m \u001b[49m\u001b[43mverify_integrity\u001b[49m\u001b[43m=\u001b[49m\u001b[43mverify_integrity\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 391\u001b[39m \u001b[43m \u001b[49m\u001b[43mcopy\u001b[49m\u001b[43m=\u001b[49m\u001b[43mcopy\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 392\u001b[39m \u001b[43m \u001b[49m\u001b[43msort\u001b[49m\u001b[43m=\u001b[49m\u001b[43msort\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 393\u001b[39m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 395\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m op.get_result()\n",
|
||||
"\u001b[36mFile \u001b[39m\u001b[32m~/.pyenv/python3.12-venv/lib/python3.12/site-packages/pandas/core/reshape/concat.py:445\u001b[39m, in \u001b[36m_Concatenator.__init__\u001b[39m\u001b[34m(self, objs, axis, join, keys, levels, names, ignore_index, verify_integrity, copy, sort)\u001b[39m\n\u001b[32m 442\u001b[39m \u001b[38;5;28mself\u001b[39m.verify_integrity = verify_integrity\n\u001b[32m 443\u001b[39m \u001b[38;5;28mself\u001b[39m.copy = copy\n\u001b[32m--> \u001b[39m\u001b[32m445\u001b[39m objs, keys = \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_clean_keys_and_objs\u001b[49m\u001b[43m(\u001b[49m\u001b[43mobjs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mkeys\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 447\u001b[39m \u001b[38;5;66;03m# figure out what our result ndim is going to be\u001b[39;00m\n\u001b[32m 448\u001b[39m ndims = \u001b[38;5;28mself\u001b[39m._get_ndims(objs)\n",
|
||||
"\u001b[36mFile \u001b[39m\u001b[32m~/.pyenv/python3.12-venv/lib/python3.12/site-packages/pandas/core/reshape/concat.py:541\u001b[39m, in \u001b[36m_Concatenator._clean_keys_and_objs\u001b[39m\u001b[34m(self, objs, keys)\u001b[39m\n\u001b[32m 538\u001b[39m keys = Index(clean_keys, name=name, dtype=\u001b[38;5;28mgetattr\u001b[39m(keys, \u001b[33m\"\u001b[39m\u001b[33mdtype\u001b[39m\u001b[33m\"\u001b[39m, \u001b[38;5;28;01mNone\u001b[39;00m))\n\u001b[32m 540\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(objs_list) == \u001b[32m0\u001b[39m:\n\u001b[32m--> \u001b[39m\u001b[32m541\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\u001b[33m\"\u001b[39m\u001b[33mAll objects passed were None\u001b[39m\u001b[33m\"\u001b[39m)\n\u001b[32m 543\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m objs_list, keys\n",
|
||||
"\u001b[31mValueError\u001b[39m: All objects passed were None"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"\n",
|
||||
"\n",
|
||||
"quandl.ApiConfig.api_key = API_KEY\n",
|
||||
"\n",
|
||||
"def get_nasdaq_close(symbol):\n",
|
||||
" try:\n",
|
||||
" data = quandl.get(f\"EOD/{symbol}\", start_date=\"2023-01-01\")\n",
|
||||
" return data['Adj_Close']\n",
|
||||
" except Exception as e:\n",
|
||||
" print(f\"Error downloading {symbol} from Nasdaq Data Link (EOD):\", e)\n",
|
||||
" return None\n",
|
||||
"\n",
|
||||
"# Daily (from Nasdaq Data Link)\n",
|
||||
"daily_a = get_nasdaq_close(symbol_a)\n",
|
||||
"daily_b = get_nasdaq_close(symbol_b)\n",
|
||||
"\n",
|
||||
"# Filter to the last 180 days of overlap\n",
|
||||
"daily = pd.concat([daily_a, daily_b], axis=1).dropna().last(\"180D\")\n",
|
||||
"daily.columns = [symbol_a, symbol_b]\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"intraday = pd.concat([intraday_a, intraday_b], axis=1).dropna()\n",
|
||||
"intraday.columns = [symbol_a, symbol_b]\n",
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "5b2bf2ce",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# --- Cell 4: Cointegration Step 1 (Daily) ---\n",
|
||||
"# Engle-Granger (ADF)\n",
|
||||
"def engle_granger(a, b):\n",
|
||||
" model = sm.OLS(a, sm.add_constant(b)).fit()\n",
|
||||
" spread = model.resid\n",
|
||||
" adf_stat, pvalue, _ = sm.tsa.adfuller(spread)\n",
|
||||
" return pvalue\n",
|
||||
"\n",
|
||||
"# Johansen Test\n",
|
||||
"def johansen_test(df):\n",
|
||||
" result = coint_johansen(df, det_order=0, k_ar_diff=1)\n",
|
||||
" trace_stat = result.lr1\n",
|
||||
" crit_vals = result.cvt[:, 1] # 5% level\n",
|
||||
" return trace_stat[0] > crit_vals[0]\n",
|
||||
"\n",
|
||||
"# Rolling Validation (30-day window)\n",
|
||||
"rolling_pvalues = []\n",
|
||||
"rolling_johansen = []\n",
|
||||
"window = 30\n",
|
||||
"for i in range(window, len(daily)):\n",
|
||||
" a_slice = daily[symbol_a].iloc[i - window:i]\n",
|
||||
" b_slice = daily[symbol_b].iloc[i - window:i]\n",
|
||||
" df_slice = pd.concat([a_slice, b_slice], axis=1)\n",
|
||||
" pval = engle_granger(a_slice, b_slice)\n",
|
||||
" rolling_pvalues.append(pval)\n",
|
||||
" rolling_johansen.append(johansen_test(df_slice))\n",
|
||||
"\n",
|
||||
"mean_pval = np.mean(rolling_pvalues)\n",
|
||||
"passed_johansen_ratio = np.mean(rolling_johansen)\n",
|
||||
"\n",
|
||||
"print(\"\\nDaily Cointegration Results (Rolling 30-day):\")\n",
|
||||
"print(f\"Average Engle-Granger ADF p-value: {mean_pval:.4f}\")\n",
|
||||
"print(f\"Johansen test passed in {passed_johansen_ratio * 100:.1f}% of windows\")\n",
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "1fce1bd8",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# --- Cell 5: Cointegration Step 2 (Intraday) ---\n",
|
||||
"# Rolling hedge ratio (60-min window)\n",
|
||||
"window_hr = 60\n",
|
||||
"rolling_hedge_ratios = []\n",
|
||||
"for i in range(window_hr, len(intraday)):\n",
|
||||
" X = intraday[symbol_b].iloc[i - window_hr:i]\n",
|
||||
" y = intraday[symbol_a].iloc[i - window_hr:i]\n",
|
||||
" model = sm.OLS(y, sm.add_constant(X)).fit()\n",
|
||||
" rolling_hedge_ratios.append(model.params[1])\n",
|
||||
"\n",
|
||||
"# Extend hedge ratio to full length\n",
|
||||
"rolling_hedge_ratios = pd.Series(rolling_hedge_ratios, index=intraday.index[window_hr:])\n",
|
||||
"hedge_ratio_full = rolling_hedge_ratios.reindex(intraday.index, method='ffill').fillna(method='bfill')\n",
|
||||
"\n",
|
||||
"# Spread and Z-score\n",
|
||||
"spread = intraday[symbol_a] - hedge_ratio_full * intraday[symbol_b]\n",
|
||||
"zscore = (spread - spread.rolling(60).mean()) / spread.rolling(60).std()\n",
|
||||
"\n",
|
||||
"# --- Cell 6: Trading Signals ---\n",
|
||||
"entry_threshold = 2\n",
|
||||
"exit_threshold = 0.5\n",
|
||||
"transaction_cost = 0.0005 # 5 basis points per leg\n",
|
||||
"\n",
|
||||
"signals = pd.DataFrame(index=intraday.index)\n",
|
||||
"signals['zscore'] = zscore\n",
|
||||
"signals['position'] = 0\n",
|
||||
"signals.loc[signals['zscore'] > entry_threshold, 'position'] = -1 # short A, long B\n",
|
||||
"signals.loc[signals['zscore'] < -entry_threshold, 'position'] = 1 # long A, short B\n",
|
||||
"signals['position'] = signals['position'].where(~signals['zscore'].between(-exit_threshold, exit_threshold), 0)\n",
|
||||
"signals['position'] = signals['position'].ffill().fillna(0)\n",
|
||||
"\n",
|
||||
"# PnL simulation\n",
|
||||
"returns_a = intraday[symbol_a].pct_change().fillna(0)\n",
|
||||
"returns_b = intraday[symbol_b].pct_change().fillna(0)\n",
|
||||
"hedge_ratio_series = hedge_ratio_full.shift(1)\n",
|
||||
"\n",
|
||||
"# Gross PnL\n",
|
||||
"signals['pnl'] = signals['position'].shift(1) * (returns_a - hedge_ratio_series * returns_b)\n",
|
||||
"\n",
|
||||
"# Transaction cost estimation\n",
|
||||
"signals['trades'] = signals['position'].diff().abs()\n",
|
||||
"signals['costs'] = signals['trades'] * transaction_cost * (1 + hedge_ratio_series)\n",
|
||||
"signals['net_pnl'] = signals['pnl'] - signals['costs']\n",
|
||||
"signals['cum_pnl'] = signals['net_pnl'].cumsum()\n",
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "04d10694",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# --- Cell 7: Export Results ---\n",
|
||||
"signals.to_csv(\"signals_and_pnl.csv\")\n",
|
||||
"print(\"Exported: signals_and_pnl.csv\")\n",
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "57df70b6",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# --- Cell 8: Visualization ---\n",
|
||||
"fig = sp.make_subplots(rows=3, cols=1, shared_xaxes=True,\n",
|
||||
" subplot_titles=(\"Price Series (1-min)\", \"Spread Z-Score\", \"Cumulative PnL (Net)\",))\n",
|
||||
"\n",
|
||||
"# Price series\n",
|
||||
"fig.add_trace(go.Scatter(x=intraday.index, y=intraday[symbol_a], name=symbol_a), row=1, col=1)\n",
|
||||
"fig.add_trace(go.Scatter(x=intraday.index, y=intraday[symbol_b], name=symbol_b), row=1, col=1)\n",
|
||||
"\n",
|
||||
"# Z-score\n",
|
||||
"fig.add_trace(go.Scatter(x=signals.index, y=signals['zscore'], name=\"Z-Score\"), row=2, col=1)\n",
|
||||
"fig.add_hline(y=entry_threshold, line_dash=\"dot\", row=2, col=1)\n",
|
||||
"fig.add_hline(y=-entry_threshold, line_dash=\"dot\", row=2, col=1)\n",
|
||||
"fig.add_hline(y=0, line_dash=\"dash\", row=2, col=1)\n",
|
||||
"\n",
|
||||
"# PnL\n",
|
||||
"fig.add_trace(go.Scatter(x=signals.index, y=signals['cum_pnl'], name=\"Cumulative PnL (Net)\"), row=3, col=1)\n",
|
||||
"\n",
|
||||
"fig.update_layout(title=f\"Cointegration Strategy: {symbol_a} vs {symbol_b} (1-Minute Data)\", height=950)\n",
|
||||
"fig.show()\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "python3.12-venv",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.12.11"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
199
requirements.txt
Normal file
199
requirements.txt
Normal file
@ -0,0 +1,199 @@
|
||||
aiohttp>=3.8.4
|
||||
aiosignal>=1.3.1
|
||||
async-timeout>=4.0.2
|
||||
attrs>=21.2.0
|
||||
beautifulsoup4>=4.10.0
|
||||
black>=23.3.0
|
||||
flake8>=6.0.0
|
||||
certifi>=2020.6.20
|
||||
chardet>=4.0.0
|
||||
charset-normalizer>=3.1.0
|
||||
click>=8.0.3
|
||||
colorama>=0.4.4
|
||||
configobj>=5.0.6
|
||||
cryptography>=3.4.8
|
||||
distro>=1.7.0
|
||||
docker>=5.0.3
|
||||
dockerpty>=0.4.1
|
||||
docopt>=0.6.2
|
||||
eyeD3>=0.8.10
|
||||
filelock>=3.6.0
|
||||
frozenlist>=1.3.3
|
||||
grpcio>=1.30.2
|
||||
hjson>=3.0.2
|
||||
html5lib>=1.1
|
||||
httplib2>=0.20.2
|
||||
idna>=3.3
|
||||
ipython>=8.18.1
|
||||
ipywidgets>=8.1.1
|
||||
ifaddr>=0.1.7
|
||||
IMDbPY>=2021.4.18
|
||||
ipykernel>=6.29.5
|
||||
jeepney>=0.7.1
|
||||
jsonschema>=3.2.0
|
||||
jupyter>=1.0.0
|
||||
keyring>=23.5.0
|
||||
launchpadlib>=1.10.16
|
||||
lazr.restfulclient>=0.14.4
|
||||
lazr.uri>=1.0.6
|
||||
lxml>=4.8.0
|
||||
Mako>=1.1.3
|
||||
Markdown>=3.3.6
|
||||
MarkupSafe>=2.0.1
|
||||
matplotlib>=3.10.3
|
||||
more-itertools>=8.10.0
|
||||
multidict>=6.0.4
|
||||
mypy>=0.942
|
||||
mypy-extensions>=0.4.3
|
||||
nbformat>=5.10.2
|
||||
netaddr>=0.8.0
|
||||
######### netifaces>=0.11.0
|
||||
numpy>=1.26.4,<2.3.0
|
||||
oauthlib>=3.2.0
|
||||
packaging>=23.1
|
||||
pandas>=2.2.3
|
||||
pathspec>=0.11.1
|
||||
pexpect>=4.8.0
|
||||
Pillow>=9.0.1
|
||||
platformdirs>=3.2.0
|
||||
plotly>=5.19.0
|
||||
protobuf>=3.12.4
|
||||
psutil>=5.9.0
|
||||
ptyprocess>=0.7.0
|
||||
pycurl>=7.44.1
|
||||
pyelftools>=0.27
|
||||
Pygments>=2.11.2
|
||||
pyparsing>=2.4.7
|
||||
pyrsistent>=0.18.1
|
||||
python-debian>=0.1.43 #+ubuntu1.1
|
||||
python-dotenv>=0.19.2
|
||||
python-magic>=0.4.24
|
||||
python-xlib>=0.29
|
||||
pyxdg>=0.27
|
||||
PyYAML>=6.0
|
||||
reportlab>=3.6.8
|
||||
requests>=2.25.1
|
||||
requests-file>=1.5.1
|
||||
seaborn>=0.13.2
|
||||
SecretStorage>=3.3.1
|
||||
setproctitle>=1.2.2
|
||||
six>=1.16.0
|
||||
soupsieve>=2.3.1
|
||||
ssh-import-id>=5.11
|
||||
statsmodels>=0.14.4
|
||||
texttable>=1.6.4
|
||||
tldextract>=3.1.2
|
||||
tomli>=1.2.2
|
||||
######## typed-ast>=1.4.3
|
||||
types-aiofiles>=0.1
|
||||
types-annoy>=1.17
|
||||
types-appdirs>=1.4
|
||||
types-atomicwrites>=1.4
|
||||
types-aws-xray-sdk>=2.8
|
||||
types-babel>=2.9
|
||||
types-backports-abc>=0.5
|
||||
types-backports.ssl-match-hostname>=3.7
|
||||
types-beautifulsoup4>=4.10
|
||||
types-bleach>=4.1
|
||||
types-boto>=2.49
|
||||
types-braintree>=4.11
|
||||
types-cachetools>=4.2
|
||||
types-caldav>=0.8
|
||||
types-certifi>=2020.4
|
||||
types-characteristic>=14.3
|
||||
types-chardet>=4.0
|
||||
types-click>=7.1
|
||||
types-click-spinner>=0.1
|
||||
types-colorama>=0.4
|
||||
types-commonmark>=0.9
|
||||
types-contextvars>=0.1
|
||||
types-croniter>=1.0
|
||||
types-cryptography>=3.3
|
||||
types-dataclasses>=0.1
|
||||
types-dateparser>=1.0
|
||||
types-DateTimeRange>=0.1
|
||||
types-decorator>=0.1
|
||||
types-Deprecated>=1.2
|
||||
types-docopt>=0.6
|
||||
types-docutils>=0.17
|
||||
types-editdistance>=0.5
|
||||
types-emoji>=1.2
|
||||
types-entrypoints>=0.3
|
||||
types-enum34>=1.1
|
||||
types-filelock>=3.2
|
||||
types-first>=2.0
|
||||
types-Flask>=1.1
|
||||
types-freezegun>=1.1
|
||||
types-frozendict>=0.1
|
||||
types-futures>=3.3
|
||||
types-html5lib>=1.1
|
||||
types-httplib2>=0.19
|
||||
types-humanfriendly>=9.2
|
||||
types-ipaddress>=1.0
|
||||
types-itsdangerous>=1.1
|
||||
types-JACK-Client>=0.1
|
||||
types-Jinja2>=2.11
|
||||
types-jmespath>=0.10
|
||||
types-jsonschema>=3.2
|
||||
types-Markdown>=3.3
|
||||
types-MarkupSafe>=1.1
|
||||
types-mock>=4.0
|
||||
types-mypy-extensions>=0.4
|
||||
types-mysqlclient>=2.0
|
||||
types-oauthlib>=3.1
|
||||
types-orjson>=3.6
|
||||
types-paramiko>=2.7
|
||||
types-Pillow>=8.3
|
||||
types-polib>=1.1
|
||||
types-prettytable>=2.1
|
||||
types-protobuf>=3.17
|
||||
types-psutil>=5.8
|
||||
types-psycopg2>=2.9
|
||||
types-pyaudio>=0.2
|
||||
types-pycurl>=0.1
|
||||
types-pyfarmhash>=0.2
|
||||
types-Pygments>=2.9
|
||||
types-PyMySQL>=1.0
|
||||
types-pyOpenSSL>=20.0
|
||||
types-pyRFC3339>=0.1
|
||||
types-pysftp>=0.2
|
||||
types-pytest-lazy-fixture>=0.6
|
||||
types-python-dateutil>=2.8
|
||||
types-python-gflags>=3.1
|
||||
types-python-nmap>=0.6
|
||||
types-python-slugify>=5.0
|
||||
types-pytz>=2021.1
|
||||
types-pyvmomi>=7.0
|
||||
types-PyYAML>=5.4
|
||||
types-redis>=3.5
|
||||
types-requests>=2.25
|
||||
types-retry>=0.9
|
||||
types-selenium>=3.141
|
||||
types-Send2Trash>=1.8
|
||||
types-setuptools>=57.4
|
||||
types-simplejson>=3.17
|
||||
types-singledispatch>=3.7
|
||||
types-six>=1.16
|
||||
types-slumber>=0.7
|
||||
types-stripe>=2.59
|
||||
types-tabulate>=0.8
|
||||
types-termcolor>=1.1
|
||||
types-toml>=0.10
|
||||
types-toposort>=1.6
|
||||
types-ttkthemes>=3.2
|
||||
types-typed-ast>=1.4
|
||||
types-tzlocal>=0.1
|
||||
types-ujson>=0.1
|
||||
types-vobject>=0.9
|
||||
types-waitress>=0.1
|
||||
types-Werkzeug>=1.0
|
||||
types-xxhash>=2.0
|
||||
typing-extensions>=3.10.0.2
|
||||
Unidecode>=1.3.3
|
||||
urllib3>=1.26.5
|
||||
wadllib>=1.3.6
|
||||
webencodings>=0.5.1
|
||||
websocket-client>=1.2.3
|
||||
yarl>=1.9.1
|
||||
yfinance>=0.2.65
|
||||
zipp>=1.0.0
|
||||
145
scripts/load_equity_pair_daily.sh
Executable file
145
scripts/load_equity_pair_daily.sh
Executable file
@ -0,0 +1,145 @@
|
||||
#!/usr/bin/env bash
|
||||
|
||||
# Usage: ./scripts/load_equity_pair_daily.sh -A GS -B DIA -f 20241201 -t 20250131 -T ./daily_md
|
||||
|
||||
usage() {
|
||||
echo "Usage: $0 -A <symbolA> -B <symbolB> -f <from_date (YYYYMMDD)> -t <to_date (YYYYMMDD)> -T <target_directory>"
|
||||
exit 1
|
||||
}
|
||||
# ---------------- cmdline
|
||||
while getopts "A:B:f:t:T:h" opt; do
|
||||
case ${opt} in
|
||||
h)
|
||||
usage
|
||||
;;
|
||||
A )
|
||||
symbolA=$OPTARG
|
||||
;;
|
||||
B )
|
||||
symbolB=$OPTARG
|
||||
;;
|
||||
f )
|
||||
from_date=$OPTARG
|
||||
;;
|
||||
t )
|
||||
to_date=$OPTARG
|
||||
;;
|
||||
T )
|
||||
target_dir=$OPTARG
|
||||
;;
|
||||
\? )
|
||||
echo "Invalid option: -$OPTARG" >&2
|
||||
usage
|
||||
;;
|
||||
: )
|
||||
echo "Option -$OPTARG requires an argument." >&2
|
||||
usage
|
||||
;;
|
||||
esac
|
||||
done
|
||||
# ---------------- cmdline
|
||||
if [ -z ${symbolA} ] || [ -z ${symbolB} ] || [ -z ${from_date} ] ||[ -z ${to_date} ] || [ -z ${target_dir} ]; then
|
||||
usage
|
||||
fi
|
||||
SourceRoot=cvtt@hs01.cvtt.vpn:/mnt/usb1/md_archive/equity/alpaca_md
|
||||
TargetFile=$(realpath ${target_dir})/${from_date}-${to_date}.${symbolA}-${symbolB}.daily.db
|
||||
|
||||
mkdir -p ${target_dir} || exit 1
|
||||
|
||||
temp_dir=$(mktemp -d)
|
||||
pushd ${temp_dir}
|
||||
clean_temp() {
|
||||
popd
|
||||
rm -rf ${temp_dir}
|
||||
}
|
||||
trap clean_temp EXIT
|
||||
|
||||
echo " ------ Downloading..."
|
||||
|
||||
load_symbol() {
|
||||
# Loop over dates from from_date to to_date
|
||||
symbol=${1}
|
||||
current_date="$from_date"
|
||||
while [[ "$current_date" -le "$to_date" ]]; do
|
||||
year=${current_date:0:4}
|
||||
symb_initial=${symbol:0:1}
|
||||
remote_path="${SourceRoot}/${year}/${symb_initial}/${symbol}/${current_date}.${symbol}.alpaca_1m_bars.db.gz"
|
||||
|
||||
echo "Fetching: $remote_path"
|
||||
Cmd="rsync -avz ${remote_path} ${temp_dir}/ || echo \"Missing: ${current_date}\""
|
||||
echo $Cmd
|
||||
eval $Cmd
|
||||
|
||||
# Increment date
|
||||
current_date=$(date -d "$current_date +1 day" +"%Y%m%d")
|
||||
done
|
||||
}
|
||||
|
||||
load_symbol ${symbolA}
|
||||
load_symbol ${symbolB}
|
||||
|
||||
gunzip *.gz
|
||||
ls -l ${temp_dir}
|
||||
|
||||
sqlite3 "$TargetFile" <<EOF
|
||||
CREATE TABLE IF NOT EXISTS md_daily_bars (
|
||||
date TEXT,
|
||||
symbol TEXT,
|
||||
open REAL,
|
||||
high REAL,
|
||||
low REAL,
|
||||
close REAL,
|
||||
volume REAL,
|
||||
vwap REAL,
|
||||
num_trades INTEGER
|
||||
);
|
||||
|
||||
CREATE UNIQUE INDEX IF NOT EXISTS idx_md_daily_bars_date_symbol
|
||||
ON md_daily_bars (date, symbol);
|
||||
EOF
|
||||
|
||||
set -euo pipefail
|
||||
shopt -s nullglob
|
||||
|
||||
for source_file in *.alpaca_1m_bars.db; do
|
||||
[[ -f "$source_file" ]] || continue
|
||||
|
||||
symbol=$(basename "$source_file" | cut -d. -f2)
|
||||
|
||||
echo "Processing: $source_file (symbol=$symbol)"
|
||||
file_date=${source_file:0:8}
|
||||
export TZ=America/New_York
|
||||
offset_raw=$(date -d "$file_date" '+%z')
|
||||
hours=$(echo ${offset_raw:1:2} | awk '{printf("%d", $1)}')
|
||||
sqlite_offset="-${hours} hours"
|
||||
|
||||
sqlite3 $source_file <<EOF
|
||||
delete from md_1min_bars
|
||||
where
|
||||
time(tstamp, '${sqlite_offset}') < time('09:30:00')
|
||||
or time(tstamp, '${sqlite_offset}') > time('16:00:00');
|
||||
|
||||
EOF
|
||||
sqlite3 $TargetFile <<EOF
|
||||
ATTACH DATABASE '$source_file' AS tmp;
|
||||
|
||||
INSERT OR REPLACE INTO md_daily_bars
|
||||
SELECT
|
||||
date(tstamp) AS date,
|
||||
'$symbol' AS symbol,
|
||||
(SELECT open FROM tmp.md_1min_bars ORDER BY tstamp ASC LIMIT 1) AS open,
|
||||
MAX(high) AS high,
|
||||
MIN(low) AS low,
|
||||
(SELECT close FROM tmp.md_1min_bars ORDER BY tstamp DESC LIMIT 1) AS close,
|
||||
SUM(volume) AS volume,
|
||||
SUM(num_trades) AS num_trades,
|
||||
SUM(vwap * volume) / SUM(volume) AS vwap
|
||||
FROM tmp.md_1min_bars;
|
||||
EOF
|
||||
|
||||
done
|
||||
|
||||
sqlite3 $TargetFile <<EOF
|
||||
select * from md_daily_bars;
|
||||
EOF
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user