jupyter_gpu/pytorch/notebooks/Mean Reversion Studies.ipynb
2024-08-14 01:54:48 +00:00

264 lines
7.8 KiB
Plaintext
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

{
"cells": [
{
"cell_type": "markdown",
"id": "c7dac550-c0ed-4ec7-846e-8edb2086c9cc",
"metadata": {},
"source": [
"# Augmented Dickey-Fuller Test (ADF)\n",
"Stationarity Test"
]
},
{
"cell_type": "markdown",
"id": "778b9362-37e3-40e0-a20a-1ca5e2cddf05",
"metadata": {},
"source": [
"## Preparing The data"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "998ecc54-aaba-4761-bb98-1eda5c9fa091",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>id</th>\n",
" <th>tstamp</th>\n",
" <th>target</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>PAIR-BTC-USDT</td>\n",
" <td>1722470400000000000</td>\n",
" <td>64640.679892</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>PAIR-BTC-USDT</td>\n",
" <td>1722470460000000000</td>\n",
" <td>64652.991289</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>PAIR-BTC-USDT</td>\n",
" <td>1722470520000000000</td>\n",
" <td>64660.005093</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>PAIR-BTC-USDT</td>\n",
" <td>1722470580000000000</td>\n",
" <td>64653.482847</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>PAIR-BTC-USDT</td>\n",
" <td>1722470640000000000</td>\n",
" <td>64687.458279</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1372</th>\n",
" <td>PAIR-BTC-USDT</td>\n",
" <td>1722556500000000000</td>\n",
" <td>65439.307663</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1373</th>\n",
" <td>PAIR-BTC-USDT</td>\n",
" <td>1722556560000000000</td>\n",
" <td>65445.733114</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1374</th>\n",
" <td>PAIR-BTC-USDT</td>\n",
" <td>1722556620000000000</td>\n",
" <td>65446.371741</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1375</th>\n",
" <td>PAIR-BTC-USDT</td>\n",
" <td>1722556680000000000</td>\n",
" <td>65420.879478</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1376</th>\n",
" <td>PAIR-BTC-USDT</td>\n",
" <td>1722556740000000000</td>\n",
" <td>65377.032222</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>1377 rows × 3 columns</p>\n",
"</div>"
],
"text/plain": [
" id tstamp target\n",
"0 PAIR-BTC-USDT 1722470400000000000 64640.679892\n",
"1 PAIR-BTC-USDT 1722470460000000000 64652.991289\n",
"2 PAIR-BTC-USDT 1722470520000000000 64660.005093\n",
"3 PAIR-BTC-USDT 1722470580000000000 64653.482847\n",
"4 PAIR-BTC-USDT 1722470640000000000 64687.458279\n",
"... ... ... ...\n",
"1372 PAIR-BTC-USDT 1722556500000000000 65439.307663\n",
"1373 PAIR-BTC-USDT 1722556560000000000 65445.733114\n",
"1374 PAIR-BTC-USDT 1722556620000000000 65446.371741\n",
"1375 PAIR-BTC-USDT 1722556680000000000 65420.879478\n",
"1376 PAIR-BTC-USDT 1722556740000000000 65377.032222\n",
"\n",
"[1377 rows x 3 columns]"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import pandas as pd\n",
"from statsmodels.tsa.stattools import adfuller\n",
"import numpy as np\n",
"\n",
"def demo_example_data() -> pd.Series:\n",
" # Generate example time series data\n",
" # np.random.seed(0)\n",
" time_series_data = np.random.randn(100) # Random data for demonstration\n",
" \n",
" # Create a pandas Series\n",
" data = pd.Series(time_series_data)\n",
" \n",
" # Optionally, you can add a datetime index if you have time-indexed data\n",
" dates = pd.date_range(start='2020-01-01', periods=len(time_series_data), freq='D')\n",
" data = pd.Series(time_series_data, index=dates)\n",
"\n",
" # Display the first few rows of the data\n",
" print(data.head())\n",
" return data\n",
"\n",
"def load_df_from_db(file: str, query: str) -> pd.DataFrame:\n",
" import sqlite3 \n",
" \n",
" conn = sqlite3.connect(file)\n",
" df = pd.read_sql_query(query, conn)\n",
" df['timestamp'] = pd.to_datetime(df['tstamp'])\n",
" df.set_index('timestamp', inplace=True)\n",
" return df\n",
"\n",
"file_path = \"/workspace/data/crypto_md/20240801.mktdata.ohlcv.db\"\n",
"instrument_id='PAIR-BTC-USDT'\n",
"query = f\"\"\"\n",
"select \n",
" instrument_id as id, \n",
" tstamp, \n",
" vwap \n",
"from bnbspot_ohlcv_1min \n",
"where instrument_id = '{instrument_id}'\n",
"\"\"\"\n",
"\n",
"df = load_df_from_db(file=file_path, query=query)\n",
"df.rename(columns={'vwap': 'target'}, inplace=True)\n",
"# df[\"tstamp2\"] = df.index\n",
"df = df.reset_index()\n",
"df = df.drop([\"timestamp\"], axis=1) \n",
"df"
]
},
{
"cell_type": "markdown",
"id": "43e43154-5a04-4a1d-977c-7f930d62f241",
"metadata": {},
"source": [
"## Running Test"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "b97d357f-787b-4cff-849f-b91b1ec35e7c",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"2020-01-01 -1.652908\n",
"2020-01-02 -0.157302\n",
"2020-01-03 -1.396187\n",
"2020-01-04 0.150374\n",
"2020-01-05 1.048603\n",
"Freq: D, dtype: float64\n",
"ADF Statistic: -9.985535987881171\n",
"p-value: 2.060269774403535e-17\n",
"Critical Values: {'1%': -3.498198082189098, '5%': -2.891208211860468, '10%': -2.5825959973472097}\n"
]
}
],
"source": [
"import pandas as pd\n",
"from statsmodels.tsa.stattools import adfuller\n",
"\n",
"# Example time series data\n",
"data = demo_example_data()\n",
"\n",
"# Perform the ADF test\n",
"result = adfuller(data)\n",
"\n",
"# Extract and print the results\n",
"print('ADF Statistic:', result[0])\n",
"print('p-value:', result[1])\n",
"print('Critical Values:', result[4])\n"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.13"
}
},
"nbformat": 4,
"nbformat_minor": 5
}