{ "cells": [ { "cell_type": "markdown", "id": "c7dac550-c0ed-4ec7-846e-8edb2086c9cc", "metadata": {}, "source": [ "# Augmented Dickey-Fuller Test (ADF)\n", "Stationarity Test" ] }, { "cell_type": "markdown", "id": "778b9362-37e3-40e0-a20a-1ca5e2cddf05", "metadata": {}, "source": [ "## Preparing the Data" ] }, { "cell_type": "code", "execution_count": 9, "id": "998ecc54-aaba-4761-bb98-1eda5c9fa091", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
idtstamptarget
0PAIR-BTC-USDT172247040000000000064640.679892
1PAIR-BTC-USDT172247046000000000064652.991289
2PAIR-BTC-USDT172247052000000000064660.005093
3PAIR-BTC-USDT172247058000000000064653.482847
4PAIR-BTC-USDT172247064000000000064687.458279
............
1372PAIR-BTC-USDT172255650000000000065439.307663
1373PAIR-BTC-USDT172255656000000000065445.733114
1374PAIR-BTC-USDT172255662000000000065446.371741
1375PAIR-BTC-USDT172255668000000000065420.879478
1376PAIR-BTC-USDT172255674000000000065377.032222
\n", "

1377 rows × 3 columns

\n", "
" ], "text/plain": [
 " id tstamp target\n",
 "0 PAIR-BTC-USDT 1722470400000000000 64640.679892\n",
 "1 PAIR-BTC-USDT 1722470460000000000 64652.991289\n",
 "2 PAIR-BTC-USDT 1722470520000000000 64660.005093\n",
 "3 PAIR-BTC-USDT 1722470580000000000 64653.482847\n",
 "4 PAIR-BTC-USDT 1722470640000000000 64687.458279\n",
 "... ... ... ...\n",
 "1372 PAIR-BTC-USDT 1722556500000000000 65439.307663\n",
 "1373 PAIR-BTC-USDT 1722556560000000000 65445.733114\n",
 "1374 PAIR-BTC-USDT 1722556620000000000 65446.371741\n",
 "1375 PAIR-BTC-USDT 1722556680000000000 65420.879478\n",
 "1376 PAIR-BTC-USDT 1722556740000000000 65377.032222\n",
 "\n",
 "[1377 rows x 3 columns]"
 ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ],
 "source": [
  "import sqlite3\n",
  "from contextlib import closing\n",
  "from typing import Optional\n",
  "\n",
  "import numpy as np\n",
  "import pandas as pd\n",
  "from statsmodels.tsa.stattools import adfuller\n",
  "\n",
  "\n",
  "def demo_example_data(n_obs: int = 100, seed: Optional[int] = None) -> pd.Series:\n",
  "    \"\"\"Build a synthetic daily series of standard-normal noise for the ADF demo.\n",
  "\n",
  "    Args:\n",
  "        n_obs: Number of daily observations, starting at 2020-01-01.\n",
  "        seed: Seed for the random generator. The default (None) draws fresh\n",
  "            values on every call; pass an integer for a reproducible series.\n",
  "\n",
  "    Returns:\n",
  "        A float Series indexed by a daily DatetimeIndex.\n",
  "    \"\"\"\n",
  "    rng = np.random.default_rng(seed)\n",
  "    dates = pd.date_range(start='2020-01-01', periods=n_obs, freq='D')\n",
  "    data = pd.Series(rng.standard_normal(n_obs), index=dates)\n",
  "\n",
  "    # Show the first few rows so the caller sees what was generated\n",
  "    print(data.head())\n",
  "    return data\n",
  "\n",
  "\n",
  "def load_df_from_db(file: str, query: str, params=None) -> pd.DataFrame:\n",
  "    \"\"\"Run `query` against the SQLite database at `file` and return the rows.\n",
  "\n",
  "    The result must contain a `tstamp` column; it is converted with\n",
  "    `pd.to_datetime` into a `timestamp` column, which becomes the index.\n",
  "\n",
  "    Args:\n",
  "        file: Path to the SQLite database file.\n",
  "        query: SQL text, optionally containing placeholders.\n",
  "        params: Values bound to the placeholders in `query` (a sequence for\n",
  "            `?` placeholders, a mapping for `:name` placeholders). None means\n",
  "            the query is executed without bound values.\n",
  "\n",
  "    Returns:\n",
  "        The query result indexed by `timestamp`.\n",
  "    \"\"\"\n",
  "    # sqlite3's own context manager only commits or rolls back;\n",
  "    # closing() is what actually releases the connection.\n",
  "    with closing(sqlite3.connect(file)) as conn:\n",
  "        df = pd.read_sql_query(query, conn, params=params)\n",
  "    df['timestamp'] = pd.to_datetime(df['tstamp'])\n",
  "    return df.set_index('timestamp')\n",
  "\n",
  "\n",
  "file_path = \"/workspace/data/crypto_md/20240801.mktdata.ohlcv.db\"\n",
  "instrument_id = 'PAIR-BTC-USDT'\n",
  "\n",
  "# The instrument is bound as a parameter rather than formatted into the SQL text.\n",
  "# ORDER BY makes the row order explicit: SQL guarantees none without it, and the\n",
  "# series is meant to be read in time order.\n",
  "query = \"\"\"\n",
  "select\n",
  "    instrument_id as id,\n",
  "    tstamp,\n",
  "    vwap\n",
  "from bnbspot_ohlcv_1min\n",
  "where instrument_id = :instrument_id\n",
  "order by tstamp\n",
  "\"\"\"\n",
  "\n",
  "df = (\n",
  "    load_df_from_db(file=file_path, query=query, params={'instrument_id': instrument_id})\n",
  "    .rename(columns={'vwap': 'target'})\n",
  "    .reset_index(drop=True)  # plain RangeIndex; tstamp stays as a regular column\n",
  ")\n",
  "df"
 ]
},
{ "cell_type": "markdown", "id": "43e43154-5a04-4a1d-977c-7f930d62f241", "metadata": {}, "source": [ "## Running the Test" ] },
{ "cell_type": "code", "execution_count": 6, "id": "b97d357f-787b-4cff-849f-b91b1ec35e7c", "metadata": {},
 "outputs": [ { "name": "stdout", "output_type": "stream", "text": [
  "2020-01-01 -1.652908\n",
  "2020-01-02 -0.157302\n",
  "2020-01-03 -1.396187\n",
  "2020-01-04 0.150374\n",
  "2020-01-05 1.048603\n",
  "Freq: D, dtype: float64\n",
  "ADF Statistic: -9.985535987881171\n",
  "p-value: 2.060269774403535e-17\n",
  "Critical Values: {'1%': -3.498198082189098, '5%': -2.891208211860468, '10%': -2.5825959973472097}\n"
 ] } ],
 "source": [
  "# pandas and adfuller are imported in the first code cell.\n",
  "\n",
  "# Example time series data (synthetic noise from demo_example_data)\n",
  "data = demo_example_data()\n",
  "\n",
  "# Perform the ADF test\n",
  "result = adfuller(data)\n",
  "\n",
  "# Extract and print the results\n",
  "print('ADF Statistic:', result[0])\n",
  "print('p-value:', result[1])\n",
  "print('Critical Values:', result[4])"
 ]
} ],
"metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.13" } }, "nbformat": 4, "nbformat_minor": 5 }