264 lines
7.8 KiB
Plaintext
264 lines
7.8 KiB
Plaintext
{
|
||
"cells": [
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "c7dac550-c0ed-4ec7-846e-8edb2086c9cc",
|
||
"metadata": {},
|
||
"source": [
|
||
"# Augmented Dickey-Fuller Test (ADF)\n",
|
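||
"Stationarity test: the null hypothesis is that the series has a unit root (i.e. is non-stationary), so a small p-value is evidence that the series is stationary."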
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "778b9362-37e3-40e0-a20a-1ca5e2cddf05",
|
||
"metadata": {},
|
||
"source": [
|
||
"## Preparing the Data"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 9,
|
||
"id": "998ecc54-aaba-4761-bb98-1eda5c9fa091",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>id</th>\n",
|
||
" <th>tstamp</th>\n",
|
||
" <th>target</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>PAIR-BTC-USDT</td>\n",
|
||
" <td>1722470400000000000</td>\n",
|
||
" <td>64640.679892</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>PAIR-BTC-USDT</td>\n",
|
||
" <td>1722470460000000000</td>\n",
|
||
" <td>64652.991289</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>PAIR-BTC-USDT</td>\n",
|
||
" <td>1722470520000000000</td>\n",
|
||
" <td>64660.005093</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>PAIR-BTC-USDT</td>\n",
|
||
" <td>1722470580000000000</td>\n",
|
||
" <td>64653.482847</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>PAIR-BTC-USDT</td>\n",
|
||
" <td>1722470640000000000</td>\n",
|
||
" <td>64687.458279</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>...</th>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1372</th>\n",
|
||
" <td>PAIR-BTC-USDT</td>\n",
|
||
" <td>1722556500000000000</td>\n",
|
||
" <td>65439.307663</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1373</th>\n",
|
||
" <td>PAIR-BTC-USDT</td>\n",
|
||
" <td>1722556560000000000</td>\n",
|
||
" <td>65445.733114</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1374</th>\n",
|
||
" <td>PAIR-BTC-USDT</td>\n",
|
||
" <td>1722556620000000000</td>\n",
|
||
" <td>65446.371741</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1375</th>\n",
|
||
" <td>PAIR-BTC-USDT</td>\n",
|
||
" <td>1722556680000000000</td>\n",
|
||
" <td>65420.879478</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1376</th>\n",
|
||
" <td>PAIR-BTC-USDT</td>\n",
|
||
" <td>1722556740000000000</td>\n",
|
||
" <td>65377.032222</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"<p>1377 rows × 3 columns</p>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" id tstamp target\n",
|
||
"0 PAIR-BTC-USDT 1722470400000000000 64640.679892\n",
|
||
"1 PAIR-BTC-USDT 1722470460000000000 64652.991289\n",
|
||
"2 PAIR-BTC-USDT 1722470520000000000 64660.005093\n",
|
||
"3 PAIR-BTC-USDT 1722470580000000000 64653.482847\n",
|
||
"4 PAIR-BTC-USDT 1722470640000000000 64687.458279\n",
|
||
"... ... ... ...\n",
|
||
"1372 PAIR-BTC-USDT 1722556500000000000 65439.307663\n",
|
||
"1373 PAIR-BTC-USDT 1722556560000000000 65445.733114\n",
|
||
"1374 PAIR-BTC-USDT 1722556620000000000 65446.371741\n",
|
||
"1375 PAIR-BTC-USDT 1722556680000000000 65420.879478\n",
|
||
"1376 PAIR-BTC-USDT 1722556740000000000 65377.032222\n",
|
||
"\n",
|
||
"[1377 rows x 3 columns]"
|
||
]
|
||
},
|
||
"execution_count": 9,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"import pandas as pd\n",
|
||
"from statsmodels.tsa.stattools import adfuller\n",
|
||
"import numpy as np\n",
|
||
"\n",
|
||
"def demo_example_data(n_points: int = 100, seed=None) -> pd.Series:\n",
"    \"\"\"Build a synthetic, daily-indexed series of standard-normal noise.\n",
"\n",
"    Args:\n",
"        n_points: Number of observations to generate (default 100).\n",
"        seed: Optional integer seed. When given, a private\n",
"            ``np.random.RandomState(seed)`` is used so repeated calls return\n",
"            identical data; when ``None`` the global NumPy generator is used,\n",
"            exactly as before.\n",
"\n",
"    Returns:\n",
"        A float Series of ``n_points`` values indexed by consecutive calendar\n",
"        days starting at 2020-01-01.\n",
"    \"\"\"\n",
"    rng = np.random if seed is None else np.random.RandomState(seed)\n",
"    time_series_data = rng.randn(n_points)  # Random data for demonstration\n",
"\n",
"    # Attach a datetime index so the series looks like real time-indexed data\n",
"    dates = pd.date_range(start='2020-01-01', periods=len(time_series_data), freq='D')\n",
"    data = pd.Series(time_series_data, index=dates)\n",
"\n",
"    # Display the first few rows of the data\n",
"    print(data.head())\n",
"    return data\n",
|
||
"\n",
|
||
"def load_df_from_db(file: str, query: str) -> pd.DataFrame:\n",
"    \"\"\"Run ``query`` against the SQLite database at ``file`` and return the rows.\n",
"\n",
"    Args:\n",
"        file: Path to the SQLite database file.\n",
"        query: SQL text to execute; the result must include a ``tstamp`` column.\n",
"\n",
"    Returns:\n",
"        The query result indexed by ``timestamp``, which is ``tstamp`` converted\n",
"        with ``pd.to_datetime``. The original ``tstamp`` column is kept.\n",
"\n",
"    Raises:\n",
"        KeyError: If the query result has no ``tstamp`` column.\n",
"    \"\"\"\n",
"    import sqlite3\n",
"    from contextlib import closing\n",
"\n",
"    # sqlite3's own context manager only commits/rolls back; closing() is what\n",
"    # actually releases the connection, even if the query raises.\n",
"    with closing(sqlite3.connect(file)) as conn:\n",
"        df = pd.read_sql_query(query, conn)\n",
"\n",
"    df['timestamp'] = pd.to_datetime(df['tstamp'])\n",
"    return df.set_index('timestamp')\n",
|
||
"\n",
|
||
"# Source database and the instrument whose rows are pulled from bnbspot_ohlcv_1min\n",
"file_path = \"/workspace/data/crypto_md/20240801.mktdata.ohlcv.db\"\n",
"instrument_id = 'PAIR-BTC-USDT'\n",
"query = f\"\"\"\n",
"select \n",
" instrument_id as id, \n",
" tstamp, \n",
" vwap \n",
"from bnbspot_ohlcv_1min \n",
"where instrument_id = '{instrument_id}'\n",
"\"\"\"\n",
"\n",
"# Load the rows, expose vwap as `target`, and fall back to a plain 0..n-1 index\n",
"# (reset_index(drop=True) discards the `timestamp` index rather than turning it\n",
"# into a column that is dropped right afterwards).\n",
"df = (\n",
"    load_df_from_db(file=file_path, query=query)\n",
"    .rename(columns={'vwap': 'target'})\n",
"    .reset_index(drop=True)\n",
")\n",
"df"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "43e43154-5a04-4a1d-977c-7f930d62f241",
|
||
"metadata": {},
|
||
"source": [
|
||
"## Running the Test"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 6,
|
||
"id": "b97d357f-787b-4cff-849f-b91b1ec35e7c",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"2020-01-01 -1.652908\n",
|
||
"2020-01-02 -0.157302\n",
|
||
"2020-01-03 -1.396187\n",
|
||
"2020-01-04 0.150374\n",
|
||
"2020-01-05 1.048603\n",
|
||
"Freq: D, dtype: float64\n",
|
||
"ADF Statistic: -9.985535987881171\n",
|
||
"p-value: 2.060269774403535e-17\n",
|
||
"Critical Values: {'1%': -3.498198082189098, '5%': -2.891208211860468, '10%': -2.5825959973472097}\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"import pandas as pd\n",
|
||
"from statsmodels.tsa.stattools import adfuller\n",
|
||
"\n",
|
||
"# Generate the synthetic demo series and run the Augmented Dickey-Fuller test on it\n",
"data = demo_example_data()\n",
"result = adfuller(data)\n",
"\n",
"# Report the test statistic, its p-value and the critical values by tuple position\n",
"for label, position in (('ADF Statistic:', 0), ('p-value:', 1), ('Critical Values:', 4)):\n",
"    print(label, result[position])\n"
|
||
]
|
||
}
|
||
],
|
||
"metadata": {
|
||
"kernelspec": {
|
||
"display_name": "Python 3 (ipykernel)",
|
||
"language": "python",
|
||
"name": "python3"
|
||
},
|
||
"language_info": {
|
||
"codemirror_mode": {
|
||
"name": "ipython",
|
||
"version": 3
|
||
},
|
||
"file_extension": ".py",
|
||
"mimetype": "text/x-python",
|
||
"name": "python",
|
||
"nbconvert_exporter": "python",
|
||
"pygments_lexer": "ipython3",
|
||
"version": "3.10.13"
|
||
}
|
||
},
|
||
"nbformat": 4,
|
||
"nbformat_minor": 5
|
||
}
|