# Augmented Dickey-Fuller Test (ADF)
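A stationarity test: the null hypothesis is that the series has a unit root (i.e. is non-stationary), so a small p-value is evidence that the series is stationary.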

## Preparing the Data

In [9]:
import pandas as pd
from statsmodels.tsa.stattools import adfuller
import numpy as np

def demo_example_data(n_points: int = 100, seed: "int | None" = None) -> pd.Series:
    """Build a daily-indexed series of standard-normal noise for demos.

    Args:
        n_points: Number of observations to generate (default 100).
        seed: When given, values are drawn from a private
            ``np.random.RandomState(seed)`` so the output is reproducible and
            NumPy's global random state is left untouched. When ``None``
            (the default), values are drawn from the global ``np.random``
            state, so successive calls differ unless the caller seeded it.

    Returns:
        A ``pd.Series`` of length ``n_points`` indexed by consecutive calendar
        days starting at 2020-01-01.

    Side effects:
        Prints the first rows of the generated series.
    """
    if seed is None:
        values = np.random.randn(n_points)
    else:
        # A dedicated generator keeps seeding local to this call.
        values = np.random.RandomState(seed).randn(n_points)

    # Attach a daily datetime index so the series looks like time-indexed data.
    dates = pd.date_range(start='2020-01-01', periods=n_points, freq='D')
    data = pd.Series(values, index=dates)

    # Display the first few rows of the data
    print(data.head())
    return data

def load_df_from_db(file: str, query: str) -> pd.DataFrame:
    """Run ``query`` against a SQLite database file and return the rows.

    Args:
        file: Path to the SQLite database file.
        query: SQL statement to execute; its result must contain a ``tstamp``
            column.

    Returns:
        A DataFrame holding the query result, indexed by a ``timestamp``
        column obtained by passing ``tstamp`` through ``pd.to_datetime``. The
        original ``tstamp`` column is kept as a regular column.

    Raises:
        KeyError: If the query result has no ``tstamp`` column.
    """
    import sqlite3
    from contextlib import closing

    # `closing` guarantees the connection is released even if the query fails;
    # a sqlite3 connection used directly as a context manager only manages the
    # transaction and does not close the connection.
    with closing(sqlite3.connect(file)) as conn:
        df = pd.read_sql_query(query, conn)

    df['timestamp'] = pd.to_datetime(df['tstamp'])
    return df.set_index('timestamp')

# Source database and the instrument whose 1-minute VWAP series is loaded.
file_path = "/workspace/data/crypto_md/20240801.mktdata.ohlcv.db"
instrument_id='PAIR-BTC-USDT'
query = f"""
select 
    instrument_id as id, 
    tstamp, 
    vwap 
from bnbspot_ohlcv_1min 
where instrument_id = '{instrument_id}'
"""

# Load the rows, expose the VWAP column under the generic name 'target', and
# replace the datetime index with a plain positional one (the raw 'tstamp'
# column is kept).
df = (
    load_df_from_db(file=file_path, query=query)
    .rename(columns={'vwap': 'target'})
    .reset_index(drop=True)
)
df

Unnamed: 0,id,tstamp,target
0,PAIR-BTC-USDT,1722470400000000000,64640.679892
1,PAIR-BTC-USDT,1722470460000000000,64652.991289
2,PAIR-BTC-USDT,1722470520000000000,64660.005093
3,PAIR-BTC-USDT,1722470580000000000,64653.482847
4,PAIR-BTC-USDT,1722470640000000000,64687.458279
...,...,...,...
1372,PAIR-BTC-USDT,1722556500000000000,65439.307663
1373,PAIR-BTC-USDT,1722556560000000000,65445.733114
1374,PAIR-BTC-USDT,1722556620000000000,65446.371741
1375,PAIR-BTC-USDT,1722556680000000000,65420.879478


## Running the Test

In [6]:
import pandas as pd
from statsmodels.tsa.stattools import adfuller

# Synthetic series used as the input to the test
data = demo_example_data()

# Run the Augmented Dickey-Fuller test on it
result = adfuller(data)

# Name the fields of interest before reporting them
adf_statistic = result[0]
p_value = result[1]
critical_values = result[4]

print('ADF Statistic:', adf_statistic)
print('p-value:', p_value)
print('Critical Values:', critical_values)


2020-01-01   -1.652908
2020-01-02   -0.157302
2020-01-03   -1.396187
2020-01-04    0.150374
2020-01-05    1.048603
Freq: D, dtype: float64
ADF Statistic: -9.985535987881171
p-value: 2.060269774403535e-17
Critical Values: {'1%': -3.498198082189098, '5%': -2.891208211860468, '10%': -2.5825959973472097}
