# Kalman Filter Mean Reversion Strategy

import numpy as np
|
|
import pandas as pd
|
|
import matplotlib.pyplot as plt
|
|
#import statsmodels.formula.api as sm
|
|
import statsmodels.tsa.stattools as ts
|
|
#import statsmodels.tsa.vector_ar.vecm as vm
|
|
|
|
# Load the EWA/EWC price history and index it by calendar date.
df = pd.read_csv('inputData_EWA_EWC.csv')
df['Date'] = pd.to_datetime(df['Date'], format='%Y%m%d').dt.date  # keep the date only, drop HH:MM:SS
df = df.set_index('Date')

# y is regressed on x further down: EWA is the regressor, EWC the observation.
x, y = df['EWA'], df['EWC']

# Augment x with a column of ones to accommodate a possible offset in the
# regression of y on x. The column swap puts the price in column 0 and the
# constant in column 1.
x = np.array(ts.add_constant(x))[:, [1, 0]]

# Kalman filter for the time-varying regression y(t) = x(t) . beta(t) + noise.
#   beta[0, t] multiplies the price column of x (slope),
#   beta[1, t] multiplies the column of ones (intercept).
# Between observations beta is carried over unchanged while its covariance
# grows by Vw; Ve is the measurement noise variance.

# delta=1 gives the fastest change in beta; delta -> 0 allows no change
# (like traditional linear regression).
delta = 0.0001

yhat = np.full(y.shape[0], np.nan)  # measurement prediction
e = yhat.copy()                     # measurement prediction error
Q = yhat.copy()                     # variance of the measurement prediction error

# For clarity, we denote R(t|t) by P(t). Initialize R, P and beta.
R = np.zeros((2, 2))
P = R.copy()
beta = np.full((2, x.shape[0]), np.nan)
Vw = delta / (1 - delta) * np.eye(2)
Ve = 0.001

# Initialize beta(:, 1) to zero.
beta[:, 0] = 0

# Given initial beta and R (and P), filter one observation at a time.
for t in range(len(y)):
    if t > 0:
        # State prediction: carry the previous estimate forward and inflate
        # its covariance by the process noise.
        beta[:, t] = beta[:, t - 1]
        R = P + Vw

    # Measurement prediction and the variance of its error.
    yhat[t] = np.dot(x[t, :], beta[:, t])
    Q[t] = np.dot(np.dot(x[t, :], R), x[t, :].T) + Ve

    # Observe y(t). y is indexed by date objects, so the row must be taken by
    # position with .iloc; a bare integer key (y[t]) relies on a positional
    # fallback that pandas has deprecated.
    e[t] = y.iloc[t] - yhat[t]  # measurement prediction error

    K = np.dot(R, x[t, :].T) / Q[t]  # Kalman gain

    beta[:, t] = beta[:, t] + np.dot(K, e[t])  # State update. Equation 3.11

    # State covariance update. Equation 3.12. K and x[t, :] are both 1-D, so
    # np.outer is needed to build the 2x2 matrix K x' (np.dot would collapse
    # it to a scalar). Thanks to Matthias for the np.dot -> np.outer fix.
    P = R - np.dot(np.outer(K, x[t, :]), R)

# Draw the filter diagnostics as four successive plt.plot calls: the two rows
# of beta, then the prediction error and the square root of its variance
# (both skipping the first two samples).
for series in (beta[0, :], beta[1, :], e[2:], np.sqrt(Q[2:])):
    plt.plot(series)

# Trading signals from the prediction error e and its standard deviation sqrt(Q):
# enter when e leaves the one-standard-deviation band, exit when it crosses zero.
band = np.sqrt(Q)

longsEntry = np.less(e, -band)
longsExit = np.greater(e, 0)

shortsEntry = np.greater(e, band)
shortsExit = np.less(e, 0)

# Turn the entry/exit signals into unit holdings of the spread.
# Each leg starts as all-NaN ("no decision"), is seeded flat at t=0, then
# marked on entry and reset to 0 on exit; the remaining NaNs are forward-filled
# so a position is held until the next signal. Exits are written after
# entries, so an exit wins if both fire on the same bar.

# Long leg: +1 unit while long, 0 otherwise.
numUnitsLong = np.full(longsEntry.shape, np.nan)
numUnitsLong[0] = 0
numUnitsLong[longsEntry] = 1
numUnitsLong[longsExit] = 0
# .ffill() replaces fillna(method='ffill', inplace=True), whose `method`
# keyword is deprecated in pandas.
numUnitsLong = pd.DataFrame(numUnitsLong).ffill()

# Short leg: -1 unit while short, 0 otherwise.
numUnitsShort = np.full(shortsEntry.shape, np.nan)
numUnitsShort[0] = 0
numUnitsShort[shortsEntry] = -1
numUnitsShort[shortsExit] = 0
numUnitsShort = pd.DataFrame(numUnitsShort).ffill()

# Net holding of the spread per bar (single-column DataFrame).
numUnits = numUnitsLong + numUnitsShort

# Per-unit share allocation [-beta[0], 1]: add_constant prepends a column of
# ones to -beta[0, :], and the column swap moves it to the right.
allocation = ts.add_constant(-beta[0, :].T)[:, [1, 0]]

# Dollar capital held in each column of df: units * share allocation * price.
# NOTE(review): assumes df holds exactly the two price columns, EWA then EWC,
# so that -beta[0] pairs with EWA and 1 with EWC -- confirm against the CSV.
positions = pd.DataFrame(np.tile(numUnits.values, [1, 2]) * allocation * df.values)

# Yesterday's positions earn today's percentage price change.
prev_positions = positions.shift()
pnl = np.sum(prev_positions.values * df.pct_change().values, axis=1)  # daily P&L of the strategy

# Daily return on the gross capital deployed the previous day.
ret = pnl / np.sum(np.abs(prev_positions), axis=1)

# Cumulative compounded return curve.
(np.cumprod(1 + ret) - 1).plot()

# Annualise assuming 252 trading days per year.
apr = np.prod(1 + ret) ** (252 / len(ret)) - 1
sharpe = np.sqrt(252) * np.mean(ret) / np.std(ret)
print('APR=%f Sharpe=%f' % (apr, sharpe))
# Reference output recorded by the original author: APR=0.313225 Sharpe=3.464060