# 2025-06-05 08:48:33 +02:00
#
# 44 lines
# 1.7 KiB
# Python

# Example 5.1: Pair Trading USD.AUD vs USD.CAD Using the Johansen Eigenvector
import numpy as np
import pandas as pd
#import matplotlib.pyplot as plt
#import statsmodels.formula.api as sm
#import statsmodels.tsa.stattools as ts
import statsmodels.tsa.vector_ar.vecm as vm
def _load_quotes(csv_path, currency):
    """Read one quote file and return it with a date-only 'Date' column.

    The 'Date' column is parsed with the ``%Y%m%d`` format and reduced to
    plain dates (drops HH:MM:SS); the 'Close' column is renamed to
    *currency*.
    """
    quotes = pd.read_csv(csv_path)
    quotes['Date'] = pd.to_datetime(quotes['Date'], format='%Y%m%d').dt.date
    quotes.rename(columns={'Close': currency}, inplace=True)
    return quotes


df1 = _load_quotes('inputData_USDCAD_20120426.csv', 'CAD')
# Take the reciprocal of the USDCAD close so the 'CAD' column holds the
# inverted quote.
df1['CAD'] = 1 / df1['CAD']
df2 = _load_quotes('inputData_AUDUSD_20120426.csv', 'AUD')
# Keep only the dates present in both files and index the result by date.
df = df1.merge(df2, how='inner', on='Date').set_index('Date')
# Rolling-window lengths, in rows of df.
trainlen = 250  # rows fed to the Johansen test at each step
lookback = 20  # rows used for the portfolio's mean and standard deviation
# Per-row outputs; rows the loop never reaches (the first trainlen+1) stay NaN.
# np.nan is used because the np.NaN alias was removed in NumPy 2.0.
hedgeRatio = np.full(df.shape, np.nan)
numUnits = np.full(df.shape[0], np.nan)
prices = df.values  # loop-invariant, so extracted once
for t in range(trainlen + 1, df.shape[0]):
    # Johansen test on rows t-trainlen-1 .. t-2 (the slice end t-1 is exclusive).
    result = vm.coint_johansen(prices[(t - trainlen - 1):t - 1], det_order=0, k_ar_diff=1)
    # The first column of result.evec is used as the hedge ratio for row t.
    hedgeRatio[t, :] = result.evec[:, 0]
    # (net) market value of portfolio over rows t-lookback .. t-1
    yport = pd.Series(np.dot(prices[(t - lookback):t], result.evec[:, 0]))
    ma = yport.mean()
    mstd = yport.std()  # pandas default: sample standard deviation (ddof=1)
    # Negative z-score of the latest portfolio value: short when above the
    # mean, long when below. Scalar arithmetic avoids assigning a one-element
    # Series into a NumPy scalar slot, which is deprecated.
    numUnits[t] = -(yport.iloc[-1] - ma) / mstd
# Each row of hedgeRatio acts as the capital allocation for that day;
# multiplying by the number of units and the current prices gives the dollar
# capital held in each currency.
positions = pd.DataFrame(numUnits[:, np.newaxis] * hedgeRatio) * df.values
# Positions earning today's return are the ones set on the previous row.
heldPositions = positions.shift()
# daily P&L of the strategy
pnl = np.sum(heldPositions.values * df.pct_change().values, axis=1)
# Daily return: P&L divided by the gross (absolute) capital carried into the day.
grossCapital = np.sum(np.abs(heldPositions), axis=1)
ret = pnl / grossCapital
# Plot the cumulative compounded return.
cumulativeReturn = np.cumprod(1 + ret) - 1
cumulativeReturn.plot()
# Annualise with 252 periods per year; len(ret) counts every row of ret.
apr = np.prod(1 + ret) ** (252 / len(ret)) - 1
sharpe = np.sqrt(252) * np.mean(ret) / np.std(ret)
print('APR=%f Sharpe=%f' % (apr, sharpe))
# APR=0.064512 Sharpe=1.362926