2025-06-05 08:48:33 +02:00

54 lines
1.9 KiB
Python

# Box 6.1 and Example 6.1: Finding Correlations between Returns of Different Time Frames
import numpy as np
import pandas as pd
#import matplotlib.pyplot as plt
#import statsmodels.formula.api as sm
#import statsmodels.tsa.stattools as ts
#import statsmodels.tsa.vector_ar.vecm as vm
from scipy.stats.stats import pearsonr
# Load the daily input data from CSV and index the rows by calendar date.
df = pd.read_csv('inputDataOHLCDaily_TU_20120511.csv')
# The 'Date' column is parsed as YYYYMMDD and reduced to plain date objects
# (no HH:MM:SS component) before becoming the index.
df = df.assign(
    Date=pd.to_datetime(df['Date'], format='%Y%m%d').dt.date
).set_index('Date')
# For every (lookback, holddays) pair, print the Pearson correlation (and its
# p-value) between the trailing `lookback`-day return and the subsequent
# `holddays`-day return.  Output columns: lookback, holddays, cc, pval.
horizons = [1, 5, 10, 25, 60, 120, 250]
for lookback in horizons:
    # The trailing return depends only on `lookback`, so it is computed once
    # per outer iteration rather than once per (lookback, holddays) pair.
    ret_lag_all = df.pct_change(periods=lookback)
    for holddays in horizons:
        # Future return from day t to day t+holddays, aligned on day t.
        # The return is computed first and shifted back afterwards so that
        # rows whose future price is not available stay NaN; shifting the
        # prices first leaves trailing NaN prices that pct_change may
        # forward-fill (depending on the pandas version/defaults).
        ret_fut_all = df.pct_change(periods=holddays).shift(-holddays)

        # Sample every min(lookback, holddays) rows to reduce the overlap
        # between consecutive return windows.
        step = min(lookback, holddays)
        ret_lag = ret_lag_all.iloc[::step]
        ret_fut = ret_fut_all.iloc[::step]

        # Keep only the entries where both returns are defined.
        goodDates = (ret_lag.notna() & ret_fut.notna()).to_numpy()

        # Index the underlying arrays with the boolean mask so pearsonr
        # receives flat 1-D samples and returns scalar statistics, instead of
        # single-column 2-D frames whose handling differs across
        # pandas/SciPy versions.
        (cc, pval) = pearsonr(ret_lag.to_numpy()[goodDates],
                              ret_fut.to_numpy()[goodDates])
        print('%4i %4i %7.4f %7.4f' % (lookback, holddays, cc, pval))
# Momentum backtest: each day, take one long (short) unit if the price is
# above (below) its value `lookback` rows earlier, and keep every unit open
# for `holddays` days.
lookback = 250
holddays = 25

longs = df > df.shift(lookback)
shorts = df < df.shift(lookback)

pos = np.zeros(df.shape)

# Accumulate the signals of the last `holddays` days (h = 0 .. holddays-1).
# range(holddays-1) would drop the oldest day, so each unit would only be
# held for holddays-1 days.
for h in range(holddays):
    # fill_value=False keeps the shifted masks strictly boolean; shift()
    # followed by fillna(False) can leave an object-dtype frame, which cannot
    # be used to index the NumPy array `pos`.
    long_lag = longs.shift(h, fill_value=False).to_numpy()
    short_lag = shorts.shift(h, fill_value=False).to_numpy()
    pos[long_lag] += 1
    pos[short_lag] -= 1

# NOTE: the default integer index is kept on purpose; `ret` and `cumret`
# inherit it and are handed to calculateMaxDD unchanged.
pos = pd.DataFrame(pos)
held = pos.shift()  # positions established on the previous row
pnl = np.sum(held.values * df.pct_change().values, axis=1)  # daily P&L of the strategy
# Normalise by the gross number of units held; rows with no position give
# 0/0 = NaN and are skipped by the pandas reductions below.
ret = pnl / np.sum(np.abs(held), axis=1)
cumret = (1 + ret).cumprod() - 1
cumret.plot()

apr = (1 + ret).prod() ** (252 / len(ret)) - 1
# ddof=0 matches np.std's population standard deviation.
sharpe = np.sqrt(252) * ret.mean() / ret.std(ddof=0)
print('APR=%f Sharpe=%f' % (apr, sharpe))

from calculateMaxDD import calculateMaxDD
maxDD, maxDDD, i = calculateMaxDD(cumret.fillna(0))
print('Max DD=%f Max DDD in days=%i' % (maxDD, maxDDD))