"""Box 6.1 and Example 6.1: Finding Correlations between Returns of Different Time Frames.

The script has two parts:

1. For every (lookback, holddays) pair, print the Pearson correlation (and its
   p-value) between the trailing ``lookback``-day return and the subsequent
   ``holddays``-day return, sampled on non-overlapping windows.
2. Backtest a time-series momentum rule with lookback=250 and holddays=25:
   go long when the price is above its value ``lookback`` days ago, short
   when below, hold each entry for ``holddays`` days, then report APR,
   Sharpe ratio and maximum drawdown.
"""
import numpy as np
import pandas as pd
from scipy.stats import pearsonr  # public location; scipy.stats.stats is deprecated

from calculateMaxDD import calculateMaxDD

# NOTE: cumret.plot() below needs matplotlib to be installed (pandas imports
# it lazily), even though this script does not import it directly.

df = pd.read_csv('inputDataOHLCDaily_TU_20120511.csv')
df['Date'] = pd.to_datetime(df['Date'], format='%Y%m%d').dt.date  # remove HH:MM:SS
df.set_index('Date', inplace=True)

# ---------------------------------------------------------------------------
# Part 1: correlation between past and future returns
# ---------------------------------------------------------------------------
for lookback in [1, 5, 10, 25, 60, 120, 250]:
    # Trailing return depends only on lookback, so compute it once per
    # outer iteration instead of once per (lookback, holddays) pair.
    ret_lag_full = df.pct_change(periods=lookback)

    for holddays in [1, 5, 10, 25, 60, 120, 250]:
        # Forward return over the next `holddays` days, aligned to today.
        # fill_method=None: do not forward-fill the NaNs that shift(-holddays)
        # leaves at the tail; with pandas' historical default ('pad') the
        # last `holddays` rows would get returns over a truncated horizon.
        ret_fut_full = df.shift(-holddays).pct_change(periods=holddays,
                                                      fill_method=None)

        # Sample every min(lookback, holddays) rows so that consecutive
        # observations do not share overlapping return windows.
        indepSet = range(0, ret_lag_full.shape[0], min(lookback, holddays))
        ret_lag = ret_lag_full.iloc[indepSet]
        ret_fut = ret_fut_full.iloc[indepSet]

        goodDates = (ret_lag.notna() & ret_fut.notna()).values

        # pearsonr is documented for 1-D inputs. Masking the underlying
        # arrays with the 2-D boolean mask yields flat 1-D samples.
        # NOTE(review): with more than one price column this pools all
        # columns into a single sample -- presumably the CSV holds a single
        # price column; confirm.
        (cc, pval) = pearsonr(ret_lag.values[goodDates],
                              ret_fut.values[goodDates])
        print('%4i %4i %7.4f %7.4f' % (lookback, holddays, cc, pval))

# ---------------------------------------------------------------------------
# Part 2: momentum strategy backtest
# ---------------------------------------------------------------------------
lookback = 250
holddays = 25

# Entry signals: price above / below its level `lookback` days ago.
longs = df > df.shift(lookback)
shorts = df < df.shift(lookback)

pos = np.zeros(df.shape)

# Today's position is the sum of the signals from today and the previous
# holddays-1 days, i.e. `holddays` signals in total (h = 0 .. holddays-1).
# range(holddays-1) would stop one lag short and hold each entry for only
# holddays-1 days.
for h in range(holddays):
    # fill_value=False keeps the frames boolean; shift(h).fillna(False)
    # goes through object dtype and relies on implicit downcasting.
    long_lag = longs.shift(h, fill_value=False)
    short_lag = shorts.shift(h, fill_value=False)
    pos[long_lag.values] += 1
    pos[short_lag.values] -= 1

# Default integer index is kept on purpose so that `ret` / `cumret` stay
# integer-indexed for calculateMaxDD.
pos = pd.DataFrame(pos)

# Yesterday's position earns today's return.
pnl = np.sum((pos.shift().values) * (df.pct_change().values), axis=1)  # daily P&L of the strategy
# Normalise by gross exposure; days with no position give 0/0 = NaN.
ret = pnl / np.sum(np.abs(pos.shift()), axis=1)
cumret = (np.cumprod(1 + ret) - 1)
cumret.plot()

print('APR=%f Sharpe=%f' % (np.prod(1 + ret) ** (252 / len(ret)) - 1,
                            np.sqrt(252) * np.mean(ret) / np.std(ret)))

maxDD, maxDDD, i = calculateMaxDD(cumret.fillna(0))
print('Max DD=%f Max DDD in days=%i' % (maxDD, maxDDD))