clear; stks=load('../Data/inputDataOHLCDaily_20120424', 'stocks', 'tday','cl'); etf=load('../Data/inputData_ETF', 'tday', 'syms', 'cl'); % Ensure data have same dates [tday idx1 idx2]=intersect(stks.tday, etf.tday); stks.cl=stks.cl(idx1, :); etf.cl=etf.cl(idx2, :); % Use SPY idxS=find(strcmp('SPY', etf.syms)); etf.cl=etf.cl(:, idxS); trainDataIdx=find(tday>=20070101 & tday<=20071231); testDataIdx=find(tday > 20071231); isCoint=false(size(stks.stocks)); for s=1:length(stks.stocks) % Combine the two time series into a matrix y2 for input into Johansen test y2=[stks.cl(trainDataIdx, s), etf.cl(trainDataIdx)]; badData=any(isnan(y2), 2); y2(badData, :)=[]; % remove any missing data if (size(y2, 1) > 250) results=johansen(y2, 0, 1); % johansen test with non-zero offset but zero drift, and with the lag k=1. if (results.lr1(1) > results.cvt(1, 1)) isCoint(s)=true; end end end length(find(isCoint)) % 98: there are 98 stocks that are cointegrating with SPY % Form a long-only portfolio with all stocks that cointegrate with SPY, with equal % capital allocation yN=stks.cl(trainDataIdx, isCoint); logMktVal_long=sum(log(yN), 2); % The net market value of the long-only portfolio is same as the "spread" % Confirm that the portfolio cointegrates with SPY ytest=[logMktVal_long, log(etf.cl(trainDataIdx))]; results=johansen(ytest, 0, 1); % johansen test with non-zero offset but zero drift, and with the lag k=1. prt(results); % Output: % Johansen MLE estimates % NULL: Trace Statistic Crit 90% Crit 95% Crit 99% % r <= 0 variable 1 15.869 13.429 15.494 19.935 % r <= 1 variable 2 6.197 2.705 3.841 6.635 % % NULL: Eigen Statistic Crit 90% Crit 95% Crit 99% % r <= 0 variable 1 9.671 12.297 14.264 18.520 % r <= 1 variable 2 6.197 2.705 3.841 6.635 results.evec % % ans = % % 1.0939 -0.2799 % -105.5600 56.0933 % Apply linear mean-reversion model on test set yNplus=[stks.cl(testDataIdx, isCoint), etf.cl(testDataIdx)]; % Array of stock and ETF prices weights=[repmat(results.evec(1, 1), size(stks.cl(testDataIdx, isCoint))), ... repmat(results.evec(2, 1), size(etf.cl(testDataIdx)))]; % Array of log market value of stocks and ETF's logMktVal=smartsum(weights.*log(yNplus), 2); % Log market value of long-short portfolio lookback=5; numUnits=-(logMktVal-movingAvg(logMktVal, lookback))./movingStd(logMktVal, lookback); % capital invested in portfolio in dollars. movingAvg and movingStd are functions from epchan.com/book2 positions=repmat(numUnits, [1 size(weights, 2)]).*weights; % positions is the dollar capital in each stock or ETF. pnl=smartsum(lag(positions, 1).*(log(yNplus)-lag(log(yNplus), 1)), 2); % daily P&L of the strategy ret=pnl./smartsum(abs(lag(positions, 1)), 2); % return is P&L divided by gross market value of portfolio ret(isnan(ret))=0; figure; plot(cumprod(1+ret)-1); % Cumulative compounded return fprintf(1, 'APR=%f Sharpe=%f\n', prod(1+ret).^(252/length(ret))-1, sqrt(252)*mean(ret)/std(ret)); % APR=0.044930 Sharpe=1.319397