######################################################
# Stat 6721 - Project
#
# Sarah Rathwell
#
# Objective - Investigate Merck and Pfizer for 2013/14
# and trade on 2015 - FINAL.
######################################################
rm(list=ls())
par(mfrow=c(1,1), oma=c(0,0,0,0))
library(quantmod)
## Loading required package: xts
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
## Loading required package: TTR
## Version 0.4-0 included new data defaults. See ?getSymbols.
library(plyr)
######################################################
cat(" Project - Merck vs Pfizer.\n\n",
"Last modification:",date(),'\n')
## Project - Merck vs Pfizer.
##
## Last modification: Sun Dec 4 20:26:14 2016
######################################################
MERK <- getSymbols('MRK', from = '2013-01-01',to = '2015-12-31',
adjust = T, auto.assign = FALSE)
## As of 0.4-0, 'getSymbols' uses env=parent.frame() and
## auto.assign=TRUE by default.
##
## This behavior will be phased out in 0.5-0 when the call will
## default to use auto.assign=FALSE. getOption("getSymbols.env") and
## getOptions("getSymbols.auto.assign") are now checked for alternate defaults
##
## This message is shown once per session and may be disabled by setting
## options("getSymbols.warning4.0"=FALSE). See ?getSymbols for more details.
PFE <- getSymbols('PFE', from = '2013-01-01',to = '2015-12-31',
adjust = T, auto.assign = FALSE)
head(MERK)
## MRK.Open MRK.High MRK.Low MRK.Close MRK.Volume MRK.Adjusted
## 2013-01-02 37.89033 37.89938 37.05757 37.41964 15956900 36.52689
## 2013-01-03 37.91748 38.40627 37.21145 38.31576 23543400 37.40163
## 2013-01-04 38.43342 38.46963 37.85412 37.98989 15428500 37.08354
## 2013-01-07 38.05326 38.18903 37.88127 38.12567 11503600 37.21608
## 2013-01-08 38.24334 38.64161 38.17998 38.17998 14750900 37.26909
## 2013-01-09 38.37006 38.60541 38.19808 38.55110 10073100 37.63136
head(PFE)
## PFE.Open PFE.High PFE.Low PFE.Close PFE.Volume PFE.Adjusted
## 2013-01-02 23.09427 23.43802 22.91335 23.43802 33522200 22.58393
## 2013-01-03 23.50134 23.51039 23.12141 23.38374 33757400 22.53163
## 2013-01-04 23.38374 23.51039 23.30233 23.48325 28604900 22.62751
## 2013-01-07 23.43802 23.56466 23.31138 23.50134 25779900 22.64494
## 2013-01-08 23.51943 23.74558 23.50134 23.53752 31057000 22.67981
## 2013-01-09 23.71844 24.02600 23.69131 23.94459 34210200 23.07204
######################################################
# functions
######################################################
run_transfer <- function(dF){
MRK.5 <- arima(dF[,2], order=c(5,0,0), method="ML")
yPf <- filter(dF[,1], c(1, MRK.5$coef[1:5]), sides=1)[-c(1:5)]
xMf <- filter(dF[,2], c(1, MRK.5$coef[1:5]), sides=1)[-c(1:5)]
cyx=ccf(yPf,xMf,type=c("covariance"), lag.max=6, plot=F)
c1=cyx[-1]
c2=cyx[-2]
c3=cyx[-3]
c4=cyx[-4]
c5=cyx[-5]
axM=acf(xMf,type=c("covariance"), plot=F)
v0=axM[0]
b_1=as.numeric(c1[[1]])/as.numeric(v0[[1]])
b_2=as.numeric(c2[[1]])/as.numeric(v0[[1]])
b_3=as.numeric(c3[[1]])/as.numeric(v0[[1]])
b_4=as.numeric(c4[[1]])/as.numeric(v0[[1]])
b_5=as.numeric(c5[[1]])/as.numeric(v0[[1]])
return(c(b_1,b_2,b_3,b_4,b_5))
}
rolling_beta=function(z,width){
rollapply(z,width=width,FUN=run_transfer,by.column=FALSE,align="right")
}
rolling_threshold=function(z,width){
rollapply(z,width=width,FUN=mean,by.column=FALSE,align="right")
}
rolling_sd=function(z,width){
rollapply(z,width=width, FUN=sd, by.column=FALSE,align="right")
}
compute_equity_curve=function(qty,price){
cash_buy=ifelse(sign(qty)==1,qty*price,0)
cash_sell=ifelse(sign(qty)==-1,-qty*price,0)
position=cumsum(qty)
cumulative_buy=cumsum(cash_buy)
cumulative_sell=cumsum(cash_sell)
equity=cumulative_sell-cumulative_buy+position*price
return(equity)
}
drawdown=function(x){
cummax(x)-x
}
sharpe_ratio <- function(x, rf){
sharpe <- (mean(x, na.rm = T) - rf)/sd(x, na.rm = T)
return(sharpe)
}
maximize_thresh <- function(M){
data$upper = rolling_threshold(data$spread,10) + M*rolling_sd(data$spread,10)
data$lower = rolling_threshold(data$spread,10) - M*rolling_sd(data$spread,10)
buys=ifelse(data$spread > data$upper,1,0)
sells=ifelse(data$spread < data$lower,-1,0)
data$signal=buys+sells
trade_size=1000
signal=as.numeric(data$signal)
signal[is.na(signal)]=0
dist.up=as.numeric(abs(data$spread-data$upper))
dist.up[is.na(dist.up)]=0
dist.down=as.numeric(abs(data$spread-data$lower))
dist.down[is.na(dist.down)]=0
qty_x=rep(0,length(signal))
qty_y=rep(0,length(signal))
for(i in 1:length(signal)){
if(signal[i]==1 ){
qty_x[i]=-round(dist.up[i]*trade_size)
qty_y[i]=round(dist.up[i]*2*trade_size)}
if(signal[i]==-1){
qty_x[i]=round(dist.down[i]*trade_size)
qty_y[i]=-round(dist.down[i]*2*trade_size)}
}
qty_x[length(qty_x)]=-sum(qty_x)
qty_y[length(qty_y)]=-sum(qty_y)
data$qty_x=qty_x
data$qty_y=qty_y
data$equity_curve_x=compute_equity_curve(data$qty_x,data$M)
data$equity_curve_y=compute_equity_curve(data$qty_y,data$P)
return(data$equity_curve_x[252]+data$equity_curve_y[252])
}
######################################################
# in-sample / tuning
######################################################
start_date="2013-01-01"
end_date="2014-12-31"
range=paste(start_date,"::",end_date,sep="")
xM2=MERK[range,6]
yP2=PFE[range,6]
dF=cbind(yP2,xM2)
names(dF)=c("P","M")
ddF=diff(log(dF))[-1]
betas=lag(rolling_beta(ddF,12), 1)
## Warning in log(s2): NaNs produced
data=merge(betas,dF)
colnames(data)=c("b1", "b2", "b3", "b4", "b5", "P", "M")
data$spread1=data$P-betas[,1]*lag(data$M,1)
data$spread2=data$P-betas[,1]*lag(data$M,1)-betas[,2]*lag(data$M,2)
data$spread3=data$P-betas[,1]*lag(data$M,1)-betas[,2]*lag(data$M,2)-betas[,3]*lag(data$M,3)
data$spread4=data$P-betas[,1]*lag(data$M,1)-betas[,2]*lag(data$M,2)-betas[,3]*lag(data$M,3)-betas[,4]*lag(data$M,4)
data$spread5=data$P-betas[,1]*lag(data$M,1)-betas[,2]*lag(data$M,2)-betas[,3]*lag(data$M,3)-betas[,4]*lag(data$M,4)-
betas[,5]*lag(data$M,5)
max_eq=0
ratio=0
eq_by_thresh <- as.data.frame(matrix(NA,nrow=100,ncol=5))
colnames(eq_by_thresh) <- c("B1", "B2", "B3", "B4", "B5")
eq_by_thresh$ratios <- seq(.01,1,.01)
for(i in 1:5){
data$spread=data[,7+i]
eq_by_thresh[,i] <- as.integer(aaply(seq(.01,1,.01),1,.fun=maximize_thresh))
max_eq[i] <-mean(as.numeric(max(eq_by_thresh[,i])))
ratio[i] <- mean(as.numeric(which(eq_by_thresh[,i]==max(eq_by_thresh[,i]))/100))
}
tuned <- as.data.frame(cbind(max_eq,ratio))
tuned
## max_eq ratio
## 1 394622 0.01
## 2 138334 0.66
## 3 241224 0.01
## 4 -11294 0.01
## 5 -161311 1.00
final_beta <- which(max_eq==max(max_eq))
final_ratio <- tuned$ratio[final_beta]
eq_by_thresh[,c(1:5)] <- round(eq_by_thresh[,c(1:5)]/1000, digits=0)
min(eq_by_thresh[,c(1:5)])
## [1] -240
max(eq_by_thresh[,c(1:5)])
## [1] 395
plot(B1~ratios, data=eq_by_thresh, type="l", ylim=c(min(eq_by_thresh[,c(1:5)]),max(eq_by_thresh[,c(1:5)])),
ylab= "Thousand Dollars", xlab="Ratio of SD(threshold)", main = "EOY In-Sample P&L by Threshold Width/Number of Betas",
sub="Maximized at betas - 1; ratio - 0.01", col=1)
lines(B2~ratios, data=eq_by_thresh, col=2)
lines(B3~ratios, data=eq_by_thresh, col=3)
lines(B4~ratios, data=eq_by_thresh, col=4)
lines(B5~ratios, data=eq_by_thresh, col=6)
abline(h=max(eq_by_thresh[,c(1:5)]), lty=2)
abline(v=final_ratio, lty=2)
legend("topright", c("One", "Two", "Three", "Four", "Five"), lty=c(1,1,1,1,1), col=c(1,2,3,4,6), bty="n")

data$spread <- data[,7+final_beta]
######################################################
# test against out-of-sample
######################################################
start_date="2015-01-01"
end_date="2015-12-31"
range=paste(start_date,"::",end_date,sep="")
xM2=MERK[range,6]
yP2=PFE[range,6]
dF=cbind(yP2,xM2)
names(dF)=c("P","M")
ddF=diff(log(dF))[-1]
beta_out_of_sample=lag(rolling_beta(ddF,12), 1)
data_out=merge(beta_out_of_sample,dF)
colnames(data_out)=c("b1", "b2", "b3", "b4", "b5", "P", "M")
data_out$spread1=data_out$P-beta_out_of_sample[,1]*lag(data_out$M,1)
data_out$spread2=data_out$P-beta_out_of_sample[,1]*lag(data_out$M,1)-beta_out_of_sample[,2]*lag(data_out$M,2)
data_out$spread3=data_out$P-beta_out_of_sample[,1]*lag(data_out$M,1)-beta_out_of_sample[,2]*lag(data_out$M,2)-
beta_out_of_sample[,3]*lag(data_out$M,3)
data_out$spread4=data_out$P-beta_out_of_sample[,1]*lag(data_out$M,1)-beta_out_of_sample[,2]*lag(data_out$M,2)-
beta_out_of_sample[,3]*lag(data_out$M,3)-beta_out_of_sample[,4]*lag(data_out$M,4)
data_out$spread5=data_out$P-beta_out_of_sample[,1]*lag(data_out$M,1)-beta_out_of_sample[,2]*lag(data_out$M,2)-
beta_out_of_sample[,3]*lag(data_out$M,3)-beta_out_of_sample[,4]*lag(data_out$M,4)-
beta_out_of_sample[,5]*lag(data_out$M,5)
data_out$spread <- data_out[,7+final_beta]
data_out$upper = rolling_threshold(data_out$spread,10) + final_ratio*rolling_sd(data_out$spread,10)
data_out$lower = rolling_threshold(data_out$spread,10) - final_ratio*rolling_sd(data_out$spread,10)
data_out$upper[c(13:23),]=mean(data$spread, na.rm=T)+ final_ratio*sd(data$spread, na.rm=T)
data_out$lower[c(13:23),]=mean(data$spread, na.rm=T)- final_ratio*sd(data$spread, na.rm=T)
spreadmax=mean(data$spread, na.rm=T)+ 3*sd(data$spread, na.rm=T)
spreadmin=mean(data$spread, na.rm=T)- 3*sd(data$spread, na.rm=T)
buys=ifelse(data_out$spread > data_out$upper,1,0)
sells=ifelse(data_out$spread < data_out$lower,-1,0)
data_out$signal=buys+sells
plot.xts(data_out$spread,main="PFE vs MRK Out of Sample")
lines(data_out$upper, col="red")
lines(data_out$lower, col="red")
abline(h=spreadmax, lty=2, col="blue")
abline(h=spreadmin, lty=2, col="blue")
for(i in 14:length(data_out$spread)){
if(data_out$spread[i] > spreadmax && data_out$spread[i-1] > spreadmax) data_out$signal[i] = 0
if(data_out$spread[i] < spreadmin && data_out$spread[i-1] < spreadmin) data_out$signal[i] = 0
}
point_type=rep(NA,nrow(data_out))
buy_index=which(data_out$signal==1)
sell_index=which(data_out$signal==-1)
point_type[buy_index]=21
point_type[sell_index]=24
points(data_out$spread,pch=point_type)
legend("bottomleft", c("Buy signal", "Sell signal"), pch=c(21,24), bty="n")

num_of_buy_signals=sum(data_out$signal[data_out$signal==1],na.rm=TRUE)
num_of_sell_signals=sum(abs(data_out$signal[data_out$signal==-1]),na.rm=TRUE)
num_of_buy_signals
## [1] 121
num_of_sell_signals
## [1] 116
trade_size=1000
signal=as.numeric(data_out$signal)
signal[is.na(signal)]=0
dist.up=as.numeric(abs(data_out$spread-data_out$upper))
dist.up[is.na(dist.up)]=0
dist.down=as.numeric(abs(data_out$spread-data_out$lower))
dist.down[is.na(dist.down)]=0
qty_x=rep(0,length(signal))
qty_y=rep(0,length(signal))
for(i in 1:length(signal)){
if(signal[i]==1){
qty_x[i]=-round(dist.up[i]*trade_size)
qty_y[i]=round(dist.up[i]*2*trade_size)}
if(signal[i]==-1){
qty_x[i]=round(dist.down[i]*trade_size)
qty_y[i]=-round(dist.down[i]*2*trade_size)}
}
qty_x[length(qty_x)]=-sum(qty_x)
qty_y[length(qty_y)]=-sum(qty_y)
data_out$qty_x=qty_x
data_out$qty_y=qty_y
tail(data_out,3)
## b1 b2 b3 b4 b5
## 2015-12-29 0.20381791 0.038753179 -0.12764938 -0.10499637 0.0009766312
## 2015-12-30 0.05328495 -0.004184604 -0.26951648 0.01548899 0.1424042593
## 2015-12-31 -0.04335806 0.418275468 -0.09523404 -0.17360045 0.0346668033
## P M spread1 spread2 spread3 spread4 spread5
## 2015-12-29 31.63366 52.06743 21.12086 19.12162 25.72314 31.06294 31.01349
## 2015-12-30 31.55658 51.97958 28.78217 28.99800 42.90212 42.10109 34.85884
## 2015-12-31 31.10370 51.55983 33.35744 11.57891 16.49102 25.44691 23.65408
## spread upper lower signal qty_x qty_y
## 2015-12-29 21.12086 30.68645 30.60664 -1 9486 -18972
## 2015-12-30 28.78217 30.23477 30.15653 -1 1374 -2749
## 2015-12-31 33.35744 30.30075 30.22147 1 239542 -479077
data_out$equity_curve_x=compute_equity_curve(data_out$qty_x,data_out$M)
data_out$equity_curve_y=compute_equity_curve(data_out$qty_y,data_out$P)
equity_curve=(data_out$equity_curve_x+data_out$equity_curve_y)/1000
plot(equity_curve,main="Equity Curve",
cex.main=0.8,
cex.lab=0.8,
cex.axis=0.8,
ylab = "P&L: Thousand Dollars")
abline(h=0, lty=2)

plot(drawdown(equity_curve),main="Drawdown of Equity Curve",
cex.main=0.8,
cex.lab=0.8,
cex.axis=0.8,
ylab = "Thousand Dollars")

max(equity_curve)
## [1] 3610.934
mean(equity_curve)
## [1] 2269.087
equity_curve[252]
## equity_curve_x
## 2015-12-31 2587.784
max(drawdown(equity_curve))
## [1] 1753.219
mean(drawdown(equity_curve))
## [1] 474.868
drawdown(equity_curve)[252]
## equity_curve_x
## 2015-12-31 1023.151
equity <- as.numeric(equity_curve[,1])
equity_curve_returns <- diff(equity)/equity[-length(equity)]
invalid <- is.infinite(equity_curve_returns) | is.nan(equity_curve_returns)
sharpe_ratio(equity_curve_returns[!invalid], 0.03)
## [1] 0.0763933
sharpe_ratio(equity_curve_returns[!invalid], 0.03)*sqrt(252)
## [1] 1.212706
######################################################