######################################################
# Stat 6721 - Project
#
# Sarah Rathwell 
#
# Objective - Investigate Merck and Pfizer for 2013/14
#             and trade on 2015 - FINAL.
######################################################
rm(list=ls())
par(mfrow=c(1,1), oma=c(0,0,0,0))
library(quantmod)
## Loading required package: xts
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
## Loading required package: TTR
## Version 0.4-0 included new data defaults. See ?getSymbols.
library(plyr)
######################################################
cat(" Project - Merck vs Pfizer.\n\n",
    "Last modification:",date(),'\n')
##  Project - Merck vs Pfizer.
## 
##  Last modification: Sun Dec  4 20:26:14 2016
######################################################


MERK <- getSymbols('MRK', from = '2013-01-01',to = '2015-12-31', 
                   adjust = T, auto.assign = FALSE)
##     As of 0.4-0, 'getSymbols' uses env=parent.frame() and
##  auto.assign=TRUE by default.
## 
##  This  behavior  will be  phased out in 0.5-0  when the call  will
##  default to use auto.assign=FALSE. getOption("getSymbols.env") and 
##  getOptions("getSymbols.auto.assign") are now checked for alternate defaults
## 
##  This message is shown once per session and may be disabled by setting 
##  options("getSymbols.warning4.0"=FALSE). See ?getSymbols for more details.
PFE <- getSymbols('PFE', from = '2013-01-01',to = '2015-12-31', 
                  adjust = T, auto.assign = FALSE)
head(MERK)
##            MRK.Open MRK.High  MRK.Low MRK.Close MRK.Volume MRK.Adjusted
## 2013-01-02 37.89033 37.89938 37.05757  37.41964   15956900     36.52689
## 2013-01-03 37.91748 38.40627 37.21145  38.31576   23543400     37.40163
## 2013-01-04 38.43342 38.46963 37.85412  37.98989   15428500     37.08354
## 2013-01-07 38.05326 38.18903 37.88127  38.12567   11503600     37.21608
## 2013-01-08 38.24334 38.64161 38.17998  38.17998   14750900     37.26909
## 2013-01-09 38.37006 38.60541 38.19808  38.55110   10073100     37.63136
head(PFE)
##            PFE.Open PFE.High  PFE.Low PFE.Close PFE.Volume PFE.Adjusted
## 2013-01-02 23.09427 23.43802 22.91335  23.43802   33522200     22.58393
## 2013-01-03 23.50134 23.51039 23.12141  23.38374   33757400     22.53163
## 2013-01-04 23.38374 23.51039 23.30233  23.48325   28604900     22.62751
## 2013-01-07 23.43802 23.56466 23.31138  23.50134   25779900     22.64494
## 2013-01-08 23.51943 23.74558 23.50134  23.53752   31057000     22.67981
## 2013-01-09 23.71844 24.02600 23.69131  23.94459   34210200     23.07204
######################################################
# functions
######################################################

run_transfer <- function(dF){
  
  MRK.5 <- arima(dF[,2], order=c(5,0,0), method="ML")
  
  yPf <- filter(dF[,1], c(1, MRK.5$coef[1:5]), sides=1)[-c(1:5)]
  xMf <- filter(dF[,2], c(1, MRK.5$coef[1:5]), sides=1)[-c(1:5)]
  
  cyx=ccf(yPf,xMf,type=c("covariance"), lag.max=6, plot=F)
  c1=cyx[-1]
  c2=cyx[-2]
  c3=cyx[-3]
  c4=cyx[-4]
  c5=cyx[-5]
  
  axM=acf(xMf,type=c("covariance"), plot=F)
  v0=axM[0]
  
  b_1=as.numeric(c1[[1]])/as.numeric(v0[[1]])
  b_2=as.numeric(c2[[1]])/as.numeric(v0[[1]])
  b_3=as.numeric(c3[[1]])/as.numeric(v0[[1]])
  b_4=as.numeric(c4[[1]])/as.numeric(v0[[1]])
  b_5=as.numeric(c5[[1]])/as.numeric(v0[[1]])
  
  return(c(b_1,b_2,b_3,b_4,b_5))
  
} 


rolling_beta=function(z,width){
  rollapply(z,width=width,FUN=run_transfer,by.column=FALSE,align="right")
}

rolling_threshold=function(z,width){
  rollapply(z,width=width,FUN=mean,by.column=FALSE,align="right")
}

rolling_sd=function(z,width){
  rollapply(z,width=width, FUN=sd, by.column=FALSE,align="right")
}

compute_equity_curve=function(qty,price){
  cash_buy=ifelse(sign(qty)==1,qty*price,0)
  cash_sell=ifelse(sign(qty)==-1,-qty*price,0)
  position=cumsum(qty)
  cumulative_buy=cumsum(cash_buy)
  cumulative_sell=cumsum(cash_sell)
  equity=cumulative_sell-cumulative_buy+position*price
  return(equity)
}

drawdown=function(x){
  cummax(x)-x
}

sharpe_ratio <- function(x, rf){
  sharpe <- (mean(x, na.rm = T) - rf)/sd(x, na.rm = T)
  return(sharpe)
}

maximize_thresh <- function(M){
  
  
  data$upper = rolling_threshold(data$spread,10) + M*rolling_sd(data$spread,10)
  data$lower = rolling_threshold(data$spread,10) - M*rolling_sd(data$spread,10)
  
  buys=ifelse(data$spread > data$upper,1,0)
  sells=ifelse(data$spread < data$lower,-1,0)
  data$signal=buys+sells
  
  

  trade_size=1000
  signal=as.numeric(data$signal)
  signal[is.na(signal)]=0
  dist.up=as.numeric(abs(data$spread-data$upper))
  dist.up[is.na(dist.up)]=0
  dist.down=as.numeric(abs(data$spread-data$lower))
  dist.down[is.na(dist.down)]=0
  
  
  qty_x=rep(0,length(signal))
  qty_y=rep(0,length(signal))
  
  
  for(i in 1:length(signal)){
    
    
    if(signal[i]==1 ){
      
      qty_x[i]=-round(dist.up[i]*trade_size)
      qty_y[i]=round(dist.up[i]*2*trade_size)}
    
    if(signal[i]==-1){
      
      qty_x[i]=round(dist.down[i]*trade_size)
      qty_y[i]=-round(dist.down[i]*2*trade_size)}
    
  }
  
  qty_x[length(qty_x)]=-sum(qty_x)
  qty_y[length(qty_y)]=-sum(qty_y)
  
  data$qty_x=qty_x
  data$qty_y=qty_y
  
  data$equity_curve_x=compute_equity_curve(data$qty_x,data$M)
  data$equity_curve_y=compute_equity_curve(data$qty_y,data$P)
  
  return(data$equity_curve_x[252]+data$equity_curve_y[252])
  
}

######################################################
# in-sample / tuning
######################################################

start_date="2013-01-01"
end_date="2014-12-31"
range=paste(start_date,"::",end_date,sep="")
xM2=MERK[range,6]
yP2=PFE[range,6]
dF=cbind(yP2,xM2)
names(dF)=c("P","M")
ddF=diff(log(dF))[-1]

betas=lag(rolling_beta(ddF,12), 1)
## Warning in log(s2): NaNs produced
data=merge(betas,dF)
colnames(data)=c("b1", "b2", "b3", "b4", "b5", "P", "M")

data$spread1=data$P-betas[,1]*lag(data$M,1)
data$spread2=data$P-betas[,1]*lag(data$M,1)-betas[,2]*lag(data$M,2)
data$spread3=data$P-betas[,1]*lag(data$M,1)-betas[,2]*lag(data$M,2)-betas[,3]*lag(data$M,3)
data$spread4=data$P-betas[,1]*lag(data$M,1)-betas[,2]*lag(data$M,2)-betas[,3]*lag(data$M,3)-betas[,4]*lag(data$M,4)
data$spread5=data$P-betas[,1]*lag(data$M,1)-betas[,2]*lag(data$M,2)-betas[,3]*lag(data$M,3)-betas[,4]*lag(data$M,4)-
             betas[,5]*lag(data$M,5)



max_eq=0
ratio=0
eq_by_thresh <- as.data.frame(matrix(NA,nrow=100,ncol=5))
colnames(eq_by_thresh) <- c("B1", "B2", "B3", "B4", "B5")
eq_by_thresh$ratios <- seq(.01,1,.01)

for(i in 1:5){
  
  data$spread=data[,7+i]  
  eq_by_thresh[,i] <- as.integer(aaply(seq(.01,1,.01),1,.fun=maximize_thresh))
  max_eq[i] <-mean(as.numeric(max(eq_by_thresh[,i])))
  ratio[i] <- mean(as.numeric(which(eq_by_thresh[,i]==max(eq_by_thresh[,i]))/100))
  
} 

tuned <- as.data.frame(cbind(max_eq,ratio))
tuned
##    max_eq ratio
## 1  394622  0.01
## 2  138334  0.66
## 3  241224  0.01
## 4  -11294  0.01
## 5 -161311  1.00
final_beta <- which(max_eq==max(max_eq))
final_ratio <- tuned$ratio[final_beta]

eq_by_thresh[,c(1:5)] <- round(eq_by_thresh[,c(1:5)]/1000, digits=0)
min(eq_by_thresh[,c(1:5)])
## [1] -240
max(eq_by_thresh[,c(1:5)])
## [1] 395
plot(B1~ratios, data=eq_by_thresh, type="l", ylim=c(min(eq_by_thresh[,c(1:5)]),max(eq_by_thresh[,c(1:5)])), 
     ylab= "Thousand Dollars", xlab="Ratio of SD(threshold)", main = "EOY In-Sample P&L by Threshold Width/Number of Betas", 
     sub="Maximized at betas - 1; ratio - 0.01", col=1)
lines(B2~ratios, data=eq_by_thresh, col=2)
lines(B3~ratios, data=eq_by_thresh, col=3)
lines(B4~ratios, data=eq_by_thresh, col=4)
lines(B5~ratios, data=eq_by_thresh, col=6)
abline(h=max(eq_by_thresh[,c(1:5)]), lty=2)
abline(v=final_ratio, lty=2)
legend("topright", c("One", "Two", "Three", "Four", "Five"), lty=c(1,1,1,1,1), col=c(1,2,3,4,6), bty="n")

data$spread <- data[,7+final_beta]

######################################################
# test against out-of-sample
######################################################
start_date="2015-01-01"
end_date="2015-12-31"
range=paste(start_date,"::",end_date,sep="")
xM2=MERK[range,6]
yP2=PFE[range,6]
dF=cbind(yP2,xM2)
names(dF)=c("P","M")
ddF=diff(log(dF))[-1]

beta_out_of_sample=lag(rolling_beta(ddF,12), 1)
data_out=merge(beta_out_of_sample,dF)
colnames(data_out)=c("b1", "b2", "b3", "b4", "b5", "P", "M")

data_out$spread1=data_out$P-beta_out_of_sample[,1]*lag(data_out$M,1)
data_out$spread2=data_out$P-beta_out_of_sample[,1]*lag(data_out$M,1)-beta_out_of_sample[,2]*lag(data_out$M,2)
data_out$spread3=data_out$P-beta_out_of_sample[,1]*lag(data_out$M,1)-beta_out_of_sample[,2]*lag(data_out$M,2)-
  beta_out_of_sample[,3]*lag(data_out$M,3)
data_out$spread4=data_out$P-beta_out_of_sample[,1]*lag(data_out$M,1)-beta_out_of_sample[,2]*lag(data_out$M,2)-
  beta_out_of_sample[,3]*lag(data_out$M,3)-beta_out_of_sample[,4]*lag(data_out$M,4)
data_out$spread5=data_out$P-beta_out_of_sample[,1]*lag(data_out$M,1)-beta_out_of_sample[,2]*lag(data_out$M,2)-
  beta_out_of_sample[,3]*lag(data_out$M,3)-beta_out_of_sample[,4]*lag(data_out$M,4)-
  beta_out_of_sample[,5]*lag(data_out$M,5)

data_out$spread <- data_out[,7+final_beta]

data_out$upper = rolling_threshold(data_out$spread,10) + final_ratio*rolling_sd(data_out$spread,10)
data_out$lower = rolling_threshold(data_out$spread,10) - final_ratio*rolling_sd(data_out$spread,10)

data_out$upper[c(13:23),]=mean(data$spread, na.rm=T)+ final_ratio*sd(data$spread, na.rm=T)
data_out$lower[c(13:23),]=mean(data$spread, na.rm=T)- final_ratio*sd(data$spread, na.rm=T)

spreadmax=mean(data$spread, na.rm=T)+ 3*sd(data$spread, na.rm=T)
spreadmin=mean(data$spread, na.rm=T)- 3*sd(data$spread, na.rm=T)

buys=ifelse(data_out$spread > data_out$upper,1,0)
sells=ifelse(data_out$spread < data_out$lower,-1,0)
data_out$signal=buys+sells

plot.xts(data_out$spread,main="PFE vs MRK Out of Sample")
lines(data_out$upper, col="red")
lines(data_out$lower, col="red")
abline(h=spreadmax, lty=2, col="blue")
abline(h=spreadmin, lty=2, col="blue")


for(i in 14:length(data_out$spread)){
  if(data_out$spread[i] > spreadmax && data_out$spread[i-1] > spreadmax) data_out$signal[i] = 0

  if(data_out$spread[i] < spreadmin && data_out$spread[i-1] < spreadmin) data_out$signal[i] = 0
} 

point_type=rep(NA,nrow(data_out))
buy_index=which(data_out$signal==1)
sell_index=which(data_out$signal==-1)
point_type[buy_index]=21
point_type[sell_index]=24

points(data_out$spread,pch=point_type)
legend("bottomleft", c("Buy signal", "Sell signal"), pch=c(21,24), bty="n")

num_of_buy_signals=sum(data_out$signal[data_out$signal==1],na.rm=TRUE)
num_of_sell_signals=sum(abs(data_out$signal[data_out$signal==-1]),na.rm=TRUE)

num_of_buy_signals 
## [1] 121
num_of_sell_signals 
## [1] 116
trade_size=1000 

signal=as.numeric(data_out$signal)
signal[is.na(signal)]=0
dist.up=as.numeric(abs(data_out$spread-data_out$upper))
dist.up[is.na(dist.up)]=0
dist.down=as.numeric(abs(data_out$spread-data_out$lower))
dist.down[is.na(dist.down)]=0


qty_x=rep(0,length(signal))
qty_y=rep(0,length(signal))


for(i in 1:length(signal)){
  
  
  if(signal[i]==1){
    
    qty_x[i]=-round(dist.up[i]*trade_size)
    qty_y[i]=round(dist.up[i]*2*trade_size)}
  
  if(signal[i]==-1){
    
    qty_x[i]=round(dist.down[i]*trade_size)
    qty_y[i]=-round(dist.down[i]*2*trade_size)}
  
}

qty_x[length(qty_x)]=-sum(qty_x)
qty_y[length(qty_y)]=-sum(qty_y)

data_out$qty_x=qty_x
data_out$qty_y=qty_y

tail(data_out,3)
##                     b1           b2          b3          b4           b5
## 2015-12-29  0.20381791  0.038753179 -0.12764938 -0.10499637 0.0009766312
## 2015-12-30  0.05328495 -0.004184604 -0.26951648  0.01548899 0.1424042593
## 2015-12-31 -0.04335806  0.418275468 -0.09523404 -0.17360045 0.0346668033
##                   P        M  spread1  spread2  spread3  spread4  spread5
## 2015-12-29 31.63366 52.06743 21.12086 19.12162 25.72314 31.06294 31.01349
## 2015-12-30 31.55658 51.97958 28.78217 28.99800 42.90212 42.10109 34.85884
## 2015-12-31 31.10370 51.55983 33.35744 11.57891 16.49102 25.44691 23.65408
##              spread    upper    lower signal  qty_x   qty_y
## 2015-12-29 21.12086 30.68645 30.60664     -1   9486  -18972
## 2015-12-30 28.78217 30.23477 30.15653     -1   1374   -2749
## 2015-12-31 33.35744 30.30075 30.22147      1 239542 -479077
data_out$equity_curve_x=compute_equity_curve(data_out$qty_x,data_out$M)
data_out$equity_curve_y=compute_equity_curve(data_out$qty_y,data_out$P)

equity_curve=(data_out$equity_curve_x+data_out$equity_curve_y)/1000


plot(equity_curve,main="Equity Curve",
     cex.main=0.8,
     cex.lab=0.8,
     cex.axis=0.8,
     ylab = "P&L: Thousand Dollars")
abline(h=0, lty=2)

plot(drawdown(equity_curve),main="Drawdown of Equity Curve",
     cex.main=0.8,
     cex.lab=0.8,
     cex.axis=0.8, 
     ylab = "Thousand Dollars")

max(equity_curve)
## [1] 3610.934
mean(equity_curve)
## [1] 2269.087
equity_curve[252]
##            equity_curve_x
## 2015-12-31       2587.784
max(drawdown(equity_curve))
## [1] 1753.219
mean(drawdown(equity_curve))
## [1] 474.868
drawdown(equity_curve)[252]
##            equity_curve_x
## 2015-12-31       1023.151
equity <- as.numeric(equity_curve[,1])
equity_curve_returns <- diff(equity)/equity[-length(equity)]
invalid <- is.infinite(equity_curve_returns) | is.nan(equity_curve_returns)
sharpe_ratio(equity_curve_returns[!invalid], 0.03)
## [1] 0.0763933
sharpe_ratio(equity_curve_returns[!invalid], 0.03)*sqrt(252)
## [1] 1.212706
######################################################