######################################################
# Stat 6721 - Assignment 1
#
# Sarah Rathwell 
#
# Objective - Investigate Merck and Pfizer for 2014 
#             and trade on 2015.
######################################################
rm(list=ls())
library(quantmod)
## Loading required package: xts
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
## Loading required package: TTR
## Version 0.4-0 included new data defaults. See ?getSymbols.
######################################################
cat(" Initial look - Merk vs Pfizer.\n\n",
    "Last modification:",date(),'\n')
##  Initial look - Merk vs Pfizer.
## 
##  Last modification: Thu Oct 20 15:46:42 2016
######################################################


MERK <- getSymbols('MRK', from = '2014-01-01',to = '2015-12-31', 
                  adjust = T, auto.assign = FALSE)
##     As of 0.4-0, 'getSymbols' uses env=parent.frame() and
##  auto.assign=TRUE by default.
## 
##  This  behavior  will be  phased out in 0.5-0  when the call  will
##  default to use auto.assign=FALSE. getOption("getSymbols.env") and 
##  getOptions("getSymbols.auto.assign") are now checked for alternate defaults
## 
##  This message is shown once per session and may be disabled by setting 
##  options("getSymbols.warning4.0"=FALSE). See ?getSymbols for more details.
PFE <- getSymbols('PFE', from = '2014-01-01',to = '2015-12-31', 
                   adjust = T, auto.assign = FALSE)
head(MERK)
##            MRK.Open MRK.High  MRK.Low MRK.Close MRK.Volume MRK.Adjusted
## 2014-01-02 46.83928 46.98953 46.29464  46.47306    7886500     45.36431
## 2014-01-03 46.47306 46.95196 46.44488  46.69842    6435000     45.58430
## 2014-01-06 46.77355 47.08343 46.49183  46.70782    9931400     45.59347
## 2014-01-07 46.91440 47.32758 46.89562  47.05526    9995800     45.93263
## 2014-01-08 47.04587 47.08343 46.57635  46.75477   13695700     45.63930
## 2014-01-09 46.78294 46.95196 46.33220  46.50122    9323900     45.39181
head(PFE)
##            PFE.Open PFE.High  PFE.Low PFE.Close PFE.Volume PFE.Adjusted
## 2014-01-02 28.48309 28.60462 28.35222  28.47374   17425300     27.70978
## 2014-01-03 28.40831 28.81962 28.39896  28.52983   15035400     27.76436
## 2014-01-06 28.76353 28.93179 28.53918  28.55788   23961500     27.79165
## 2014-01-07 28.84766 28.99723 28.73549  28.73549   22223000     27.96450
## 2014-01-08 28.77288 28.96918 28.65136  28.93179   22923200     28.15554
## 2014-01-09 29.05332 29.09071 28.61397  28.91310   20581500     28.13734
chartSeries(MERK, subset = '2014::2014-04', theme = chartTheme('white'),
            TA = 'addVo(); addBBands()')

chartSeries(PFE, subset = '2014::2014-04', theme = chartTheme('white'),
            TA = 'addVo(); addBBands()')

### total LS regression

x=diff(as.numeric(PFE[,4]))
y=diff(as.numeric(MERK[,4]))

plot(x,y)
abline(lm(y~x))
abline(lm(x~y),lty=2)

r=prcomp(~x+y)
slope=r$rotation[2,1]/r$rotation[1,1]
intercept=r$center[2]-slope*r$center[1]
abline(a=intercept,b=slope,lty=3)

### constructing the spread

calculate_spread=function(x,y,beta){return(y-beta*x)}

calculate_beta_and_level=function(x,y,start_date,end_date){
  
  require(xts)
  
  time_range=paste(start_date,"::",end_date,sep="")
  x=x[time_range]
  y=y[time_range]
  dx=diff(x[time_range])
  dy=diff(y[time_range])
  r=prcomp(~dx+dy)
  beta=r$rotation[2,1]/r$rotation[1,1]
  spread=calculate_spread(x,y,beta)
  names(spread)="spread"
  level=mean(spread,na.rm=TRUE)
  outL=list()
  outL$spread=spread
  outL$beta=beta
  outL$level=level
  return(outL)
  
}

calculate_buy_sell_signals=function(spread,beta,level,lower_threshold,upper_threshold){
  
  buy_signals=ifelse(spread<=level-lower_threshold,1,0)
  sell_signals=ifelse(spread>=level+upper_threshold,1,0)
  output=cbind(spread,buy_signals,sell_signals)
  colnames(output)=c("spread","buy_signals","sell_signals")
  return(output)
  
}

### signal generation and validation for rolling window 

window_length=10

# in sample

start_date="2014-01-01"
end_date="2014-12-31"
range=paste(start_date,"::",end_date,sep="")
x=PFE[range,6]
y=MERK[range,6]
dF=cbind(x,y)
names(dF)=c("x","y")

run_regression=function(dF){
  return(coef(lm(y~x-1,data=as.data.frame(dF))))
  }
# y~x-1 means no intercept

rolling_beta=function(z,width){
  rollapply(z,width=width,FUN=run_regression,by.column=FALSE,align="right")
  }

betas=rolling_beta(diff(dF),10)

data=merge(betas,dF)
data$spread=data$y-lag(betas,1)*data$x   

returns=diff(dF)/dF
return_beta=rolling_beta(returns,10)
data$spreadR=diff(data$y)/data$y-return_beta*diff(data$x)/data$x 

tail(data)
##               betas        x        y    spread      spreadR
## 2014-12-23 1.962627 29.61721 54.04233  1.376813 -0.008884876
## 2014-12-24 1.964650 29.58895 54.27849 -3.793581  0.005370205
## 2014-12-26 1.973137 29.80555 54.58077 -3.976699 -0.002255313
## 2014-12-29 1.782135 29.56070 54.53354 -3.793771  0.007186639
## 2014-12-30 1.746206 29.55128 54.45797  1.793582 -0.001084284
## 2014-12-31 1.837884 29.33469 53.64559  2.421177 -0.007728864
threshold=sd(data$spread,na.rm=TRUE)
threshold
## [1] 24.30036
# 24.30036
spread.mean=mean(data$spread,na.rm=TRUE)
spread.mean
## [1] 21.80522
#21.80522
spread.med=median(data$spread,na.rm=TRUE)
spread.med
## [1] 22.2964
#22.2964

plot(data$spreadR,main="Rolling Beta Spread")
abline(h=0, lty=2)

plot.xts(data$spread,main="PFE vs MERK In-Sample",
     sub="Thresholds about 0 and the mean shown")
abline(h=threshold,lty=3)
abline(h=-threshold,lty=3)
abline(h=spread.mean+.4*threshold, lty=2)
abline(h=spread.mean-.4*threshold, lty=2)
legend("bottomleft", c("About 0", "Shifted"), lty=c(3,2), bty="n")

# out of sample

start_date="2015-01-01"
end_date="2015-12-31"
range=paste(start_date,"::",end_date,sep="")

x=PFE[range,6]
y=MERK[range,6]

dF=cbind(x,y)
names(dF)=c("x","y")

beta_out_of_sample=rolling_beta(diff(dF),10)

data_out=merge(beta_out_of_sample,dF)
data_out$spread=data_out$y-lag(beta_out_of_sample,1)*data_out$x

plot.xts(data_out$spread,main="PFE vs MERK out of sample",
         sub="Thresholds about 0 and the mean shown")
threshold=sd(data$spread,na.rm=TRUE)
abline(h=threshold,lty=3)
abline(h=-threshold,lty=3)
abline(h=spread.mean+.4*threshold, lty=2)
abline(h=spread.mean-.4*threshold, lty=2)
legend("bottomleft", c("About 0", "Shifted"), lty=c(3,2), bty="n")

### trading strategy (1st Assignment - Oct 7)

buys=ifelse(data_out$spread > spread.mean+.4*threshold,1,0)
sells=ifelse(data_out$spread < spread.mean-.4*threshold,-1,0)
data_out$signal=buys+sells

plot.xts(data_out$spread,main="PFE vs MERK out of sample")
abline(h=spread.mean+.4*threshold, lty=2)
abline(h=spread.mean-.4*threshold, lty=2)


point_type=rep(NA,nrow(data_out))
buy_index=which(data_out$signal==1)
sell_index=which(data_out$signal==-1)
point_type[buy_index]=21
point_type[sell_index]=24

points(data_out$spread,pch=point_type)

num_of_buy_signals=sum(buys,na.rm=TRUE)
num_of_sell_signals=sum(abs(sells),na.rm=TRUE)

num_of_buy_signals
## [1] 61
#90
num_of_sell_signals
## [1] 103
#26


### trading the spread

prev_x_qty=0
position=0
trade_size=100
signal=as.numeric(data_out$signal)
signal[is.na(signal)]=0

beta=as.numeric(data_out$beta_out_of_sample)

qty_x=rep(0,length(signal))
qty_y=rep(0,length(signal))


for(i in 1:length(signal)){
  
  if(signal[i]==1 && position==0){
    
    prev_x_qty=round(beta[i]*trade_size)
    qty_x[i]=prev_x_qty
    qty_y[i]=trade_size
    position=1}
  
  if(signal==-1 && position==0){
    
    prev_x_qty=round(beta[i]*trade_size)
    qty_x[i]=prev_x_qty
    qty_y[i]=-trade_size
    position=-1}
  
  if(signal[i]==1 && position==-1){
    
    qty_x[i]=-(round(beta[i]*trade_size)+prev_x_qty)
    prev_x_qty=round(beta[i]*trade_size)
    qty_y[i]=2*trade_size
    position=1}
  
  if(signal[i]==-1 && position==1){
    
    qty_x[i]=round(beta[i]*trade_size)+prev_x_qty
    prev_x_qty=round(beta[i]*trade_size)
    qty_y[i]=-2*trade_size
    position=-1}
}

qty_x[length(qty_x)]=-sum(qty_x)
qty_y[length(qty_y)]=-sum(qty_y)

data_out$qty_x=qty_x
data_out$qty_y=qty_y

tail(data_out,3)
##            beta_out_of_sample        x        y     spread signal qty_x
## 2015-12-29           2.017314 31.94916 52.06743 -18.564483     -1     0
## 2015-12-30           1.949771 31.87130 51.97958 -12.314846     -1     0
## 2015-12-31           1.489828 31.41391 51.55983  -9.690087     -1  -373
##            qty_y
## 2015-12-29     0
## 2015-12-30     0
## 2015-12-31   100
compute_equity_curve=function(qty,price){
  cash_buy=ifelse(sign(qty)==1,qty*price,0)
  cash_sell=ifelse(sign(qty)==-1,-qty*price,0)
  position=cumsum(qty)
  cumulative_buy=cumsum(cash_buy)
  cumulative_sell=cumsum(cash_sell)
  equity=cumulative_sell-cumulative_buy+position*price
  return(equity)
}

data_out$equity_curve_x=compute_equity_curve(data_out$qty_x,data_out$x)
data_out$equity_curve_y=compute_equity_curve(data_out$qty_y,data_out$y)

plot(data_out$equity_curve_x+data_out$equity_curve_y,type='l',main="PFE/MERK Spread",ylab="P&L")

drawdown=function(x){cummax(x)-x}

equity_curve=data_out$equity_curve_x+data_out$equity_curve_y

par(mfrow=c(2,1))

plot(equity_curve,main="Equity Curve",
     cex.main=0.8,
     cex.lab=0.8,
     cex.axis=0.8)
abline(h=0, lty=2)

plot(drawdown(equity_curve),main="Drawdown of Equity Curve",
     cex.main=0.8,
     cex.lab=0.8,
     cex.axis=0.8)

par(mfrow=c(1,1))