######################################################
# Stat 6721 - Assignment 1
#
# Sarah Rathwell
#
# Objective - Investigate Merck and Pfizer for 2014
# and trade on 2015.
######################################################
rm(list=ls())
library(quantmod)
## Loading required package: xts
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
## Loading required package: TTR
## Version 0.4-0 included new data defaults. See ?getSymbols.
######################################################
cat(" Initial look - Merk vs Pfizer.\n\n",
"Last modification:",date(),'\n')
## Initial look - Merk vs Pfizer.
##
## Last modification: Thu Oct 20 15:46:42 2016
######################################################
MERK <- getSymbols('MRK', from = '2014-01-01',to = '2015-12-31',
adjust = T, auto.assign = FALSE)
## As of 0.4-0, 'getSymbols' uses env=parent.frame() and
## auto.assign=TRUE by default.
##
## This behavior will be phased out in 0.5-0 when the call will
## default to use auto.assign=FALSE. getOption("getSymbols.env") and
## getOptions("getSymbols.auto.assign") are now checked for alternate defaults
##
## This message is shown once per session and may be disabled by setting
## options("getSymbols.warning4.0"=FALSE). See ?getSymbols for more details.
PFE <- getSymbols('PFE', from = '2014-01-01',to = '2015-12-31',
adjust = T, auto.assign = FALSE)
head(MERK)
## MRK.Open MRK.High MRK.Low MRK.Close MRK.Volume MRK.Adjusted
## 2014-01-02 46.83928 46.98953 46.29464 46.47306 7886500 45.36431
## 2014-01-03 46.47306 46.95196 46.44488 46.69842 6435000 45.58430
## 2014-01-06 46.77355 47.08343 46.49183 46.70782 9931400 45.59347
## 2014-01-07 46.91440 47.32758 46.89562 47.05526 9995800 45.93263
## 2014-01-08 47.04587 47.08343 46.57635 46.75477 13695700 45.63930
## 2014-01-09 46.78294 46.95196 46.33220 46.50122 9323900 45.39181
head(PFE)
## PFE.Open PFE.High PFE.Low PFE.Close PFE.Volume PFE.Adjusted
## 2014-01-02 28.48309 28.60462 28.35222 28.47374 17425300 27.70978
## 2014-01-03 28.40831 28.81962 28.39896 28.52983 15035400 27.76436
## 2014-01-06 28.76353 28.93179 28.53918 28.55788 23961500 27.79165
## 2014-01-07 28.84766 28.99723 28.73549 28.73549 22223000 27.96450
## 2014-01-08 28.77288 28.96918 28.65136 28.93179 22923200 28.15554
## 2014-01-09 29.05332 29.09071 28.61397 28.91310 20581500 28.13734
chartSeries(MERK, subset = '2014::2014-04', theme = chartTheme('white'),
TA = 'addVo(); addBBands()')

chartSeries(PFE, subset = '2014::2014-04', theme = chartTheme('white'),
TA = 'addVo(); addBBands()')

### total LS regression
x=diff(as.numeric(PFE[,4]))
y=diff(as.numeric(MERK[,4]))
plot(x,y)
abline(lm(y~x))
abline(lm(x~y),lty=2)
r=prcomp(~x+y)
slope=r$rotation[2,1]/r$rotation[1,1]
intercept=r$center[2]-slope*r$center[1]
abline(a=intercept,b=slope,lty=3)

### constructing the spread
calculate_spread=function(x,y,beta){return(y-beta*x)}
calculate_beta_and_level=function(x,y,start_date,end_date){
require(xts)
time_range=paste(start_date,"::",end_date,sep="")
x=x[time_range]
y=y[time_range]
dx=diff(x[time_range])
dy=diff(y[time_range])
r=prcomp(~dx+dy)
beta=r$rotation[2,1]/r$rotation[1,1]
spread=calculate_spread(x,y,beta)
names(spread)="spread"
level=mean(spread,na.rm=TRUE)
outL=list()
outL$spread=spread
outL$beta=beta
outL$level=level
return(outL)
}
calculate_buy_sell_signals=function(spread,beta,level,lower_threshold,upper_threshold){
buy_signals=ifelse(spread<=level-lower_threshold,1,0)
sell_signals=ifelse(spread>=level+upper_threshold,1,0)
output=cbind(spread,buy_signals,sell_signals)
colnames(output)=c("spread","buy_signals","sell_signals")
return(output)
}
### signal generation and validation for rolling window
window_length=10
# in sample
start_date="2014-01-01"
end_date="2014-12-31"
range=paste(start_date,"::",end_date,sep="")
x=PFE[range,6]
y=MERK[range,6]
dF=cbind(x,y)
names(dF)=c("x","y")
run_regression=function(dF){
return(coef(lm(y~x-1,data=as.data.frame(dF))))
}
# y~x-1 means no intercept
rolling_beta=function(z,width){
rollapply(z,width=width,FUN=run_regression,by.column=FALSE,align="right")
}
betas=rolling_beta(diff(dF),10)
data=merge(betas,dF)
data$spread=data$y-lag(betas,1)*data$x
returns=diff(dF)/dF
return_beta=rolling_beta(returns,10)
data$spreadR=diff(data$y)/data$y-return_beta*diff(data$x)/data$x
tail(data)
## betas x y spread spreadR
## 2014-12-23 1.962627 29.61721 54.04233 1.376813 -0.008884876
## 2014-12-24 1.964650 29.58895 54.27849 -3.793581 0.005370205
## 2014-12-26 1.973137 29.80555 54.58077 -3.976699 -0.002255313
## 2014-12-29 1.782135 29.56070 54.53354 -3.793771 0.007186639
## 2014-12-30 1.746206 29.55128 54.45797 1.793582 -0.001084284
## 2014-12-31 1.837884 29.33469 53.64559 2.421177 -0.007728864
threshold=sd(data$spread,na.rm=TRUE)
threshold
## [1] 24.30036
# 24.30036
spread.mean=mean(data$spread,na.rm=TRUE)
spread.mean
## [1] 21.80522
#21.80522
spread.med=median(data$spread,na.rm=TRUE)
spread.med
## [1] 22.2964
#22.2964
plot(data$spreadR,main="Rolling Beta Spread")
abline(h=0, lty=2)

plot.xts(data$spread,main="PFE vs MERK In-Sample",
sub="Thresholds about 0 and the mean shown")
abline(h=threshold,lty=3)
abline(h=-threshold,lty=3)
abline(h=spread.mean+.4*threshold, lty=2)
abline(h=spread.mean-.4*threshold, lty=2)
legend("bottomleft", c("About 0", "Shifted"), lty=c(3,2), bty="n")

# out of sample
start_date="2015-01-01"
end_date="2015-12-31"
range=paste(start_date,"::",end_date,sep="")
x=PFE[range,6]
y=MERK[range,6]
dF=cbind(x,y)
names(dF)=c("x","y")
beta_out_of_sample=rolling_beta(diff(dF),10)
data_out=merge(beta_out_of_sample,dF)
data_out$spread=data_out$y-lag(beta_out_of_sample,1)*data_out$x
plot.xts(data_out$spread,main="PFE vs MERK out of sample",
sub="Thresholds about 0 and the mean shown")
threshold=sd(data$spread,na.rm=TRUE)
abline(h=threshold,lty=3)
abline(h=-threshold,lty=3)
abline(h=spread.mean+.4*threshold, lty=2)
abline(h=spread.mean-.4*threshold, lty=2)
legend("bottomleft", c("About 0", "Shifted"), lty=c(3,2), bty="n")

### trading strategy (1st Assignment - Oct 7)
buys=ifelse(data_out$spread > spread.mean+.4*threshold,1,0)
sells=ifelse(data_out$spread < spread.mean-.4*threshold,-1,0)
data_out$signal=buys+sells
plot.xts(data_out$spread,main="PFE vs MERK out of sample")
abline(h=spread.mean+.4*threshold, lty=2)
abline(h=spread.mean-.4*threshold, lty=2)
point_type=rep(NA,nrow(data_out))
buy_index=which(data_out$signal==1)
sell_index=which(data_out$signal==-1)
point_type[buy_index]=21
point_type[sell_index]=24
points(data_out$spread,pch=point_type)

num_of_buy_signals=sum(buys,na.rm=TRUE)
num_of_sell_signals=sum(abs(sells),na.rm=TRUE)
num_of_buy_signals
## [1] 61
#90
num_of_sell_signals
## [1] 103
#26
### trading the spread
prev_x_qty=0
position=0
trade_size=100
signal=as.numeric(data_out$signal)
signal[is.na(signal)]=0
beta=as.numeric(data_out$beta_out_of_sample)
qty_x=rep(0,length(signal))
qty_y=rep(0,length(signal))
for(i in 1:length(signal)){
if(signal[i]==1 && position==0){
prev_x_qty=round(beta[i]*trade_size)
qty_x[i]=prev_x_qty
qty_y[i]=trade_size
position=1}
if(signal==-1 && position==0){
prev_x_qty=round(beta[i]*trade_size)
qty_x[i]=prev_x_qty
qty_y[i]=-trade_size
position=-1}
if(signal[i]==1 && position==-1){
qty_x[i]=-(round(beta[i]*trade_size)+prev_x_qty)
prev_x_qty=round(beta[i]*trade_size)
qty_y[i]=2*trade_size
position=1}
if(signal[i]==-1 && position==1){
qty_x[i]=round(beta[i]*trade_size)+prev_x_qty
prev_x_qty=round(beta[i]*trade_size)
qty_y[i]=-2*trade_size
position=-1}
}
qty_x[length(qty_x)]=-sum(qty_x)
qty_y[length(qty_y)]=-sum(qty_y)
data_out$qty_x=qty_x
data_out$qty_y=qty_y
tail(data_out,3)
## beta_out_of_sample x y spread signal qty_x
## 2015-12-29 2.017314 31.94916 52.06743 -18.564483 -1 0
## 2015-12-30 1.949771 31.87130 51.97958 -12.314846 -1 0
## 2015-12-31 1.489828 31.41391 51.55983 -9.690087 -1 -373
## qty_y
## 2015-12-29 0
## 2015-12-30 0
## 2015-12-31 100
compute_equity_curve=function(qty,price){
cash_buy=ifelse(sign(qty)==1,qty*price,0)
cash_sell=ifelse(sign(qty)==-1,-qty*price,0)
position=cumsum(qty)
cumulative_buy=cumsum(cash_buy)
cumulative_sell=cumsum(cash_sell)
equity=cumulative_sell-cumulative_buy+position*price
return(equity)
}
data_out$equity_curve_x=compute_equity_curve(data_out$qty_x,data_out$x)
data_out$equity_curve_y=compute_equity_curve(data_out$qty_y,data_out$y)
plot(data_out$equity_curve_x+data_out$equity_curve_y,type='l',main="PFE/MERK Spread",ylab="P&L")

drawdown=function(x){cummax(x)-x}
equity_curve=data_out$equity_curve_x+data_out$equity_curve_y
par(mfrow=c(2,1))
plot(equity_curve,main="Equity Curve",
cex.main=0.8,
cex.lab=0.8,
cex.axis=0.8)
abline(h=0, lty=2)
plot(drawdown(equity_curve),main="Drawdown of Equity Curve",
cex.main=0.8,
cex.lab=0.8,
cex.axis=0.8)

par(mfrow=c(1,1))