######################################################
# Stat 6721 - Assignment 2
#
# Sarah Rathwell 
#
# Objective - Investigate Merck and Pfizer for 2014 
#             and trade on 2015 - Correlations.
######################################################
rm(list=ls())
library(quantmod)
## Loading required package: xts
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
## Loading required package: TTR
## Version 0.4-0 included new data defaults. See ?getSymbols.
######################################################
cat(" Second look - Merk vs Pfizer.\n\n",
    "Last modification:",date(),'\n')
##  Second look - Merk vs Pfizer.
## 
##  Last modification: Thu Oct 20 20:03:38 2016
######################################################


MERK <- getSymbols('MRK', from = '2014-01-01',to = '2015-12-31', 
                  adjust = T, auto.assign = FALSE)
##     As of 0.4-0, 'getSymbols' uses env=parent.frame() and
##  auto.assign=TRUE by default.
## 
##  This  behavior  will be  phased out in 0.5-0  when the call  will
##  default to use auto.assign=FALSE. getOption("getSymbols.env") and 
##  getOptions("getSymbols.auto.assign") are now checked for alternate defaults
## 
##  This message is shown once per session and may be disabled by setting 
##  options("getSymbols.warning4.0"=FALSE). See ?getSymbols for more details.
PFE <- getSymbols('PFE', from = '2014-01-01',to = '2015-12-31', 
                   adjust = T, auto.assign = FALSE)
head(MERK)
##            MRK.Open MRK.High  MRK.Low MRK.Close MRK.Volume MRK.Adjusted
## 2014-01-02 46.83928 46.98953 46.29464  46.47306    7886500     45.36431
## 2014-01-03 46.47306 46.95196 46.44488  46.69842    6435000     45.58430
## 2014-01-06 46.77355 47.08343 46.49183  46.70782    9931400     45.59347
## 2014-01-07 46.91440 47.32758 46.89562  47.05526    9995800     45.93263
## 2014-01-08 47.04587 47.08343 46.57635  46.75477   13695700     45.63930
## 2014-01-09 46.78294 46.95196 46.33220  46.50122    9323900     45.39181
head(PFE)
##            PFE.Open PFE.High  PFE.Low PFE.Close PFE.Volume PFE.Adjusted
## 2014-01-02 28.48309 28.60462 28.35222  28.47374   17425300     27.70978
## 2014-01-03 28.40831 28.81962 28.39896  28.52983   15035400     27.76436
## 2014-01-06 28.76353 28.93179 28.53918  28.55788   23961500     27.79165
## 2014-01-07 28.84766 28.99723 28.73549  28.73549   22223000     27.96450
## 2014-01-08 28.77288 28.96918 28.65136  28.93179   22923200     28.15554
## 2014-01-09 29.05332 29.09071 28.61397  28.91310   20581500     28.13734
### total LS regression

x=diff(as.numeric(PFE[,4]))
y=diff(as.numeric(MERK[,4]))

plot(x,y)
abline(lm(y~x))
abline(lm(x~y),lty=2)

r=prcomp(~x+y)
slope=r$rotation[2,1]/r$rotation[1,1]
intercept=r$center[2]-slope*r$center[1]
abline(a=intercept,b=slope,lty=3)

### constructing the spread

calculate_spread=function(x,y,beta){return(y-beta*x)}

calculate_beta_and_level=function(x,y,start_date,end_date){
  
  require(xts)
  
  time_range=paste(start_date,"::",end_date,sep="")
  x=x[time_range]
  y=y[time_range]
  dx=diff(x[time_range])
  dy=diff(y[time_range])
  r=prcomp(~dx+dy)
  beta=r$rotation[2,1]/r$rotation[1,1]
  spread=calculate_spread(x,y,beta)
  names(spread)="spread"
  level=mean(spread,na.rm=TRUE)
  outL=list()
  outL$spread=spread
  outL$beta=beta
  outL$level=level
  return(outL)
  
}

calculate_buy_sell_signals=function(spread,beta,level,lower_threshold,upper_threshold){
  
  buy_signals=ifelse(spread<=level-lower_threshold,1,0)
  sell_signals=ifelse(spread>=level+upper_threshold,1,0)
  output=cbind(spread,buy_signals,sell_signals)
  colnames(output)=c("spread","buy_signals","sell_signals")
  return(output)
  
}

### signal generation and validation for rolling window 

window_length=10

# in sample

start_date="2014-01-01"
end_date="2014-12-31"
range=paste(start_date,"::",end_date,sep="")
x=PFE[range,6]
y=MERK[range,6]
dF=cbind(x,y)
names(dF)=c("x","y")

run_regression=function(dF){
  return(coef(lm(y~x-1,data=as.data.frame(dF))))
  }
# y~x-1 means no intercept

rolling_beta=function(z,width){
  rollapply(z,width=width,FUN=run_regression,by.column=FALSE,align="right")
  }

betas=rolling_beta(diff(dF),10)

data=merge(betas,dF)
data$spread=data$y-lag(betas,1)*data$x   

returns=diff(dF)/dF
return_beta=rolling_beta(returns,10)
data$spreadR=diff(data$y)/data$y-return_beta*diff(data$x)/data$x 

tail(data)
##               betas        x        y    spread      spreadR
## 2014-12-23 1.962627 29.61721 54.04233  1.376813 -0.008884876
## 2014-12-24 1.964650 29.58895 54.27849 -3.793581  0.005370205
## 2014-12-26 1.973137 29.80555 54.58077 -3.976699 -0.002255313
## 2014-12-29 1.782135 29.56070 54.53354 -3.793771  0.007186639
## 2014-12-30 1.746206 29.55128 54.45797  1.793582 -0.001084284
## 2014-12-31 1.837884 29.33469 53.64559  2.421177 -0.007728864
################################################
# New Work
################################################

head(dF)
##                   x        y
## 2014-01-02 27.70978 45.36431
## 2014-01-03 27.76436 45.58430
## 2014-01-06 27.79165 45.59347
## 2014-01-07 27.96450 45.93263
## 2014-01-08 28.15554 45.63930
## 2014-01-09 28.13734 45.39181
dFnew <- dF
dFnew$dif <- dFnew[,1] - dFnew[,2]
head(dFnew)
##                   x        y       dif
## 2014-01-02 27.70978 45.36431 -17.65453
## 2014-01-03 27.76436 45.58430 -17.81994
## 2014-01-06 27.79165 45.59347 -17.80182
## 2014-01-07 27.96450 45.93263 -17.96813
## 2014-01-08 28.15554 45.63930 -17.48376
## 2014-01-09 28.13734 45.39181 -17.25447
Spread <- data$spread

set.seed(12345)
noise <- rnorm(241)
ar1 <- arima.sim(model=list(ar=c(0.9)), n=241)

########
# plots
########

par(mfrow=c(2,2))

ts.plot(dFnew[,1], main = 'PFE', ylab = 'Adjusted Close')
acf(dFnew[,1], lag.max=100, na.action=na.pass, main="PFE")
ts.plot(dFnew[,2], main = 'MRK', ylab = 'Adjusted Close')
acf(dFnew[,2], lag.max=100, na.action=na.pass, main="MRK")

mtext('2014 Adjusted Close Price [PFE vs MERK] with ACF', outer = TRUE, cex = 1.5)

# Checking for WN
# PFE

par(mfrow=c(1,2), oma = c(0, 0, 2, 0))

acf(diff(dFnew[,1]), lag.max=30, na.action=na.pass, main="PFE")
acf(diff(dFnew[,1])^2, lag.max=30, na.action=na.pass, main="PFE^2")

mtext('ACF: 2014 Differenced PFE Transforms', outer = TRUE, cex = 1.5)

# MRK

acf(diff(dFnew[,2]), lag.max=30, na.action=na.pass, main="MRK")
acf(diff(dFnew[,2])^2, lag.max=30, na.action=na.pass, main="MRK^2")

mtext('ACF: 2014 Differenced MRK Transforms', outer = TRUE, cex = 1.5)

# Check spread

layout(matrix(c(1,1,2,3), 2, 2, byrow = TRUE))

ts.plot(Spread, main = "")
acf(Spread, lag.max=100, na.action=na.pass, main="Spread")
acf(diff(Spread), na.action=na.pass, main="Differenced Spread")

mtext('2014 Spread with ACF', outer = TRUE, cex = 1.5)

# Spread transforms 

par(mfrow=c(2,2), oma = c(0, 0, 2, 0))

acf(diff(Spread), na.action=na.pass, lag.max=30, main='Spread')
acf(diff(Spread)^2, na.action=na.pass, lag.max=30, main='Spread^2')
acf(diff(log(Spread)), na.action=na.pass, lag.max=30, main='log(Spread)')
## Warning in log(Spread): NaNs produced
acf(diff(log(Spread))^2, na.action=na.pass, lag.max=30, main='log(Spread)^2')
## Warning in log(Spread): NaNs produced
mtext('ACF: 2014 Differenced Spread Transforms', outer = TRUE, cex = 1.5)

# check differenced ACF's to find the order

par(mfrow=c(1,3),oma = c(0, 0, 2, 0))

acf(Spread,na.action=na.pass,lag.max=100, main = "Spread")
acf(diff(Spread),na.action=na.pass,lag.max=30, main = "Differenced")
acf(diff(diff(Spread)),na.action=na.pass,lag.max=30, main = "Twice Differenced")

mtext('ACF: 2014 Spread vs Differenced', outer = TRUE, cex = 1.5)

# check against simulations for spread v ar1 and difspread v noise

par(mfrow=c(2,2),oma = c(0, 0, 2, 0))

plot.ts(ar1, main="AR1 = 0.9", ylab = "")
plot.ts(noise, main = "White Noise", ylab = "")
plot.ts(Spread, main = "Spread", ylab = "")
plot.ts(diff(Spread), main = "Differenced Spread", ylab = "")

mtext('Simulated vs 2014 Spread', outer = TRUE, cex = 1.5)

# acf against AR1

par(mfrow=c(1,3),oma = c(0, 0, 2, 0))

acf(ar1, lag.max=100, main="AR1 - 0.9")
acf(Spread, na.action=na.pass, lag.max=100, main="Spread")
acf(diff(Spread), na.action=na.pass, main="Differenced Spread")

mtext('ACF: Simulated vs 2014 Spread', outer = TRUE, cex = 1.5)