(a) Plot the market shares of Colgate and Crest for the whole sample

# Intervention Analysis Approach to the Colgate and Crest series

# Import data from the "http://myweb.ttu.edu/jduras/files/teaching/e5316/toothpaste.csv" as Tibble

rm(list=ls())
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 3.4.4
## -- Attaching packages ----------------------------------------------------------- tidyverse 1.2.1 --
## v ggplot2 2.2.1     v purrr   0.2.4
## v tibble  1.4.2     v dplyr   0.7.4
## v tidyr   0.8.0     v stringr 1.3.0
## v readr   1.1.1     v forcats 0.3.0
## -- Conflicts -------------------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(magrittr)
## Warning: package 'magrittr' was built under R version 3.4.4
## 
## Attaching package: 'magrittr'
## The following object is masked from 'package:purrr':
## 
##     set_names
## The following object is masked from 'package:tidyr':
## 
##     extract
library(ggfortify)
## Warning: package 'ggfortify' was built under R version 3.4.4
library(forecast)
## Warning: package 'forecast' was built under R version 3.4.4
library(TSA)
## Warning: package 'TSA' was built under R version 3.4.4
## Loading required package: leaps
## Warning: package 'leaps' was built under R version 3.4.4
## Loading required package: locfit
## Warning: package 'locfit' was built under R version 3.4.4
## locfit 1.5-9.1    2013-03-22
## Loading required package: mgcv
## Loading required package: nlme
## 
## Attaching package: 'nlme'
## The following object is masked from 'package:forecast':
## 
##     getResponse
## The following object is masked from 'package:dplyr':
## 
##     collapse
## This is mgcv 1.8-22. For overview type 'help("mgcv-package")'.
## Loading required package: tseries
## 
## Attaching package: 'TSA'
## The following object is masked from 'package:readr':
## 
##     spec
## The following objects are masked from 'package:stats':
## 
##     acf, arima
## The following object is masked from 'package:utils':
## 
##     tar
data <- read_csv("http://myweb.ttu.edu/jduras/files/teaching/e5316/toothpaste.csv")
## Parsed with column specification:
## cols(
##   date = col_date(format = ""),
##   yearwk = col_double(),
##   Crest = col_double(),
##   Colgate = col_double(),
##   pulse.ADA = col_integer(),
##   step.ADA = col_integer()
## )
attach(data)

# Weekly Data for Colgate and Crest : 1958 - 1962

#  sets the intervention Date  

intDate<-as.Date("1960-07-30") 

#  Plots market share for Colagate  

plotColgate<-data%>%
  ggplot( aes(x=date,y=Colgate))+
  geom_line()+
  labs(y="Colgate", title="")+
  geom_vline(aes(xintercept=as.Date(intDate), col="red"), show.legend = F)+
  theme_minimal()

plot(plotColgate)

#  Plots market share for Crest  

plotCrest<-data%>%
  ggplot( aes(x=date,y=Crest))+
  geom_line()+
  labs(y="Crest", title="Market Share 1958-1962")+
  geom_vline(aes(xintercept=as.Date(intDate), col="red"), show.legend = F)+
  theme_minimal()

plot(plotCrest)

# As shown in both plots, there is a clear drop and increase in Colgate and Crest after the 1960, respectively. 
# Then we need to split the data into a pre-intervention and a post-intervention sample. 
# The pre-intervention sample ends in "1960-07-30".
# The post-intervention sample starts after "1960-07-30".

(b) Use auto.arima to estimate two ARIMA models for pre-intervention period

# Sample for pre-intervention period 

train <- data%>%
  filter(date<intDate)

# Sample for post-intervention period

test <- data%>%
  filter(date>=intDate) 

# identify the model for the process over the pre-intervention period

## for Colgate

Colgate.pre.intervention <- ts(train$Colgate)
LColgate.pre.intervention <- log(Colgate.pre.intervention)
dLColgate.pre.intervention1 <-diff(LColgate.pre.intervention)
dLColgate.pre.intervention12 <- diff(LColgate.pre.intervention, lag=12)
d2LColgate.pre.intervention_12 <- diff(diff(LColgate.pre.intervention), lag=12)

par(mfrow=c(2,3))
plot(Colgate.pre.intervention, xlab = "", ylab = "", main = 'Colgate.pre.intervention')
plot(LColgate.pre.intervention, xlab = "", ylab = "", main = 'log(Colgate.pre.intervention)')
plot(dLColgate.pre.intervention1, xlab = "", ylab = "", main = expression(paste(Delta, "log(Colgate.pre.intervention)")))
plot(dLColgate.pre.intervention12, xlab = "", ylab = "", main = expression(paste(Delta[12], "log(Colgate.pre.intervention)")))
plot(d2LColgate.pre.intervention_12, xlab = "", ylab = "", main = expression(paste(Delta,Delta[12], "log(Colgate.pre.intervention)")))

library(dygraphs)
## Warning: package 'dygraphs' was built under R version 3.4.4
dygraph(dLColgate.pre.intervention1)
# Auto-correlation : ACF and PACF for Colgate

maxlag <- 60  # Set to 60 because in months this is 5 years

par(mfrow=c(2,4))

Acf(LColgate.pre.intervention, lag.max=maxlag, main = expression(paste("ACF for log(Colgate.pre.intervention)")))
Acf(dLColgate.pre.intervention1, lag.max=maxlag, main = expression(paste("ACF for ",Delta, "log(Colgate.pre.intervention)")))
Acf(dLColgate.pre.intervention12, lag.max=maxlag, main = expression(paste("ACF for ",Delta[12], "log(Colgate.pre.intervention)")))
Acf(d2LColgate.pre.intervention_12, lag.max=maxlag, main = expression(paste("ACF for ",Delta,Delta[12], "log(Colgate.pre.intervention)")))

Pacf(LColgate.pre.intervention, lag.max = maxlag, main = expression(paste("PACF for log(Colgate.pre.intervention)")))
Pacf(dLColgate.pre.intervention1, lag.max = maxlag, main = expression(paste("PACF for ",Delta, "log(Colgate.pre.intervention)")))
Pacf(dLColgate.pre.intervention12, lag.max = maxlag, main = expression(paste("PACF for ",Delta[12], "log(Colgate.pre.intervention)")))
Pacf(d2LColgate.pre.intervention_12, lag.max = maxlag, main = expression(paste("PACF for ",Delta,Delta[12], "log(Colgate.pre.intervention)")))

# Arima models for pre-intervention period using auto.arima for Colgate

armaColgate <- auto.arima(LColgate.pre.intervention)
armaColgate
## Series: LColgate.pre.intervention 
## ARIMA(0,1,1) 
## 
## Coefficients:
##           ma1
##       -0.8048
## s.e.   0.0539
## 
## sigma^2 estimated as 0.02105:  log likelihood=68
## AIC=-131.99   AICc=-131.9   BIC=-126.21
# Adequacy of the Model

tsdiag(armaColgate, gof.lag = maxlag)

## for Crest

# identify the model for the process over the pre-intervention period

Crest.pre.intervention <- ts(train$Crest)
LCrest.pre.intervention <- log(Crest.pre.intervention)
dLCrest.pre.intervention1 <-diff(LCrest.pre.intervention)
dLCrest.pre.intervention12 <- diff(LCrest.pre.intervention, lag=12)
d2LCrest.pre.intervention_12 <- diff(diff(LCrest.pre.intervention), lag=12)

par(mfrow=c(2,3))
plot(Crest.pre.intervention, xlab = "", ylab = "", main = 'Crest.pre.intervention')
plot(LCrest.pre.intervention, xlab = "", ylab = "", main = 'log(Crest.pre.intervention)')
plot(dLCrest.pre.intervention1, xlab = "", ylab = "", main = expression(paste(Delta, "log(Crest.pre.intervention)")))
plot(dLCrest.pre.intervention12, xlab = "", ylab = "", main = expression(paste(Delta[12], "log(Crest.pre.intervention)")))
plot(d2LCrest.pre.intervention_12, xlab = "", ylab = "", main = expression(paste(Delta,Delta[12], "log(Crest.pre.intervention)")))

library(dygraphs)
dygraph(dLCrest.pre.intervention1)
# Auto-correlation : ACF and PACF for Crest

maxlag <- 60  # Set to 60 because in months this is 5 years

par(mfrow=c(2,4))

Acf(LCrest.pre.intervention, lag.max=maxlag, main = expression(paste("ACF for log(Crest.pre.intervention)")))
Acf(dLCrest.pre.intervention1, lag.max=maxlag, main = expression(paste("ACF for ",Delta, "log(Crest.pre.intervention)")))
Acf(dLCrest.pre.intervention12, lag.max=maxlag, main = expression(paste("ACF for ",Delta[12], "log(Crest.pre.intervention)")))
Acf(d2LCrest.pre.intervention_12, lag.max=maxlag, main = expression(paste("ACF for ",Delta,Delta[12], "log(Crest.pre.intervention)")))

Pacf(LCrest.pre.intervention, lag.max = maxlag, main = expression(paste("PACF for log(Crest.pre.intervention)")))
Pacf(dLCrest.pre.intervention1, lag.max = maxlag, main = expression(paste("PACF for ",Delta, "log(Crest.pre.intervention)")))
Pacf(dLCrest.pre.intervention12, lag.max = maxlag, main = expression(paste("PACF for ",Delta[12], "log(Crest.pre.intervention)")))
Pacf(d2LCrest.pre.intervention_12, lag.max = maxlag, main = expression(paste("PACF for ",Delta,Delta[12], "log(Crest.pre.intervention)")))

# Arima models for pre-intervention period using auto.arima 

armaCrest <- auto.arima(LCrest.pre.intervention)
armaCrest
## Series: LCrest.pre.intervention 
## ARIMA(0,1,1) 
## 
## Coefficients:
##           ma1
##       -0.6873
## s.e.   0.0694
## 
## sigma^2 estimated as 0.07191:  log likelihood=-13.49
## AIC=30.98   AICc=31.07   BIC=36.76
# Adequacy of the Model armaCrest

tsdiag(armaCrest, gof.lag = maxlag)

(c) Identify and estimate two transfer function models for the whole sample

# forecast based on the model for the pre-intervention period for Colgate

armaColgate.f <- forecast(armaColgate, 60)
armaColgate.f.err <- ts(log(data$Colgate)) - armaColgate.f$mean

plot(armaColgate.f) # Forecast from ARIMA

plot(armaColgate.f.err) # Multistep Forecast Error

library(forecast)
par(mfrow=c(2,1))
Acf(armaColgate.f.err, type="correlation", lag.max=60, ylab="ACF", main="armaColgate.f.err")
Acf(armaColgate.f.err, type="partial", lag.max=60, ylab="PACF", main="armaColgate.f.err")

# forecast based on the model for the pre-intervention period for Crest

armaCrest.f <- forecast(armaCrest , 60)
armaCrest.f.err <- ts(log(data$Crest)) - armaCrest.f$mean

plot(armaCrest.f) # Forecast from ARIMA
plot(armaCrest.f.err) # Multistep Forecast Error

library(forecast)
par(mfrow=c(2,1))
Acf(armaCrest.f.err, type="correlation", lag.max=60, ylab="ACF", main="armaCrest.f.err")
Acf(armaCrest.f.err, type="partial", lag.max=60, ylab="PACF", main="armaCrest.f.err")

detach("package:ggfortify", unload=T)
detach("package:dplyr", unload=T)
## Warning: 'dplyr' namespace cannot be unloaded:
##   namespace 'dplyr' is imported by 'broom' so cannot be unloaded
library(TSA)

# Plot Pulse and Step Functions 

par(mfrow=c(2,1))

plot(data$pulse.ADA) #  Pulse.ADA
plot(data$step.ADA) # Step.ADA 

armaColgate2<-arimax(data$Colgate, order=c(0,1,1), method = "ML",
                     xtransf=data.frame(data$pulse.ADA,data$step.ADA), transfer=list(c(2,0),c(0,0)))

armaColgate2
## 
## Call:
## arimax(x = data$Colgate, order = c(0, 1, 1), method = "ML", xtransf = data.frame(data$pulse.ADA, 
##     data$step.ADA), transfer = list(c(2, 0), c(0, 0)))
## 
## Coefficients:
## Warning in sqrt(diag(x$var.coef)): NaNs produced
##           ma1  data.pulse.ADA-AR1  data.pulse.ADA-AR2  data.pulse.ADA-MA0
##       -0.8041              0.0567              0.0814              0.0626
## s.e.   0.0433                 NaN                 NaN              0.0481
##       data.step.ADA-MA0
##                 -0.1145
## s.e.             0.0311
## 
## sigma^2 estimated as 0.002184:  log likelihood = 425.37,  aic = -840.74
armaCrest2<-arimax(data$Crest, order=c(0,1,1), method = "ML",
                   xtransf=data.frame(data$pulse.ADA,data$step.ADA), transfer=list(c(2,0),c(0,0)))

armaCrest2
## 
## Call:
## arimax(x = data$Crest, order = c(0, 1, 1), method = "ML", xtransf = data.frame(data$pulse.ADA, 
##     data$step.ADA), transfer = list(c(2, 0), c(0, 0)))
## 
## Coefficients:
##           ma1  data.pulse.ADA-AR1  data.pulse.ADA-AR2  data.pulse.ADA-MA0
##       -0.7594              0.0154              0.4041             -0.1371
## s.e.   0.0457              0.2464              0.1887              0.0486
##       data.step.ADA-MA0
##                  0.1926
## s.e.             0.0361
## 
## sigma^2 estimated as 0.001831:  log likelihood = 448.27,  aic = -886.54

(d) Plot the estimated effects of the intervention based on your modes from (c)

Colgate <- armaColgate2$coef["data.pulse.ADA-MA0"]*data$pulse.ADA+
  armaColgate2$coef["data.pulse.ADA-MA0"]*stats::filter(data$pulse.ADA,
                                                       filter=armaColgate2$coef["data.pulse.ADA-AR1"], method = "recursive")+
  armaColgate2$coef["data.pulse.ADA-MA0"]*stats::filter(data$pulse.ADA,
                                                       filter=armaColgate2$coef["data.pulse.ADA-AR2"], method = "recursive")+
  armaColgate2$coef["data.step.ADA-MA0"]*data$step.ADA


Crest <- armaCrest2$coef["data.pulse.ADA-MA0"]*data$pulse.ADA+
  armaCrest2$coef["data.pulse.ADA-MA0"]*stats::filter(data$pulse.ADA,
                                                     filter=armaCrest2$coef["data.pulse.ADA-AR1"], method = "recursive")+
  armaCrest2$coef["data.pulse.ADA-MA0"]*stats::filter(data$pulse.ADA,
                                                     filter=armaCrest2$coef["data.pulse.ADA-AR2"], method = "recursive")+
  armaCrest2$coef["data.step.ADA-MA0"]*data$step.ADA

library(tidyverse)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following object is masked from 'package:nlme':
## 
##     collapse
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
Colgate %<>% as.tibble() %>% 
  mutate(date=data$date)

Colgate1 <- ggplot(Colgate, aes(x=date, y=x+mean(train$Colgate), col="red"))+
  geom_line(show.legend=F)+
  geom_line(data=data, aes(x=date, y=Colgate),show.legend = F, inherit.aes=F)+
  theme_minimal()+
  labs(x="", y="Colgate Market Share", title="Effect of ADA Endorsment of Colgate")

plot(Colgate1)
## Don't know how to automatically pick scale for object of type ts. Defaulting to continuous.

Crest %<>% as.tibble() %>% 
  mutate(date=data$date)

Crest1 <- ggplot(Crest, aes(x=date, y=x+mean(train$Crest), col="red"))+
  geom_line(show.legend=F)+
  geom_line(data=data, aes(x=date, y=Crest), show.legend = F, inherit.aes=F)+
  theme_minimal()+
  labs(x="", y="Crest Market Share", title="Effect of ADA Endorsment of Crest")

plot(Crest1)
## Don't know how to automatically pick scale for object of type ts. Defaulting to continuous.