(a) Plot the market shares of Colgate and Crest for the whole sample
# Intervention Analysis Approach to the Colgate and Crest series
# Import data from the "http://myweb.ttu.edu/jduras/files/teaching/e5316/toothpaste.csv" as Tibble
rm(list=ls())
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 3.4.4
## -- Attaching packages ----------------------------------------------------------- tidyverse 1.2.1 --
## v ggplot2 2.2.1 v purrr 0.2.4
## v tibble 1.4.2 v dplyr 0.7.4
## v tidyr 0.8.0 v stringr 1.3.0
## v readr 1.1.1 v forcats 0.3.0
## -- Conflicts -------------------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(magrittr)
## Warning: package 'magrittr' was built under R version 3.4.4
##
## Attaching package: 'magrittr'
## The following object is masked from 'package:purrr':
##
## set_names
## The following object is masked from 'package:tidyr':
##
## extract
library(ggfortify)
## Warning: package 'ggfortify' was built under R version 3.4.4
library(forecast)
## Warning: package 'forecast' was built under R version 3.4.4
library(TSA)
## Warning: package 'TSA' was built under R version 3.4.4
## Loading required package: leaps
## Warning: package 'leaps' was built under R version 3.4.4
## Loading required package: locfit
## Warning: package 'locfit' was built under R version 3.4.4
## locfit 1.5-9.1 2013-03-22
## Loading required package: mgcv
## Loading required package: nlme
##
## Attaching package: 'nlme'
## The following object is masked from 'package:forecast':
##
## getResponse
## The following object is masked from 'package:dplyr':
##
## collapse
## This is mgcv 1.8-22. For overview type 'help("mgcv-package")'.
## Loading required package: tseries
##
## Attaching package: 'TSA'
## The following object is masked from 'package:readr':
##
## spec
## The following objects are masked from 'package:stats':
##
## acf, arima
## The following object is masked from 'package:utils':
##
## tar
data <- read_csv("http://myweb.ttu.edu/jduras/files/teaching/e5316/toothpaste.csv")
## Parsed with column specification:
## cols(
## date = col_date(format = ""),
## yearwk = col_double(),
## Crest = col_double(),
## Colgate = col_double(),
## pulse.ADA = col_integer(),
## step.ADA = col_integer()
## )
attach(data)
# Weekly Data for Colgate and Crest : 1958 - 1962
# sets the intervention Date
intDate<-as.Date("1960-07-30")
# Plots market share for Colagate
plotColgate<-data%>%
ggplot( aes(x=date,y=Colgate))+
geom_line()+
labs(y="Colgate", title="")+
geom_vline(aes(xintercept=as.Date(intDate), col="red"), show.legend = F)+
theme_minimal()
plot(plotColgate)

# Plots market share for Crest
plotCrest<-data%>%
ggplot( aes(x=date,y=Crest))+
geom_line()+
labs(y="Crest", title="Market Share 1958-1962")+
geom_vline(aes(xintercept=as.Date(intDate), col="red"), show.legend = F)+
theme_minimal()
plot(plotCrest)

# As shown in both plots, there is a clear drop and increase in Colgate and Crest after the 1960, respectively.
# Then we need to split the data into a pre-intervention and a post-intervention sample.
# The pre-intervention sample ends in "1960-07-30".
# The post-intervention sample starts after "1960-07-30".
(b) Use auto.arima to estimate two ARIMA models for pre-intervention period
# Sample for pre-intervention period
train <- data%>%
filter(date<intDate)
# Sample for post-intervention period
test <- data%>%
filter(date>=intDate)
# identify the model for the process over the pre-intervention period
## for Colgate
Colgate.pre.intervention <- ts(train$Colgate)
LColgate.pre.intervention <- log(Colgate.pre.intervention)
dLColgate.pre.intervention1 <-diff(LColgate.pre.intervention)
dLColgate.pre.intervention12 <- diff(LColgate.pre.intervention, lag=12)
d2LColgate.pre.intervention_12 <- diff(diff(LColgate.pre.intervention), lag=12)
par(mfrow=c(2,3))
plot(Colgate.pre.intervention, xlab = "", ylab = "", main = 'Colgate.pre.intervention')
plot(LColgate.pre.intervention, xlab = "", ylab = "", main = 'log(Colgate.pre.intervention)')
plot(dLColgate.pre.intervention1, xlab = "", ylab = "", main = expression(paste(Delta, "log(Colgate.pre.intervention)")))
plot(dLColgate.pre.intervention12, xlab = "", ylab = "", main = expression(paste(Delta[12], "log(Colgate.pre.intervention)")))
plot(d2LColgate.pre.intervention_12, xlab = "", ylab = "", main = expression(paste(Delta,Delta[12], "log(Colgate.pre.intervention)")))
library(dygraphs)
## Warning: package 'dygraphs' was built under R version 3.4.4
dygraph(dLColgate.pre.intervention1)
# Auto-correlation : ACF and PACF for Colgate
maxlag <- 60 # Set to 60 because in months this is 5 years
par(mfrow=c(2,4))

Acf(LColgate.pre.intervention, lag.max=maxlag, main = expression(paste("ACF for log(Colgate.pre.intervention)")))
Acf(dLColgate.pre.intervention1, lag.max=maxlag, main = expression(paste("ACF for ",Delta, "log(Colgate.pre.intervention)")))
Acf(dLColgate.pre.intervention12, lag.max=maxlag, main = expression(paste("ACF for ",Delta[12], "log(Colgate.pre.intervention)")))
Acf(d2LColgate.pre.intervention_12, lag.max=maxlag, main = expression(paste("ACF for ",Delta,Delta[12], "log(Colgate.pre.intervention)")))
Pacf(LColgate.pre.intervention, lag.max = maxlag, main = expression(paste("PACF for log(Colgate.pre.intervention)")))
Pacf(dLColgate.pre.intervention1, lag.max = maxlag, main = expression(paste("PACF for ",Delta, "log(Colgate.pre.intervention)")))
Pacf(dLColgate.pre.intervention12, lag.max = maxlag, main = expression(paste("PACF for ",Delta[12], "log(Colgate.pre.intervention)")))
Pacf(d2LColgate.pre.intervention_12, lag.max = maxlag, main = expression(paste("PACF for ",Delta,Delta[12], "log(Colgate.pre.intervention)")))

# Arima models for pre-intervention period using auto.arima for Colgate
armaColgate <- auto.arima(LColgate.pre.intervention)
armaColgate
## Series: LColgate.pre.intervention
## ARIMA(0,1,1)
##
## Coefficients:
## ma1
## -0.8048
## s.e. 0.0539
##
## sigma^2 estimated as 0.02105: log likelihood=68
## AIC=-131.99 AICc=-131.9 BIC=-126.21
# Adequacy of the Model
tsdiag(armaColgate, gof.lag = maxlag)

## for Crest
# identify the model for the process over the pre-intervention period
Crest.pre.intervention <- ts(train$Crest)
LCrest.pre.intervention <- log(Crest.pre.intervention)
dLCrest.pre.intervention1 <-diff(LCrest.pre.intervention)
dLCrest.pre.intervention12 <- diff(LCrest.pre.intervention, lag=12)
d2LCrest.pre.intervention_12 <- diff(diff(LCrest.pre.intervention), lag=12)
par(mfrow=c(2,3))
plot(Crest.pre.intervention, xlab = "", ylab = "", main = 'Crest.pre.intervention')
plot(LCrest.pre.intervention, xlab = "", ylab = "", main = 'log(Crest.pre.intervention)')
plot(dLCrest.pre.intervention1, xlab = "", ylab = "", main = expression(paste(Delta, "log(Crest.pre.intervention)")))
plot(dLCrest.pre.intervention12, xlab = "", ylab = "", main = expression(paste(Delta[12], "log(Crest.pre.intervention)")))
plot(d2LCrest.pre.intervention_12, xlab = "", ylab = "", main = expression(paste(Delta,Delta[12], "log(Crest.pre.intervention)")))
library(dygraphs)
dygraph(dLCrest.pre.intervention1)
# Auto-correlation : ACF and PACF for Crest
maxlag <- 60 # Set to 60 because in months this is 5 years
par(mfrow=c(2,4))

Acf(LCrest.pre.intervention, lag.max=maxlag, main = expression(paste("ACF for log(Crest.pre.intervention)")))
Acf(dLCrest.pre.intervention1, lag.max=maxlag, main = expression(paste("ACF for ",Delta, "log(Crest.pre.intervention)")))
Acf(dLCrest.pre.intervention12, lag.max=maxlag, main = expression(paste("ACF for ",Delta[12], "log(Crest.pre.intervention)")))
Acf(d2LCrest.pre.intervention_12, lag.max=maxlag, main = expression(paste("ACF for ",Delta,Delta[12], "log(Crest.pre.intervention)")))
Pacf(LCrest.pre.intervention, lag.max = maxlag, main = expression(paste("PACF for log(Crest.pre.intervention)")))
Pacf(dLCrest.pre.intervention1, lag.max = maxlag, main = expression(paste("PACF for ",Delta, "log(Crest.pre.intervention)")))
Pacf(dLCrest.pre.intervention12, lag.max = maxlag, main = expression(paste("PACF for ",Delta[12], "log(Crest.pre.intervention)")))
Pacf(d2LCrest.pre.intervention_12, lag.max = maxlag, main = expression(paste("PACF for ",Delta,Delta[12], "log(Crest.pre.intervention)")))

# Arima models for pre-intervention period using auto.arima
armaCrest <- auto.arima(LCrest.pre.intervention)
armaCrest
## Series: LCrest.pre.intervention
## ARIMA(0,1,1)
##
## Coefficients:
## ma1
## -0.6873
## s.e. 0.0694
##
## sigma^2 estimated as 0.07191: log likelihood=-13.49
## AIC=30.98 AICc=31.07 BIC=36.76
# Adequacy of the Model armaCrest
tsdiag(armaCrest, gof.lag = maxlag)

(c) Identify and estimate two transfer function models for the whole sample
# forecast based on the model for the pre-intervention period for Colgate
armaColgate.f <- forecast(armaColgate, 60)
armaColgate.f.err <- ts(log(data$Colgate)) - armaColgate.f$mean
plot(armaColgate.f) # Forecast from ARIMA

plot(armaColgate.f.err) # Multistep Forecast Error

library(forecast)
par(mfrow=c(2,1))
Acf(armaColgate.f.err, type="correlation", lag.max=60, ylab="ACF", main="armaColgate.f.err")
Acf(armaColgate.f.err, type="partial", lag.max=60, ylab="PACF", main="armaColgate.f.err")

# forecast based on the model for the pre-intervention period for Crest
armaCrest.f <- forecast(armaCrest , 60)
armaCrest.f.err <- ts(log(data$Crest)) - armaCrest.f$mean
plot(armaCrest.f) # Forecast from ARIMA
plot(armaCrest.f.err) # Multistep Forecast Error

library(forecast)
par(mfrow=c(2,1))
Acf(armaCrest.f.err, type="correlation", lag.max=60, ylab="ACF", main="armaCrest.f.err")
Acf(armaCrest.f.err, type="partial", lag.max=60, ylab="PACF", main="armaCrest.f.err")

detach("package:ggfortify", unload=T)
detach("package:dplyr", unload=T)
## Warning: 'dplyr' namespace cannot be unloaded:
## namespace 'dplyr' is imported by 'broom' so cannot be unloaded
library(TSA)
# Plot Pulse and Step Functions
par(mfrow=c(2,1))
plot(data$pulse.ADA) # Pulse.ADA
plot(data$step.ADA) # Step.ADA

armaColgate2<-arimax(data$Colgate, order=c(0,1,1), method = "ML",
xtransf=data.frame(data$pulse.ADA,data$step.ADA), transfer=list(c(2,0),c(0,0)))
armaColgate2
##
## Call:
## arimax(x = data$Colgate, order = c(0, 1, 1), method = "ML", xtransf = data.frame(data$pulse.ADA,
## data$step.ADA), transfer = list(c(2, 0), c(0, 0)))
##
## Coefficients:
## Warning in sqrt(diag(x$var.coef)): NaNs produced
## ma1 data.pulse.ADA-AR1 data.pulse.ADA-AR2 data.pulse.ADA-MA0
## -0.8041 0.0567 0.0814 0.0626
## s.e. 0.0433 NaN NaN 0.0481
## data.step.ADA-MA0
## -0.1145
## s.e. 0.0311
##
## sigma^2 estimated as 0.002184: log likelihood = 425.37, aic = -840.74
armaCrest2<-arimax(data$Crest, order=c(0,1,1), method = "ML",
xtransf=data.frame(data$pulse.ADA,data$step.ADA), transfer=list(c(2,0),c(0,0)))
armaCrest2
##
## Call:
## arimax(x = data$Crest, order = c(0, 1, 1), method = "ML", xtransf = data.frame(data$pulse.ADA,
## data$step.ADA), transfer = list(c(2, 0), c(0, 0)))
##
## Coefficients:
## ma1 data.pulse.ADA-AR1 data.pulse.ADA-AR2 data.pulse.ADA-MA0
## -0.7594 0.0154 0.4041 -0.1371
## s.e. 0.0457 0.2464 0.1887 0.0486
## data.step.ADA-MA0
## 0.1926
## s.e. 0.0361
##
## sigma^2 estimated as 0.001831: log likelihood = 448.27, aic = -886.54
(d) Plot the estimated effects of the intervention based on your modes from (c)
Colgate <- armaColgate2$coef["data.pulse.ADA-MA0"]*data$pulse.ADA+
armaColgate2$coef["data.pulse.ADA-MA0"]*stats::filter(data$pulse.ADA,
filter=armaColgate2$coef["data.pulse.ADA-AR1"], method = "recursive")+
armaColgate2$coef["data.pulse.ADA-MA0"]*stats::filter(data$pulse.ADA,
filter=armaColgate2$coef["data.pulse.ADA-AR2"], method = "recursive")+
armaColgate2$coef["data.step.ADA-MA0"]*data$step.ADA
Crest <- armaCrest2$coef["data.pulse.ADA-MA0"]*data$pulse.ADA+
armaCrest2$coef["data.pulse.ADA-MA0"]*stats::filter(data$pulse.ADA,
filter=armaCrest2$coef["data.pulse.ADA-AR1"], method = "recursive")+
armaCrest2$coef["data.pulse.ADA-MA0"]*stats::filter(data$pulse.ADA,
filter=armaCrest2$coef["data.pulse.ADA-AR2"], method = "recursive")+
armaCrest2$coef["data.step.ADA-MA0"]*data$step.ADA
library(tidyverse)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following object is masked from 'package:nlme':
##
## collapse
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
Colgate %<>% as.tibble() %>%
mutate(date=data$date)
Colgate1 <- ggplot(Colgate, aes(x=date, y=x+mean(train$Colgate), col="red"))+
geom_line(show.legend=F)+
geom_line(data=data, aes(x=date, y=Colgate),show.legend = F, inherit.aes=F)+
theme_minimal()+
labs(x="", y="Colgate Market Share", title="Effect of ADA Endorsment of Colgate")
plot(Colgate1)
## Don't know how to automatically pick scale for object of type ts. Defaulting to continuous.

Crest %<>% as.tibble() %>%
mutate(date=data$date)
Crest1 <- ggplot(Crest, aes(x=date, y=x+mean(train$Crest), col="red"))+
geom_line(show.legend=F)+
geom_line(data=data, aes(x=date, y=Crest), show.legend = F, inherit.aes=F)+
theme_minimal()+
labs(x="", y="Crest Market Share", title="Effect of ADA Endorsment of Crest")
plot(Crest1)
## Don't know how to automatically pick scale for object of type ts. Defaulting to continuous.
