AUTO REGRESSIVE INTEGRATED MOVING AVERAGE MODELS
# install.packages("rmarkdown",repos = "http://cran.us.r-project.org")
# install.packages("forecast",repos = "http://cran.us.r-project.org")
# install.packages("fpp",repos = "http://cran.us.r-project.org")
# install.packages("smooth",repos = "http://cran.us.r-project.org")
# install.packages("readxl",repos = "http://cran.us.r-project.org")
# install.packages("tseries",repos = "http://cran.us.r-project.org")
library(forecast)
## Warning: package 'forecast' was built under R version 3.5.1
library(fpp)
## Warning: package 'fpp' was built under R version 3.5.1
## Loading required package: fma
## Warning: package 'fma' was built under R version 3.5.1
## Loading required package: expsmooth
## Warning: package 'expsmooth' was built under R version 3.5.1
## Loading required package: lmtest
## Warning: package 'lmtest' was built under R version 3.5.1
## Loading required package: zoo
## Warning: package 'zoo' was built under R version 3.5.1
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
## Loading required package: tseries
## Warning: package 'tseries' was built under R version 3.5.1
library(smooth)
## Warning: package 'smooth' was built under R version 3.5.1
## Loading required package: greybox
## Warning: package 'greybox' was built under R version 3.5.1
## Package "greybox", v0.3.3 loaded.
## This is package "smooth", v2.4.7
library(readxl)
## Warning: package 'readxl' was built under R version 3.5.1
library(tseries)
# Using Arima Model -
Cocacola<-read_excel(file.choose()) # read the Cocacola data
Cocacola <- Cocacola$Sales
Cocacola <- as.ts(Cocacola)
View(Cocacola)
class(Cocacola)
## [1] "ts"
Cocacola1 <- ts(Cocacola,start=c(1986,1),end=c(1995,6),frequency=4)
start(Cocacola1)
## [1] 1986 1
end(Cocacola1)
## [1] 1996 2
class(Cocacola1)
## [1] "ts"
sum(is.na(Cocacola1))
## [1] 0
summary(Cocacola1)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1548 2160 2782 2994 3609 5253
View(Cocacola1)
# decomdata<- decompose(Cocacola1, "additive")
decomdata<- decompose(Cocacola1, "multiplicative")
plot(decomdata)

plot(decomdata$seasonal)

plot(decomdata$trend)

plot(decomdata$random)

# EDA on the Original Data
plot(Cocacola1)
abline(reg=lm(Cocacola1~time(Cocacola1)))

cycle(Cocacola1)
## Qtr1 Qtr2 Qtr3 Qtr4
## 1986 1 2 3 4
## 1987 1 2 3 4
## 1988 1 2 3 4
## 1989 1 2 3 4
## 1990 1 2 3 4
## 1991 1 2 3 4
## 1992 1 2 3 4
## 1993 1 2 3 4
## 1994 1 2 3 4
## 1995 1 2 3 4
## 1996 1 2
# Boxplot by Cycle
boxplot(Cocacola1~cycle(Cocacola1,xlab = "Date", ylab = "Passenger Number(100's)",
main = "Monthly Boxplot of passengers from 1995 to 2002"))

# Use Auto Arima for the Best Model
Newmodel <- auto.arima(Cocacola1)
Newmodel
## Series: Cocacola1
## ARIMA(0,1,0)(0,1,0)[4]
##
## sigma^2 estimated as 30175: log likelihood=-243.32
## AIC=488.65 AICc=488.76 BIC=490.26
# Use the trace function to understand the determine the best p,d,q values that were selected.
auto.arima(Cocacola1, ic = "aic", trace = TRUE)
##
## ARIMA(2,1,2)(1,1,1)[4] : Inf
## ARIMA(0,1,0)(0,1,0)[4] : 488.6475
## ARIMA(1,1,0)(1,1,0)[4] : 491.971
## ARIMA(0,1,1)(0,1,1)[4] : 491.8391
## ARIMA(0,1,0)(1,1,0)[4] : 490.0841
## ARIMA(0,1,0)(0,1,1)[4] : 490.058
## ARIMA(0,1,0)(1,1,1)[4] : 491.9662
## ARIMA(1,1,0)(0,1,0)[4] : 490.5959
## ARIMA(0,1,1)(0,1,0)[4] : 490.5453
## ARIMA(1,1,1)(0,1,0)[4] : Inf
##
## Best model: ARIMA(0,1,0)(0,1,0)[4]
## Series: Cocacola1
## ARIMA(0,1,0)(0,1,0)[4]
##
## sigma^2 estimated as 30175: log likelihood=-243.32
## AIC=488.65 AICc=488.76 BIC=490.26
# tseries evaluation
plot.ts(Newmodel$residuals)

acf(ts(Newmodel$residuals),main = 'ACF Residual')

pacf(ts(Newmodel$residuals),main = 'PACF Residual')

# Forecast for next 2 year
Pass_Forecast <- forecast(Newmodel,Level=c(95),h=10*12)
## Warning in forecast.Arima(Newmodel, Level = c(95), h = 10 * 12): The non-
## existent Level arguments will be ignored.
plot(Pass_Forecast)

# Test your final model
Box.test(Newmodel$resid, lag = 5, type = "Ljung-Box")
##
## Box-Ljung test
##
## data: Newmodel$resid
## X-squared = 3.9366, df = 5, p-value = 0.5586
Box.test(Newmodel$resid, lag = 15, type = "Ljung-Box")
##
## Box-Ljung test
##
## data: Newmodel$resid
## X-squared = 9.2417, df = 15, p-value = 0.8645
Box.test(Newmodel$resid, lag = 10, type = "Ljung-Box")
##
## Box-Ljung test
##
## data: Newmodel$resid
## X-squared = 7.4886, df = 10, p-value = 0.6787