Plastics Sales Prediction using Auto Arima Forecast

AUTO REGRESSIVE INTEGRATED MOVING AVERAGE MODELS

#install.packages("rmarkdown",repos = "http://cran.us.r-project.org")
# install.packages("forecast",repos = "http://cran.us.r-project.org")
# install.packages("fpp",repos = "http://cran.us.r-project.org")
# install.packages("smooth",repos = "http://cran.us.r-project.org")
# install.packages("readxl",repos = "http://cran.us.r-project.org")
# install.packages("tseries",repos = "http://cran.us.r-project.org")

library(forecast)
## Warning: package 'forecast' was built under R version 3.5.1
library(fpp)
## Warning: package 'fpp' was built under R version 3.5.1
## Loading required package: fma
## Warning: package 'fma' was built under R version 3.5.1
## Loading required package: expsmooth
## Warning: package 'expsmooth' was built under R version 3.5.1
## Loading required package: lmtest
## Warning: package 'lmtest' was built under R version 3.5.1
## Loading required package: zoo
## Warning: package 'zoo' was built under R version 3.5.1
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
## Loading required package: tseries
## Warning: package 'tseries' was built under R version 3.5.1
library(smooth)
## Warning: package 'smooth' was built under R version 3.5.1
## Loading required package: greybox
## Warning: package 'greybox' was built under R version 3.5.1
## Package "greybox", v0.3.3 loaded.
## This is package "smooth", v2.4.7
library(readxl)
## Warning: package 'readxl' was built under R version 3.5.1
library(tseries)

# Using Arima Model - 
Plastics<-read.csv(file.choose()) # read the Plastics data
Plastics <- Plastics$Sales
Plastics <- as.ts(Plastics)
View(Plastics)
class(Plastics)
## [1] "ts"
Plastics1 <- ts(Plastics,start=c(1986,1),end=c(1995,6),frequency=4)


start(Plastics1)
## [1] 1986    1
end(Plastics1)
## [1] 1996    2
class(Plastics1)
## [1] "ts"
sum(is.na(Plastics1))
## [1] 0
summary(Plastics1)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   697.0   896.5  1082.5  1071.6  1261.2  1473.0
View(Plastics1)

# decomdata<- decompose(Plastics1, "additive")
decomdata<- decompose(Plastics1, "multiplicative")
plot(decomdata)

plot(decomdata$seasonal)

plot(decomdata$trend)

plot(decomdata$random)

# EDA on the Original Data
plot(Plastics1)
abline(reg=lm(Plastics1~time(Plastics1)))

cycle(Plastics1)
##      Qtr1 Qtr2 Qtr3 Qtr4
## 1986    1    2    3    4
## 1987    1    2    3    4
## 1988    1    2    3    4
## 1989    1    2    3    4
## 1990    1    2    3    4
## 1991    1    2    3    4
## 1992    1    2    3    4
## 1993    1    2    3    4
## 1994    1    2    3    4
## 1995    1    2    3    4
## 1996    1    2
# Boxplot by Cycle
boxplot(Plastics1~cycle(Plastics1,xlab = "Date", ylab = "Passenger Number(100's)",
                        main = "Monthly Boxplot of passengers from 1995 to 2002"))

# Use Auto Arima for the Best Model 
Newmodel <- auto.arima(Plastics1)
Newmodel
## Series: Plastics1 
## ARIMA(2,0,1) with non-zero mean 
## 
## Coefficients:
##          ar1      ar2      ma1       mean
##       1.6519  -0.8942  -0.3016  1086.9233
## s.e.  0.0775   0.0720   0.1509    32.1603
## 
## sigma^2 estimated as 5475:  log likelihood=-240.09
## AIC=490.18   AICc=491.85   BIC=498.87
# Use the trace function to understand the determine the best p,d,q values that were selected.

auto.arima(Plastics1, ic = "aic", trace = TRUE)
## 
##  ARIMA(2,0,2)(1,0,1)[4] with non-zero mean : 491.0166
##  ARIMA(0,0,0)           with non-zero mean : 578.474
##  ARIMA(1,0,0)(1,0,0)[4] with non-zero mean : 522.0507
##  ARIMA(0,0,1)(0,0,1)[4] with non-zero mean : 537.9039
##  ARIMA(0,0,0)           with zero mean     : 709.0785
##  ARIMA(2,0,2)(0,0,1)[4] with non-zero mean : 492.0116
##  ARIMA(2,0,2)(2,0,1)[4] with non-zero mean : 495.0814
##  ARIMA(2,0,2)(1,0,0)[4] with non-zero mean : Inf
##  ARIMA(2,0,2)(1,0,2)[4] with non-zero mean : Inf
##  ARIMA(2,0,2)           with non-zero mean : 491.8546
##  ARIMA(2,0,2)(2,0,2)[4] with non-zero mean : Inf
##  ARIMA(1,0,2)(1,0,1)[4] with non-zero mean : 504.6339
##  ARIMA(3,0,2)(1,0,1)[4] with non-zero mean : Inf
##  ARIMA(2,0,1)(1,0,1)[4] with non-zero mean : 490.6965
##  ARIMA(1,0,0)(1,0,1)[4] with non-zero mean : 521.3338
##  ARIMA(2,0,1)(1,0,1)[4] with zero mean     : Inf
##  ARIMA(2,0,1)(0,0,1)[4] with non-zero mean : 490.9761
##  ARIMA(2,0,1)(2,0,1)[4] with non-zero mean : Inf
##  ARIMA(2,0,1)(1,0,0)[4] with non-zero mean : 490.7761
##  ARIMA(2,0,1)(1,0,2)[4] with non-zero mean : Inf
##  ARIMA(2,0,1)           with non-zero mean : 490.1845
##  ARIMA(1,0,1)           with non-zero mean : 509.6646
##  ARIMA(3,0,1)           with non-zero mean : Inf
##  ARIMA(2,0,0)           with non-zero mean : 491.0467
##  ARIMA(1,0,0)           with non-zero mean : 526.7394
##  ARIMA(3,0,2)           with non-zero mean : Inf
##  ARIMA(2,0,1)           with zero mean     : 510.3341
## 
##  Best model: ARIMA(2,0,1)           with non-zero mean
## Series: Plastics1 
## ARIMA(2,0,1) with non-zero mean 
## 
## Coefficients:
##          ar1      ar2      ma1       mean
##       1.6519  -0.8942  -0.3016  1086.9233
## s.e.  0.0775   0.0720   0.1509    32.1603
## 
## sigma^2 estimated as 5475:  log likelihood=-240.09
## AIC=490.18   AICc=491.85   BIC=498.87
# tseries evaluation

plot.ts(Newmodel$residuals)

acf(ts(Newmodel$residuals),main = 'ACF Residual')

pacf(ts(Newmodel$residuals),main = 'PACF Residual')

# Forecast for next 2 year
Pass_Forecast <- forecast(Newmodel,Level=c(95),h=10*12)
## Warning in forecast.Arima(Newmodel, Level = c(95), h = 10 * 12): The non-
## existent Level arguments will be ignored.
plot(Pass_Forecast)

# Test your final model

Box.test(Newmodel$resid, lag = 5, type = "Ljung-Box")
## 
##  Box-Ljung test
## 
## data:  Newmodel$resid
## X-squared = 4.6926, df = 5, p-value = 0.4545
Box.test(Newmodel$resid, lag = 15, type = "Ljung-Box")
## 
##  Box-Ljung test
## 
## data:  Newmodel$resid
## X-squared = 19.744, df = 15, p-value = 0.182
Box.test(Newmodel$resid, lag = 10, type = "Ljung-Box")
## 
##  Box-Ljung test
## 
## data:  Newmodel$resid
## X-squared = 8.926, df = 10, p-value = 0.5391