Airline Passengers using Auto Arima Forecast

AUTO REGRESSIVE INTEGRATED MOVING AVERAGE MODELS

# install.packages("rmarkdown",repos = "http://cran.us.r-project.org")
# install.packages("forecast",repos = "http://cran.us.r-project.org")
# install.packages("fpp",repos = "http://cran.us.r-project.org")
# install.packages("smooth",repos = "http://cran.us.r-project.org")
# install.packages("readxl",repos = "http://cran.us.r-project.org")
# install.packages("tseries",repos = "http://cran.us.r-project.org")

library(forecast)
## Warning: package 'forecast' was built under R version 3.5.1
library(fpp)
## Warning: package 'fpp' was built under R version 3.5.1
## Loading required package: fma
## Warning: package 'fma' was built under R version 3.5.1
## Loading required package: expsmooth
## Warning: package 'expsmooth' was built under R version 3.5.1
## Loading required package: lmtest
## Warning: package 'lmtest' was built under R version 3.5.1
## Loading required package: zoo
## Warning: package 'zoo' was built under R version 3.5.1
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
## Loading required package: tseries
## Warning: package 'tseries' was built under R version 3.5.1
library(smooth)
## Warning: package 'smooth' was built under R version 3.5.1
## Loading required package: greybox
## Warning: package 'greybox' was built under R version 3.5.1
## Package "greybox", v0.3.3 loaded.
## This is package "smooth", v2.4.7
library(readxl)
## Warning: package 'readxl' was built under R version 3.5.1
library(tseries)

# Using Arima Model - 
Airlines<-read_excel(file.choose()) # read the Airlines data
Airlines <- Airlines$Passengers
Airlines <- as.ts(Airlines)
View(Airlines)
class(Airlines)
## [1] "ts"
Airlines1 <- ts(Airlines,start=c(1995,1),end=c(2002,12),frequency=12)


start(Airlines1)
## [1] 1995    1
end(Airlines1)
## [1] 2002   12
class(Airlines1)
## [1] "ts"
sum(is.na(Airlines1))
## [1] 0
summary(Airlines1)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   104.0   156.0   200.0   213.7   264.8   413.0
View(Airlines1)

# decomdata<- decompose(Airlines1, "additive")
decomdata<- decompose(Airlines1, "multiplicative")
plot(decomdata)

plot(decomdata$seasonal)

plot(decomdata$trend)

plot(decomdata$random)

# EDA on the Original Data
plot(Airlines1)
abline(reg=lm(Airlines1~time(Airlines1)))

cycle(Airlines1)
##      Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec
## 1995   1   2   3   4   5   6   7   8   9  10  11  12
## 1996   1   2   3   4   5   6   7   8   9  10  11  12
## 1997   1   2   3   4   5   6   7   8   9  10  11  12
## 1998   1   2   3   4   5   6   7   8   9  10  11  12
## 1999   1   2   3   4   5   6   7   8   9  10  11  12
## 2000   1   2   3   4   5   6   7   8   9  10  11  12
## 2001   1   2   3   4   5   6   7   8   9  10  11  12
## 2002   1   2   3   4   5   6   7   8   9  10  11  12
# Boxplot by Cycle
boxplot(Airlines1~cycle(Airlines1,xlab = "Date", ylab = "Passenger Number(100's)",
                        main = "Monthly Boxplot of passengers from 1995 to 2002"))

# Use Auto Arima for the Best Model 
Newmodel <- auto.arima(Airlines1)
Newmodel
## Series: Airlines1 
## ARIMA(1,1,0)(1,1,0)[12] 
## 
## Coefficients:
##           ar1     sar1
##       -0.2250  -0.2274
## s.e.   0.1076   0.1081
## 
## sigma^2 estimated as 92.5:  log likelihood=-304.98
## AIC=615.97   AICc=616.27   BIC=623.22
# Use the trace function to understand the determine the best p,d,q values that were selected.

auto.arima(Airlines1, ic = "aic", trace = TRUE)
## 
##  ARIMA(2,1,2)(1,1,1)[12]                    : 619.8465
##  ARIMA(0,1,0)(0,1,0)[12]                    : 622.0124
##  ARIMA(1,1,0)(1,1,0)[12]                    : 615.9655
##  ARIMA(0,1,1)(0,1,1)[12]                    : 616.6286
##  ARIMA(1,1,0)(0,1,0)[12]                    : 618.1919
##  ARIMA(1,1,0)(2,1,0)[12]                    : 617.8138
##  ARIMA(1,1,0)(1,1,1)[12]                    : 617.8815
##  ARIMA(1,1,0)(2,1,1)[12]                    : Inf
##  ARIMA(0,1,0)(1,1,0)[12]                    : 618.2212
##  ARIMA(2,1,0)(1,1,0)[12]                    : 617.8626
##  ARIMA(1,1,1)(1,1,0)[12]                    : 617.4616
##  ARIMA(2,1,1)(1,1,0)[12]                    : 619.3943
## 
##  Best model: ARIMA(1,1,0)(1,1,0)[12]
## Series: Airlines1 
## ARIMA(1,1,0)(1,1,0)[12] 
## 
## Coefficients:
##           ar1     sar1
##       -0.2250  -0.2274
## s.e.   0.1076   0.1081
## 
## sigma^2 estimated as 92.5:  log likelihood=-304.98
## AIC=615.97   AICc=616.27   BIC=623.22
# tseries evaluation

plot.ts(Newmodel$residuals)

acf(ts(Newmodel$residuals),main = 'ACF Residual')

pacf(ts(Newmodel$residuals),main = 'PACF Residual')

# Forecast for next 2 year
Pass_Forecast <- forecast(Newmodel,Level=c(95),h=10*12)
## Warning in forecast.Arima(Newmodel, Level = c(95), h = 10 * 12): The non-
## existent Level arguments will be ignored.
plot(Pass_Forecast)

# Test your final model

Box.test(Newmodel$resid, lag = 5, type = "Ljung-Box")
## 
##  Box-Ljung test
## 
## data:  Newmodel$resid
## X-squared = 6.1585, df = 5, p-value = 0.2911
Box.test(Newmodel$resid, lag = 15, type = "Ljung-Box")
## 
##  Box-Ljung test
## 
## data:  Newmodel$resid
## X-squared = 12.346, df = 15, p-value = 0.6527
Box.test(Newmodel$resid, lag = 10, type = "Ljung-Box")
## 
##  Box-Ljung test
## 
## data:  Newmodel$resid
## X-squared = 8.8668, df = 10, p-value = 0.5448