To predict airline ticket sales for 1961 using time series analysis.
The data set covers 12 years of monthly air-ticket sales in the airline industry, from 1949 to 1960.
#Install and load packages
#install.packages("forecast")
#install.packages("tseries")
library(forecast)
library(tseries)
data("AirPassengers")
class(AirPassengers)
## [1] "ts"
start(AirPassengers)
## [1] 1949 1
end(AirPassengers)
## [1] 1960 12
frequency(AirPassengers)
## [1] 12
sum(is.na(AirPassengers))
## [1] 0
summary(AirPassengers)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 104.0 180.0 265.5 280.3 360.5 622.0
AirPassengers
## Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec
## 1949 112 118 132 129 121 135 148 148 136 119 104 118
## 1950 115 126 141 135 125 149 170 170 158 133 114 140
## 1951 145 150 178 163 172 178 199 199 184 162 146 166
## 1952 171 180 193 181 183 218 230 242 209 191 172 194
## 1953 196 196 236 235 229 243 264 272 237 211 180 201
## 1954 204 188 235 227 234 264 302 293 259 229 203 229
## 1955 242 233 267 269 270 315 364 347 312 274 237 278
## 1956 284 277 317 313 318 374 413 405 355 306 271 306
## 1957 315 301 356 348 355 422 465 467 404 347 305 336
## 1958 340 318 362 348 363 435 491 505 404 359 310 337
## 1959 360 342 406 396 420 472 548 559 463 407 362 405
## 1960 417 391 419 461 472 535 622 606 508 461 390 432
# Rebuild the series with frequency 12 and split it into trend, seasonal and
# random components using a multiplicative decomposition
tsdata <- ts(AirPassengers, frequency = 12)
ddata <- decompose(tsdata, "multiplicative")
plot(ddata)
plot(ddata$trend)
plot(ddata$seasonal)
plot(ddata$random)
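The monthly seasonal factors from the multiplicative decomposition can also be inspected directly (a quick check added here, not part of the original output); values above 1 mark the peak travel months:
# First 12 values of the seasonal component = one full cycle of seasonal indices
round(ddata$seasonal[1:12], 3)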
plot(AirPassengers)
# Overlay a least-squares linear trend line
abline(reg = lm(AirPassengers ~ time(AirPassengers)))
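Because the seasonal swings grow with the level of the series, a log transform is commonly applied to stabilise the variance; the sketch below is an optional addition, not part of the original script:
# Plot the log series and overlay its fitted linear trend
plot(log(AirPassengers), ylab = "log(AirPassengers)")
abline(reg = lm(log(AirPassengers) ~ time(AirPassengers)))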
cycle(AirPassengers)
## Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec
## 1949 1 2 3 4 5 6 7 8 9 10 11 12
## 1950 1 2 3 4 5 6 7 8 9 10 11 12
## 1951 1 2 3 4 5 6 7 8 9 10 11 12
## 1952 1 2 3 4 5 6 7 8 9 10 11 12
## 1953 1 2 3 4 5 6 7 8 9 10 11 12
## 1954 1 2 3 4 5 6 7 8 9 10 11 12
## 1955 1 2 3 4 5 6 7 8 9 10 11 12
## 1956 1 2 3 4 5 6 7 8 9 10 11 12
## 1957 1 2 3 4 5 6 7 8 9 10 11 12
## 1958 1 2 3 4 5 6 7 8 9 10 11 12
## 1959 1 2 3 4 5 6 7 8 9 10 11 12
## 1960 1 2 3 4 5 6 7 8 9 10 11 12
boxplot(AirPassengers ~ cycle(AirPassengers), xlab = "Month")
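A month plot gives a complementary view of the same seasonal pattern (an optional addition using base R's monthplot()):
# One panel per calendar month, showing how each month evolves across years
monthplot(AirPassengers)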
plot(AirPassengers)
# Let auto.arima() select a seasonal ARIMA specification automatically
mymodel <- auto.arima(AirPassengers)
mymodel
## Series: AirPassengers
## ARIMA(2,1,1)(0,1,0)[12]
##
## Coefficients:
## ar1 ar2 ma1
## 0.5960 0.2143 -0.9819
## s.e. 0.0888 0.0880 0.0292
##
## sigma^2 = 132.3: log likelihood = -504.92
## AIC=1017.85 AICc=1018.17 BIC=1029.35
Rerun auto.arima() with trace = TRUE to compare the information criteria of the candidate models:
auto.arima(AirPassengers, ic = "aic", trace = TRUE)
##
## ARIMA(2,1,2)(1,1,1)[12] : Inf
## ARIMA(0,1,0)(0,1,0)[12] : 1031.508
## ARIMA(1,1,0)(1,1,0)[12] : 1020.393
## ARIMA(0,1,1)(0,1,1)[12] : 1021.003
## ARIMA(1,1,0)(0,1,0)[12] : 1020.394
## ARIMA(1,1,0)(2,1,0)[12] : 1019.24
## ARIMA(1,1,0)(2,1,1)[12] : Inf
## ARIMA(1,1,0)(1,1,1)[12] : Inf
## ARIMA(0,1,0)(2,1,0)[12] : 1032.12
## ARIMA(2,1,0)(2,1,0)[12] : 1021.12
## ARIMA(1,1,1)(2,1,0)[12] : 1021.033
## ARIMA(0,1,1)(2,1,0)[12] : 1019.178
## ARIMA(0,1,1)(1,1,0)[12] : 1020.425
## ARIMA(0,1,1)(2,1,1)[12] : Inf
## ARIMA(0,1,1)(1,1,1)[12] : Inf
## ARIMA(0,1,2)(2,1,0)[12] : 1021.148
## ARIMA(1,1,2)(2,1,0)[12] : 1022.805
##
## Best model: ARIMA(0,1,1)(2,1,0)[12]
## Series: AirPassengers
## ARIMA(0,1,1)(2,1,0)[12]
##
## Coefficients:
## ma1 sar1 sar2
## -0.3634 -0.1239 0.1911
## s.e. 0.0899 0.0934 0.1036
##
## sigma^2 = 133.5: log likelihood = -505.59
## AIC=1019.18 AICc=1019.5 BIC=1030.68
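If the AIC-best specification from the trace is preferred, it can be refit explicitly with Arima() from the forecast package (a sketch; the analysis below keeps mymodel):
# Refit the AIC-best model ARIMA(0,1,1)(2,1,0)[12] directly
bestmodel <- Arima(AirPassengers, order = c(0, 1, 1),
                   seasonal = list(order = c(2, 1, 0), period = 12))
summary(bestmodel)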
# adf.test() expects a time series, not a fitted model; to test the residuals for stationarity:
# adf.test(mymodel$residuals)
plot.ts(mymodel$residuals)
acf(ts(mymodel$residuals), main="ACF Residual")
pacf(ts(mymodel$residuals), main="PACF Residual")
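The forecast package also bundles these diagnostics into a single call (an optional addition, not in the original script):
# Residual time plot, ACF and a Ljung-Box test in one step
checkresiduals(mymodel)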
# Forecast 10 years (120 months) ahead with a 95% prediction interval
myforecast <- forecast(mymodel, level = c(95), h = 10 * 12)
plot(myforecast)
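Since the stated objective is the 1961 forecast, the first twelve months of the point forecasts can be pulled out with window() (a sketch added here, not part of the original output):
# Monthly point forecasts for 1961
forecast1961 <- window(myforecast$mean, start = c(1961, 1), end = c(1961, 12))
round(forecast1961)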
# Ljung-Box tests at lags 5, 10 and 15 check the residuals for remaining autocorrelation
Box.test(mymodel$resid, lag=5, type="Ljung-Box")
##
## Box-Ljung test
##
## data: mymodel$resid
## X-squared = 2.9244, df = 5, p-value = 0.7116
Box.test(mymodel$resid, lag=10, type="Ljung-Box")
##
## Box-Ljung test
##
## data: mymodel$resid
## X-squared = 8.6878, df = 10, p-value = 0.562
Box.test(mymodel$resid, lag=15, type="Ljung-Box")
##
## Box-Ljung test
##
## data: mymodel$resid
## X-squared = 11.582, df = 15, p-value = 0.7104
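In-sample accuracy measures for the fitted model can be reported with accuracy() from the forecast package (an optional addition):
# RMSE, MAE, MAPE etc. of the fitted ARIMA model on the training data
accuracy(mymodel)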
The p-values at all three lags are well above 0.05, so the null hypothesis of no residual autocorrelation is not rejected: the residuals behave like white noise.
The selected ARIMA(2,1,1)(0,1,0)[12] model therefore fits the data adequately and can be used to forecast passenger numbers for 1961.