Loading and reconnaissance the dataset

head - show 1st few data

tail - show last few data

ts - time series

data <- AirPassengers

str(data)
##  Time-Series [1:144] from 1949 to 1961: 112 118 132 129 121 135 148 148 136 119 ...
head(data)
## [1] 112 118 132 129 121 135
tail(data)
## [1] 622 606 508 461 390 432
ts(data, frequency = 12, start = c(1949,1))
##      Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec
## 1949 112 118 132 129 121 135 148 148 136 119 104 118
## 1950 115 126 141 135 125 149 170 170 158 133 114 140
## 1951 145 150 178 163 172 178 199 199 184 162 146 166
## 1952 171 180 193 181 183 218 230 242 209 191 172 194
## 1953 196 196 236 235 229 243 264 272 237 211 180 201
## 1954 204 188 235 227 234 264 302 293 259 229 203 229
## 1955 242 233 267 269 270 315 364 347 312 274 237 278
## 1956 284 277 317 313 318 374 413 405 355 306 271 306
## 1957 315 301 356 348 355 422 465 467 404 347 305 336
## 1958 340 318 362 348 363 435 491 505 404 359 310 337
## 1959 360 342 406 396 420 472 548 559 463 407 362 405
## 1960 417 391 419 461 472 535 622 606 508 461 390 432
plot(data)





Log Transformation

(it’s for our first data was non-stationary we’ve seen in plot)

ldata <- log(data)
plot(ldata)





Decomposition

las = 2 - the numbers will appear vertical

ddata <- decompose(data)
ddata$figure
##  [1] -24.748737 -36.188131  -2.241162  -8.036616  -4.506313  35.402778
##  [7]  63.830808  62.823232  16.520202 -20.642677 -53.593434 -28.619949
plot(ddata$figure, type = "b",
     xlab = "Month", ylab = "Seasonality Index",
     col = "Red", las = 2)





Decomposition Plot

plot(ddata)





ARIMA

ARIMA - Autoregressive Intigrated Moving Average


### auto.arima - gives us the best ARIMA model ### ARIMA(0,1,1)(0,1,1)[12] ### Here 1st 0 - p (AR order) ### 2nd 1 - d (degree of differencing) ### 3rd 1 - q (MA order

library(forecast)
## Warning: package 'forecast' was built under R version 4.0.2
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
model <- auto.arima(ldata)
model
## Series: ldata 
## ARIMA(0,1,1)(0,1,1)[12] 
## 
## Coefficients:
##           ma1     sma1
##       -0.4018  -0.5569
## s.e.   0.0896   0.0731
## 
## sigma^2 estimated as 0.001371:  log likelihood=244.7
## AIC=-483.4   AICc=-483.21   BIC=-474.77





ACF

Dotted line - significance bound

acf(model$residuals, main = "Correlogram")





ACF

Dotted line - significance bound

acf(model$residuals, main = "Correlogram")





PACF

Dotted line - significance bound

pacf(model$residuals, main = "Partial Correlogram")





Ljung-Box Test

We will focus on the P-Value

Box.test(model$residuals, lag = 20, type = "Ljung-Box")
## 
##  Box-Ljung test
## 
## data:  model$residuals
## X-squared = 17.688, df = 20, p-value = 0.6079





Residual Plot

freq = FALSE - We want density rather than frequency

hist(model$residuals, col = "green",
     xlab = "Error", main = "Histogram of residuals",
     freq =  FALSE)
lines(density(model$residuals), col = "Black")





Forecast

48 - Forecasting for next 48 months

pred <- forecast(model, 48)
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.0.2
autoplot(pred)





Accuracy calculating

accuracy(pred)
##                        ME       RMSE        MAE        MPE      MAPE      MASE
## Training set 0.0005730622 0.03504883 0.02626034 0.01098898 0.4752815 0.2169522
##                    ACF1
## Training set 0.01443892