data <- AirPassengers
str(data)
## Time-Series [1:144] from 1949 to 1961: 112 118 132 129 121 135 148 148 136 119 ...
head(data)
## [1] 112 118 132 129 121 135
tail(data)
## [1] 622 606 508 461 390 432
ts(data, frequency = 12, start = c(1949,1))
## Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec
## 1949 112 118 132 129 121 135 148 148 136 119 104 118
## 1950 115 126 141 135 125 149 170 170 158 133 114 140
## 1951 145 150 178 163 172 178 199 199 184 162 146 166
## 1952 171 180 193 181 183 218 230 242 209 191 172 194
## 1953 196 196 236 235 229 243 264 272 237 211 180 201
## 1954 204 188 235 227 234 264 302 293 259 229 203 229
## 1955 242 233 267 269 270 315 364 347 312 274 237 278
## 1956 284 277 317 313 318 374 413 405 355 306 271 306
## 1957 315 301 356 348 355 422 465 467 404 347 305 336
## 1958 340 318 362 348 363 435 491 505 404 359 310 337
## 1959 360 342 406 396 420 472 548 559 463 407 362 405
## 1960 417 391 419 461 472 535 622 606 508 461 390 432
plot(data)
ldata <- log(data)
plot(ldata)
ddata <- decompose(data)
ddata$figure
## [1] -24.748737 -36.188131 -2.241162 -8.036616 -4.506313 35.402778
## [7] 63.830808 62.823232 16.520202 -20.642677 -53.593434 -28.619949
plot(ddata$figure, type = "b",
xlab = "Month", ylab = "Seasonality Index",
col = "Red", las = 2)
plot(ddata)
### auto.arima - gives us the best ARIMA model ### ARIMA(0,1,1)(0,1,1)[12] ### Here 1st 0 - p (AR order) ### 2nd 1 - d (degree of differencing) ### 3rd 1 - q (MA order
library(forecast)
## Warning: package 'forecast' was built under R version 4.0.2
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
model <- auto.arima(ldata)
model
## Series: ldata
## ARIMA(0,1,1)(0,1,1)[12]
##
## Coefficients:
## ma1 sma1
## -0.4018 -0.5569
## s.e. 0.0896 0.0731
##
## sigma^2 estimated as 0.001371: log likelihood=244.7
## AIC=-483.4 AICc=-483.21 BIC=-474.77
acf(model$residuals, main = "Correlogram")
acf(model$residuals, main = "Correlogram")
pacf(model$residuals, main = "Partial Correlogram")
Box.test(model$residuals, lag = 20, type = "Ljung-Box")
##
## Box-Ljung test
##
## data: model$residuals
## X-squared = 17.688, df = 20, p-value = 0.6079
hist(model$residuals, col = "green",
xlab = "Error", main = "Histogram of residuals",
freq = FALSE)
lines(density(model$residuals), col = "Black")
pred <- forecast(model, 48)
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.0.2
autoplot(pred)
accuracy(pred)
## ME RMSE MAE MPE MAPE MASE
## Training set 0.0005730622 0.03504883 0.02626034 0.01098898 0.4752815 0.2169522
## ACF1
## Training set 0.01443892