#time series data
head(AirPassengers)
## [1] 112 118 132 129 121 135
#find class of data
class(AirPassengers)
## [1] "ts"
#start of the time series
start(AirPassengers)
## [1] 1949 1
#end of the time series
end(AirPassengers)
## [1] 1960 12
#find frequency of time series
frequency(AirPassengers)
## [1] 12
#no. of passengers across the spectrum
summary(AirPassengers)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 104.0 180.0 265.5 280.3 360.5 622.0
#cycle across time series
cycle(AirPassengers)
## Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec
## 1949 1 2 3 4 5 6 7 8 9 10 11 12
## 1950 1 2 3 4 5 6 7 8 9 10 11 12
## 1951 1 2 3 4 5 6 7 8 9 10 11 12
## 1952 1 2 3 4 5 6 7 8 9 10 11 12
## 1953 1 2 3 4 5 6 7 8 9 10 11 12
## 1954 1 2 3 4 5 6 7 8 9 10 11 12
## 1955 1 2 3 4 5 6 7 8 9 10 11 12
## 1956 1 2 3 4 5 6 7 8 9 10 11 12
## 1957 1 2 3 4 5 6 7 8 9 10 11 12
## 1958 1 2 3 4 5 6 7 8 9 10 11 12
## 1959 1 2 3 4 5 6 7 8 9 10 11 12
## 1960 1 2 3 4 5 6 7 8 9 10 11 12
#boxplot across months will give us a sense on seasonal effect
boxplot(AirPassengers~cycle(AirPassengers))
In this graph, we can analyze in which month the maximum flow of passengers in the given time series data. In the data of 12 years, we can see that in the month of July and August the travelers are more in each year. So this is the seasonality part of the data.
#plot
plot(AirPassengers)
We can see in the graph that the initial series, for example, the span is smaller than what we see towards the end, so this could create a problem because that makes this time series non-stationary.
#log transformation
plot(log(AirPassengers))
#Differentiation on log data
plot(diff(log(AirPassengers)))
Now the series is stationary and we can apply time series model on it.
#acf plot on original data which was not transformed
acf(AirPassengers)
#acf plot on transformed data
acf(diff(log(AirPassengers)))
#pacf plot on transformed data
pacf(diff(log(AirPassengers)))
With the above ACF and PACF plots on transformed data, we can pick optimal ARIMA values to build ARIMA time series.
#ARIMA model and predict the future
fit <- arima(log(AirPassengers), c(0, 1, 1),
seasonal = list(order=c(0,1,1), period=12))
fit
##
## Call:
## arima(x = log(AirPassengers), order = c(0, 1, 1), seasonal = list(order = c(0,
## 1, 1), period = 12))
##
## Coefficients:
## ma1 sma1
## -0.4018 -0.5569
## s.e. 0.0896 0.0731
##
## sigma^2 estimated as 0.001348: log likelihood = 244.7, aic = -483.4
#predict 10 future values
p <- predict(fit, n.ahead = 10*12)
#converting values into decimal format
pred <- 2.718^p$pred
pred
## Jan Feb Mar Apr May Jun Jul
## 1961 450.1371 425.4501 478.7004 492.0881 508.7261 582.9599 669.5589
## 1962 495.6111 468.4301 527.0599 541.8000 560.1189 641.8519 737.1994
## 1963 545.6789 515.7521 580.3048 596.5340 616.7035 706.6934 811.6731
## 1964 600.8048 567.8546 638.9286 656.7973 679.0044 778.0853 893.6703
## 1965 661.4995 625.2207 703.4748 723.1486 747.5991 856.6894 983.9511
## 1966 728.3259 688.3820 774.5415 796.2029 823.1234 943.2343 1083.3522
## 1967 801.9031 757.9241 852.7876 876.6373 906.2773 1038.5221 1192.7951
## 1968 882.9134 834.4914 938.9384 965.1973 997.8317 1143.4362 1313.2943
## 1969 972.1075 918.7938 1033.7922 1062.7040 1098.6352 1258.9490 1445.9665
## 1970 1070.3122 1011.6126 1138.2285 1170.0609 1209.6220 1386.1311 1592.0416
## Aug Sep Oct Nov Dec
## 1961 666.6280 557.8234 496.8878 429.6018 476.9375
## 1962 733.9724 614.1761 547.0846 473.0012 525.1189
## 1963 808.1201 676.2217 602.3525 520.7850 578.1677
## 1964 889.7584 744.5353 663.2036 573.3960 636.5756
## 1965 979.6440 819.7501 730.2021 631.3219 700.8840
## 1966 1078.6101 902.5633 803.9689 695.0996 771.6891
## 1967 1187.5739 993.7425 885.1878 765.3202 849.6470
## 1968 1307.5456 1094.1328 974.6117 842.6348 935.4804
## 1969 1439.6371 1204.6648 1073.0694 927.7598 1029.9850
## 1970 1585.0728 1326.3630 1181.4735 1021.4844 1134.0366
#Plot
ts.plot(AirPassengers, 2.718^p$pred, log='y', lty = c(1,3))
#testing the model
#create test ts data
datawide <- ts(AirPassengers, frequency = 12, start = c(1949,1), end = c(1959,12))
#model test ts data
test <- arima(log(datawide), c(0,1,1),
seasonal=list(order = c(0,1,1), period = 12))
#predict unseen
p1 <- predict(test, n.ahead = 10*12)
#convert into decimal format
pred1 <- 2.718^p1$pred
data <- head(pred1, 12)
pred_1960 <- round(data, digits = 0)
pred_1960
## [1] 419 399 466 454 473 547 622 630 526 462 406 452
#compare with originalvalues
original_1960 <- tail(AirPassengers, 12)
original_1960
## [1] 417 391 419 461 472 535 622 606 508 461 390 432