library(M4comp2018)
library(xts)
library(astsa)
library(ggplot2)
library(forecast)
library(ggfortify)
library(fpp2)
data(M4)
df = data.frame(matrix(ncol = 5, nrow = 100000))
colnames(df) = c("st", "n", "type", "h", "period")
df$st = unlist(Map(function(l) {as.character(l$st[[1]][1])}, M4))
df$n = unlist(Map(function(l) {c(l$n[[1]][1]) }, M4))
df$type = unlist(Map(function(l) {as.character(l$type[[1]][1])}, M4))
df$h = unlist(Map(function(l) {c(l$h[[1]][1]) }, M4))
df$period = unlist(Map(function(l) {as.character(l$period[[1]][1])}, M4))
str(df)
## 'data.frame': 100000 obs. of 5 variables:
## $ st : chr "D1" "D2" "D3" "D4" ...
## $ n : int 1006 1006 130 169 156 1006 1006 999 999 674 ...
## $ type : chr "Macro" "Macro" "Macro" "Macro" ...
## $ h : int 14 14 14 14 14 14 14 14 14 14 ...
## $ period: chr "Daily" "Daily" "Daily" "Daily" ...
table(df$period)
##
## Daily Hourly Monthly Quarterly Weekly Yearly
## 4227 414 48000 24000 359 23000
yearly_M4 = Filter(function(l) l$period == "Yearly", M4)
quarterly_M4 = Filter(function(l) l$period == "Quarterly", M4)
monthly_M4 = Filter(function(l) l$period == "Monthly", M4)
weekly_M4 = Filter(function(l) l$period == "Weekly", M4)
hourly_M4 = Filter(function(l) l$period == "Hourly", M4)
daily_M4 = Filter(function(l) l$period == "Daily", M4)
(including n_ahead data in red)
Extract the first month: full, training, and test set
monthly_1_full = ts(c(monthly_M4[[1]]$x, monthly_M4[[1]]$xx),
start=start(monthly_M4[[1]]$x),
frequency = frequency(monthly_M4[[1]]$x))
monthly_1_train = ts(monthly_M4[[1]]$x,
start=start(monthly_M4[[1]]$x),
frequency = frequency(monthly_M4[[1]]$x))
monthly_1_test = ts(monthly_M4[[1]]$xx,
start=start(monthly_M4[[1]]$xx),
frequency = frequency(monthly_M4[[1]]$xx))
Explore the structure of the training and test set
head(monthly_1_train)
## Jun Jul Aug Sep Oct Nov
## 1976 8000 8350 8570 7700 7080 6520
str(monthly_1_train)
## Time-Series [1:469] from 1976 to 2015: 8000 8350 8570 7700 7080 6520 6070 6650 6830 5710 ...
head(monthly_1_test)
## Jul Aug Sep Oct Nov Dec
## 2015 8720 7790 4770 5060 4720 4450
str(monthly_1_test)
## Time-Series [1:18] from 2016 to 2017: 8720 7790 4770 5060 4720 4450 5120 5960 6560 4900 ...
plot(ts(monthly_1_full,
start = start(monthly_1_full),
frequency = frequency(monthly_1_full)),
type = 'l', col = 'black', ylab ='', xlab ='')
lines(monthly_1_test, col = 'red', type = 'o', xlab = '')
Produce a polar coordinate season plot
ggseasonplot(monthly_1_train, polar = T)
Create subseries plot that comprises mini time plots for each season
ggsubseriesplot(monthly_1_train)
### Plot the monthly sample: Removing trend
autoplot(diff(monthly_1_train))
## 7. Explore time series patterns: Trend, seasonal, or cyclic
acf2(diff(monthly_1_train))
## ACF PACF
## [1,] 0.07 0.07
## [2,] -0.12 -0.12
## [3,] -0.35 -0.34
## [4,] -0.25 -0.26
## [5,] 0.19 0.14
## [6,] 0.04 -0.16
## [7,] 0.13 0.00
## [8,] -0.25 -0.28
## [9,] -0.32 -0.35
## [10,] -0.09 -0.29
## [11,] 0.20 -0.06
## [12,] 0.62 0.37
## [13,] 0.19 0.26
## [14,] -0.13 0.11
## [15,] -0.31 0.08
## [16,] -0.20 0.03
## [17,] 0.14 0.01
## [18,] 0.07 -0.07
## [19,] 0.11 0.08
## [20,] -0.25 -0.02
## [21,] -0.31 -0.07
## [22,] -0.07 -0.06
## [23,] 0.19 -0.09
## [24,] 0.59 0.16
## [25,] 0.16 0.06
## [26,] -0.12 0.01
## [27,] -0.28 0.06
## [28,] -0.20 0.00
## [29,] 0.13 -0.02
## [30,] 0.04 -0.13
## [31,] 0.11 0.01
## [32,] -0.24 -0.04
## [33,] -0.26 0.04
## [34,] -0.11 -0.10
## [35,] 0.19 -0.07
## [36,] 0.57 0.13
## [37,] 0.15 0.04
## [38,] -0.12 -0.05
## [39,] -0.30 -0.02
## [40,] -0.20 -0.10
## [41,] 0.14 -0.02
## [42,] 0.04 -0.09
## [43,] 0.12 0.03
## [44,] -0.25 -0.07
## [45,] -0.25 0.03
## [46,] -0.09 -0.06
## [47,] 0.16 -0.08
## [48,] 0.54 0.01
Box.test(monthly_1_train, fitdf = 0, lag = 24, type = 'Lj')
##
## Box-Ljung test
##
## data: monthly_1_train
## X-squared = 4388.7, df = 24, p-value < 2.2e-16
Box.test(diff(monthly_1_train), fitdf = 0, lag = 24, type = 'Lj')
##
## Box-Ljung test
##
## data: diff(monthly_1_train)
## X-squared = 786.08, df = 24, p-value < 2.2e-16
The Ljung-Box test result (p-value <<< 0.05) shows autocorrelation in both original data and differencing data.
Model summary and plot
snaive_f = snaive(diff(monthly_1_train), h = 18)
autoplot(snaive_f)
summary(snaive_f)
##
## Forecast method: Seasonal naive method
##
## Model Information:
## Call: snaive(y = diff(monthly_1_train), h = 18)
##
## Residual sd: 727.3591
##
## Error measures:
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set -0.9429825 726.5617 565.8114 NaN Inf 1 -0.3595435
##
## Forecasts:
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## Jul 2015 1920 988.8737 2851.12627 495.96526 3344.03474
## Aug 2015 -1390 -2321.1263 -458.87373 -2814.03474 34.03474
## Sep 2015 -1930 -2861.1263 -998.87373 -3354.03474 -505.96526
## Oct 2015 -410 -1341.1263 521.12627 -1834.03474 1014.03474
## Nov 2015 580 -351.1263 1511.12627 -844.03474 2004.03474
## Dec 2015 -480 -1411.1263 451.12627 -1904.03474 944.03474
## Jan 2016 300 -631.1263 1231.12627 -1124.03474 1724.03474
## Feb 2016 -910 -1841.1263 21.12627 -2334.03474 514.03474
## Mar 2016 -40 -971.1263 891.12627 -1464.03474 1384.03474
## Apr 2016 120 -811.1263 1051.12627 -1304.03474 1544.03474
## May 2016 -300 -1231.1263 631.12627 -1724.03474 1124.03474
## Jun 2016 1980 1048.8737 2911.12627 555.96526 3404.03474
## Jul 2016 1920 603.1886 3236.81140 -93.88924 3933.88924
## Aug 2016 -1390 -2706.8114 -73.18860 -3403.88924 623.88924
## Sep 2016 -1930 -3246.8114 -613.18860 -3943.88924 83.88924
## Oct 2016 -410 -1726.8114 906.81140 -2423.88924 1603.88924
## Nov 2016 580 -736.8114 1896.81140 -1433.88924 2593.88924
## Dec 2016 -480 -1796.8114 836.81140 -2493.88924 1533.88924
Fitted values and residuals
autoplot(snaive_f, series = "Train")
Check residuals whether white noise or not
checkresiduals(snaive_f)
##
## Ljung-Box test
##
## data: Residuals from Seasonal naive method
## Q* = 225.16, df = 24, p-value < 2.2e-16
##
## Model df: 0. Total lags used: 24
Accuracy of Seasonal Naive model
accuracy(snaive_f, diff(monthly_1_test))
## ME RMSE MAE MPE MAPE MASE
## Training set -0.9429825 726.5617 565.8114 NaN Inf 1.000000
## Test set -30.5882353 815.6845 632.9412 80.09394 97.85567 1.118643
## ACF1 Theil's U
## Training set -0.3595435 NA
## Test set -0.1157613 0.5601452
Model summary and plot
sarima_f = auto.arima(monthly_1_train)
summary(sarima_f)
## Series: monthly_1_train
## ARIMA(1,0,5)(0,1,1)[12]
##
## Coefficients:
## ar1 ma1 ma2 ma3 ma4 ma5 sma1
## 0.9791 -0.3524 -0.0601 -0.1436 0.0074 0.1266 -0.7895
## s.e. 0.0110 0.0489 0.0506 0.0520 0.0520 0.0436 0.0338
##
## sigma^2 estimated as 268035: log likelihood=-3507.04
## AIC=7030.07 AICc=7030.39 BIC=7063.07
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set -3.835782 507.1257 389.5809 -0.5104896 6.365607 0.5128426
## ACF1
## Training set -0.0007186712
sarima_f %>%
forecast(h = 18) %>%
autoplot()
Check residuals whether white noise or not
checkresiduals(sarima_f)
##
## Ljung-Box test
##
## data: Residuals from ARIMA(1,0,5)(0,1,1)[12]
## Q* = 30.259, df = 17, p-value = 0.02454
##
## Model df: 7. Total lags used: 24
Accuracy of Seasonal Arima model
accuracy(forecast(sarima_f, h = 18), monthly_1_test, d = 0, D = 1)
## ME RMSE MAE MPE MAPE MASE
## Training set -3.835782 507.1257 389.5809 -0.5104896 6.365607 0.5128426
## Test set -111.744545 774.8113 687.0752 -4.4136328 12.765260 0.9044630
## ACF1 Theil's U
## Training set -0.0007186712 NA
## Test set 0.4626465094 0.6106427