library(fpp3)
library(distributional)
library(RColorBrewer)
library(seasonal)
library(mlbench)
library(fmsb)
library(stringr)
Do exercises 8.1, 8.5, 8.6, 8.7, 8.8, 8.9 in Hyndman. Please submit both the link to your RPubs page and the .pdf file with your run code.
The optimal values of alpha and the initial level l[0] for a simple exponential smoothing (SES) model can be found by fitting an ETS(A,N,N) model and piping it into report(). These values are estimated automatically by maximising the likelihood. The data frames df_alpha and df_l below summarise the optimal alpha and l[0] for each state. (A more concise extraction using tidy() on the whole mable is sketched after the two summary tables.)
# pig slaughter counts for every state
aus_livestock_pigs <- aus_livestock %>% filter(Animal == 'Pigs')
# vector of state names to loop over
aus_states <- aus_livestock %>% filter(Animal == 'Pigs') %>% distinct(State)
aus_states <- lapply(aus_states, as.character)
# fit an ETS(A,N,N) (simple exponential smoothing) model for each state
pigs_model <- aus_livestock_pigs %>%
  model(ANN = ETS(Count ~ error("A") + trend("N") + season("N")))
df_alpha <- data.frame()
df_l <- data.frame()
# extract the optimal alpha for each state
# (report() also prints each state's model summary below)
for (i in aus_states$State) {
  new_row <- pigs_model %>%
    filter(State == i) %>%
    report() %>%
    tidy() %>%
    filter(term == "alpha")
  df_alpha <- rbind(df_alpha, new_row)
}
## Series: Count
## Model: ETS(A,N,N)
## Smoothing parameters:
## alpha = 0.4984782
##
## Initial states:
## l[0]
## 1659.057
##
## sigma^2: 379348.4
##
## AIC AICc BIC
## 10701.17 10701.22 10714.15
## Series: Count
## Model: ETS(A,N,N)
## Smoothing parameters:
## alpha = 0.2655419
##
## Initial states:
## l[0]
## 107861.1
##
## sigma^2: 129400339
##
## AIC AICc BIC
## 13955.55 13955.59 13968.52
## Series: Count
## Model: ETS(A,N,N)
## Smoothing parameters:
## alpha = 0.274479
##
## Initial states:
## l[0]
## 317.3623
##
## sigma^2: 17986.84
##
## AIC AICc BIC
## 8999.935 8999.979 9012.908
## Series: Count
## Model: ETS(A,N,N)
## Smoothing parameters:
## alpha = 0.2361812
##
## Initial states:
## l[0]
## 79572.86
##
## sigma^2: 49879356
##
## AIC AICc BIC
## 13423.60 13423.65 13436.58
## Series: Count
## Model: ETS(A,N,N)
## Smoothing parameters:
## alpha = 0.2812892
##
## Initial states:
## l[0]
## 43108.7
##
## sigma^2: 44439013
##
## AIC AICc BIC
## 13359.16 13359.20 13372.13
## Series: Count
## Model: ETS(A,N,N)
## Smoothing parameters:
## alpha = 0.1821152
##
## Initial states:
## l[0]
## 12365.22
##
## sigma^2: 915519.9
##
## AIC AICc BIC
## 11192.79 11192.84 11205.77
## Series: Count
## Model: ETS(A,N,N)
## Smoothing parameters:
## alpha = 0.3221247
##
## Initial states:
## l[0]
## 100646.6
##
## sigma^2: 87480760
##
## AIC AICc BIC
## 13737.10 13737.14 13750.07
## Series: Count
## Model: ETS(A,N,N)
## Smoothing parameters:
## alpha = 0.214573
##
## Initial states:
## l[0]
## 43478.4
##
## sigma^2: 25777782
##
## AIC AICc BIC
## 13055.27 13055.32 13068.24
# extract the optimal initial level l[0] for each state
for (i in aus_states$State) {
  new_row <- pigs_model %>%
    filter(State == i) %>%
    report() %>%
    tidy() %>%
    filter(term == "l[0]")
  df_l <- rbind(df_l, new_row)
}
## Series: Count
## Model: ETS(A,N,N)
## Smoothing parameters:
## alpha = 0.4984782
##
## Initial states:
## l[0]
## 1659.057
##
## sigma^2: 379348.4
##
## AIC AICc BIC
## 10701.17 10701.22 10714.15
## Series: Count
## Model: ETS(A,N,N)
## Smoothing parameters:
## alpha = 0.2655419
##
## Initial states:
## l[0]
## 107861.1
##
## sigma^2: 129400339
##
## AIC AICc BIC
## 13955.55 13955.59 13968.52
## Series: Count
## Model: ETS(A,N,N)
## Smoothing parameters:
## alpha = 0.274479
##
## Initial states:
## l[0]
## 317.3623
##
## sigma^2: 17986.84
##
## AIC AICc BIC
## 8999.935 8999.979 9012.908
## Series: Count
## Model: ETS(A,N,N)
## Smoothing parameters:
## alpha = 0.2361812
##
## Initial states:
## l[0]
## 79572.86
##
## sigma^2: 49879356
##
## AIC AICc BIC
## 13423.60 13423.65 13436.58
## Series: Count
## Model: ETS(A,N,N)
## Smoothing parameters:
## alpha = 0.2812892
##
## Initial states:
## l[0]
## 43108.7
##
## sigma^2: 44439013
##
## AIC AICc BIC
## 13359.16 13359.20 13372.13
## Series: Count
## Model: ETS(A,N,N)
## Smoothing parameters:
## alpha = 0.1821152
##
## Initial states:
## l[0]
## 12365.22
##
## sigma^2: 915519.9
##
## AIC AICc BIC
## 11192.79 11192.84 11205.77
## Series: Count
## Model: ETS(A,N,N)
## Smoothing parameters:
## alpha = 0.3221247
##
## Initial states:
## l[0]
## 100646.6
##
## sigma^2: 87480760
##
## AIC AICc BIC
## 13737.10 13737.14 13750.07
## Series: Count
## Model: ETS(A,N,N)
## Smoothing parameters:
## alpha = 0.214573
##
## Initial states:
## l[0]
## 43478.4
##
## sigma^2: 25777782
##
## AIC AICc BIC
## 13055.27 13055.32 13068.24
df_alpha
## # A tibble: 8 × 5
## Animal State .model term estimate
## <fct> <fct> <chr> <chr> <dbl>
## 1 Pigs Australian Capital Territory ANN alpha 0.498
## 2 Pigs New South Wales ANN alpha 0.266
## 3 Pigs Northern Territory ANN alpha 0.274
## 4 Pigs Queensland ANN alpha 0.236
## 5 Pigs South Australia ANN alpha 0.281
## 6 Pigs Tasmania ANN alpha 0.182
## 7 Pigs Victoria ANN alpha 0.322
## 8 Pigs Western Australia ANN alpha 0.215
df_l
## # A tibble: 8 × 5
## Animal State .model term estimate
## <fct> <fct> <chr> <chr> <dbl>
## 1 Pigs Australian Capital Territory ANN l[0] 1659.
## 2 Pigs New South Wales ANN l[0] 107861.
## 3 Pigs Northern Territory ANN l[0] 317.
## 4 Pigs Queensland ANN l[0] 79573.
## 5 Pigs South Australia ANN l[0] 43109.
## 6 Pigs Tasmania ANN l[0] 12365.
## 7 Pigs Victoria ANN l[0] 100647.
## 8 Pigs Western Australia ANN l[0] 43478.
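As an aside, the same two tables can be produced without a loop: tidy() applied to the whole mable returns every estimated parameter for every state at once. A minimal sketch, assuming the pigs_model mable defined above:

# Sketch: tidy() the whole mable instead of looping over states,
# then split the parameter table into alpha and l[0] summaries.
pigs_params <- pigs_model %>% tidy()
pigs_params %>% filter(term == "alpha")  # equivalent to df_alpha
pigs_params %>% filter(term == "l[0]")   # equivalent to df_l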
# forecast 4 months ahead for each state, collect the intervals, and plot
df_forecasts <- data.frame()
for (i in aus_states$State) {
  print(i)
  aus_livestock_pigs_temp <- aus_livestock_pigs %>% filter(State == i)
  new_row <- pigs_model %>% filter(State == i) %>% forecast(h = 4) %>% hilo()
  df_forecasts <- bind_rows(df_forecasts, new_row)
  print_plot <- pigs_model %>%
    filter(State == i) %>%
    forecast(h = 4) %>%
    autoplot() + autolayer(aus_livestock_pigs_temp) +
    labs(title = paste("4 Month Forecast of Pig Slaughter for", i))
  print(print_plot)
}
## [1] "Australian Capital Territory"
## [1] "New South Wales"
## [1] "Northern Territory"
## [1] "Queensland"
## [1] "South Australia"
## [1] "Tasmania"
## [1] "Victoria"
## [1] "Western Australia"
df_forecasts
## Animal State .model Month
## 1 Pigs Australian Capital Territory ANN 2019 Jan
## 2 Pigs Australian Capital Territory ANN 2019 Feb
## 3 Pigs Australian Capital Territory ANN 2019 Mar
## 4 Pigs Australian Capital Territory ANN 2019 Apr
## 5 Pigs New South Wales ANN 2019 Jan
## 6 Pigs New South Wales ANN 2019 Feb
## 7 Pigs New South Wales ANN 2019 Mar
## 8 Pigs New South Wales ANN 2019 Apr
## 9 Pigs Northern Territory ANN 2019 Jan
## 10 Pigs Northern Territory ANN 2019 Feb
## 11 Pigs Northern Territory ANN 2019 Mar
## 12 Pigs Northern Territory ANN 2019 Apr
## 13 Pigs Queensland ANN 2019 Jan
## 14 Pigs Queensland ANN 2019 Feb
## 15 Pigs Queensland ANN 2019 Mar
## 16 Pigs Queensland ANN 2019 Apr
## 17 Pigs South Australia ANN 2019 Jan
## 18 Pigs South Australia ANN 2019 Feb
## 19 Pigs South Australia ANN 2019 Mar
## 20 Pigs South Australia ANN 2019 Apr
## 21 Pigs Tasmania ANN 2019 Jan
## 22 Pigs Tasmania ANN 2019 Feb
## 23 Pigs Tasmania ANN 2019 Mar
## 24 Pigs Tasmania ANN 2019 Apr
## 25 Pigs Victoria ANN 2019 Jan
## 26 Pigs Victoria ANN 2019 Feb
## 27 Pigs Victoria ANN 2019 Mar
## 28 Pigs Victoria ANN 2019 Apr
## 29 Pigs Western Australia ANN 2019 Jan
## 30 Pigs Western Australia ANN 2019 Feb
## 31 Pigs Western Australia ANN 2019 Mar
## 32 Pigs Western Australia ANN 2019 Apr
## Count .mean 80%
## 1 N(6.620284e-78, 379348.4) 6.620284e-78 [ -789.3238, 789.3238]80
## 2 N(6.620284e-78, 473609.1) 6.620284e-78 [ -881.9543, 881.9543]80
## 3 N(6.620284e-78, 567869.8) 6.620284e-78 [ -965.7405, 965.7405]80
## 4 N(6.620284e-78, 662130.5) 6.620284e-78 [ -1042.8164, 1042.8164]80
## 5 N(73470.44, 129400339) 7.347044e+04 [ 58892.2429, 88048.6355]80
## 6 N(73470.44, 138524684) 7.347044e+04 [ 58387.0246, 88553.8538]80
## 7 N(73470.44, 147649028) 7.347044e+04 [ 57898.1887, 89042.6897]80
## 8 N(73470.44, 156773372) 7.347044e+04 [ 57424.2379, 89516.6405]80
## 9 N(1.376158e-17, 17986.84) 1.376158e-17 [ -171.8753, 171.8753]80
## 10 N(1.376158e-17, 19341.94) 1.376158e-17 [ -178.2322, 178.2322]80
## 11 N(1.376158e-17, 20697.05) 1.376158e-17 [ -184.3700, 184.3700]80
## 12 N(1.376158e-17, 22052.15) 1.376158e-17 [ -190.3100, 190.3100]80
## 13 N(97840.26, 49879356) 9.784026e+04 [ 88789.2620, 106891.2594]80
## 14 N(97840.26, 52661704) 9.784026e+04 [ 88540.2480, 107140.2733]80
## 15 N(97840.26, 55444053) 9.784026e+04 [ 88297.7299, 107382.7914]80
## 16 N(97840.26, 58226401) 9.784026e+04 [ 88061.2243, 107619.2970]80
## 17 N(109006.2, 44439013) 1.090062e+05 [100463.0052, 117549.3153]80
## 18 N(109006.2, 47955188) 1.090062e+05 [100131.4561, 117880.8644]80
## 19 N(109006.2, 51471362) 1.090062e+05 [ 99811.8550, 118200.4655]80
## 20 N(109006.2, 54987537) 1.090062e+05 [ 99502.9964, 118509.3241]80
## 21 N(2829.825, 915519.9) 2.829825e+03 [ 1603.6009, 4056.0499]80
## 22 N(2829.825, 945884) 2.829825e+03 [ 1583.4324, 4076.2185]80
## 23 N(2829.825, 976248.1) 2.829825e+03 [ 1563.5850, 4096.0659]80
## 24 N(2829.825, 1006612) 2.829825e+03 [ 1544.0439, 4115.6069]80
## 25 N(95186.56, 87480760) 9.518656e+04 [ 83200.0583, 107173.0566]80
## 26 N(95186.56, 96558142) 9.518656e+04 [ 82593.5188, 107779.5961]80
## 27 N(95186.56, 105635524) 9.518656e+04 [ 82014.8802, 108358.2347]80
## 28 N(95186.56, 114712906) 9.518656e+04 [ 81460.6133, 108912.5016]80
## 29 N(68591.56, 25777782) 6.859156e+04 [ 62084.8884, 75098.2308]80
## 30 N(68591.56, 26964632) 6.859156e+04 [ 61936.7852, 75246.3339]80
## 31 N(68591.56, 28151482) 6.859156e+04 [ 61791.9071, 75391.2120]80
## 32 N(68591.56, 29338332) 6.859156e+04 [ 61650.0522, 75533.0670]80
## 95%
## 1 [-1207.1666, 1207.1666]95
## 2 [-1348.8328, 1348.8328]95
## 3 [-1476.9727, 1476.9727]95
## 4 [-1594.8501, 1594.8501]95
## 5 [51175.0120, 95765.8664]95
## 6 [50402.3472, 96538.5312]95
## 7 [49654.7372, 97286.1412]95
## 8 [48929.8920, 98010.9863]95
## 9 [ -262.8606, 262.8606]95
## 10 [ -272.5826, 272.5826]95
## 11 [ -281.9696, 281.9696]95
## 12 [ -291.0540, 291.0540]95
## 13 [83997.9527, 111682.5687]95
## 14 [83617.1187, 112063.4027]95
## 15 [83246.2192, 112434.3021]95
## 16 [82884.5152, 112796.0061]95
## 17 [95940.5321, 122071.7884]95
## 18 [95433.4715, 122578.8489]95
## 19 [94944.6838, 123067.6366]95
## 20 [94472.3253, 123539.9952]95
## 21 [ 954.4769, 4705.1740]95
## 22 [ 923.6317, 4736.0192]95
## 23 [ 893.2778, 4766.3731]95
## 24 [ 863.3923, 4796.2585]95
## 25 [76854.7889, 113518.3260]95
## 26 [75927.1668, 114445.9480]95
## 27 [75042.2154, 115330.8995]95
## 28 [74194.5374, 116178.5775]95
## 29 [58640.4647, 78542.6544]95
## 30 [58413.9605, 78769.1587]95
## 31 [58192.3886, 78990.7306]95
## 32 [57975.4401, 79207.6791]95
The 95% prediction interval can be computed manually from the variance shown in the distribution column Count, which is the second argument of N(mean, variance). Extracting it was awkward: I ended up using a regex that takes the number between a comma followed by whitespace and the closing parenthesis. This manual calculation gives approximately the same prediction interval as the one calculated by R, with some precision lost because the values are parsed from a printed string and because 1.96 is a rounded z-score. (A cleaner extraction using the distributional accessors is sketched after the output below.)
options(scipen = 10)
# parse the variance (second argument of N(mean, variance)) out of the printed
# distribution, then compute mean +/- 1.96 * sd
df_forecasts %>% filter(month(Month) == 1) %>%
  mutate(string_dist = as.character(Count)) %>%
  mutate(
    var = as.numeric(str_extract(string_dist, "(?<=,\\s)\\d+(?=\\))"))) %>%
  mutate(calculated_PI_lower = .mean - 1.96*sqrt(var)) %>%
  mutate(calculated_PI_upper = .mean + 1.96*sqrt(var))
## Animal State .model Month Count
## 1 Pigs Australian Capital Territory ANN 2019 Jan N(6.620284e-78, 379348.4)
## 2 Pigs New South Wales ANN 2019 Jan N(73470.44, 129400339)
## 3 Pigs Northern Territory ANN 2019 Jan N(1.376158e-17, 17986.84)
## 4 Pigs Queensland ANN 2019 Jan N(97840.26, 49879356)
## 5 Pigs South Australia ANN 2019 Jan N(109006.2, 44439013)
## 6 Pigs Tasmania ANN 2019 Jan N(2829.825, 915519.9)
## 7 Pigs Victoria ANN 2019 Jan N(95186.56, 87480760)
## 8 Pigs Western Australia ANN 2019 Jan N(68591.56, 25777782)
## .mean 80% 95%
## 1 6.620284e-78 [ -789.3238, 789.3238]80 [-1207.1666, 1207.1666]95
## 2 7.347044e+04 [ 58892.2429, 88048.6355]80 [51175.0120, 95765.8664]95
## 3 1.376158e-17 [ -171.8753, 171.8753]80 [ -262.8606, 262.8606]95
## 4 9.784026e+04 [ 88789.2620, 106891.2594]80 [83997.9527, 111682.5687]95
## 5 1.090062e+05 [100463.0052, 117549.3153]80 [95940.5321, 122071.7884]95
## 6 2.829825e+03 [ 1603.6009, 4056.0499]80 [ 954.4769, 4705.1740]95
## 7 9.518656e+04 [ 83200.0583, 107173.0566]80 [76854.7889, 113518.3260]95
## 8 6.859156e+04 [ 62084.8884, 75098.2308]80 [58640.4647, 78542.6544]95
## string_dist var calculated_PI_lower calculated_PI_upper
## 1 N(6.6e-78, 379348) 379348 -1207.1882 1207.1882
## 2 N(73470, 129400339) 129400339 51174.6023 95766.2761
## 3 N(1.4e-17, 17987) 17987 -262.8666 262.8666
## 4 N(97840, 49879356) 49879356 83997.6983 111682.8231
## 5 N(109006, 44439013) 44439013 95940.2920 122072.0285
## 6 N(2830, 915520) 915520 954.4424 4705.2085
## 7 N(95187, 87480760) 87480760 76854.4521 113518.6628
## 8 N(68592, 25777782) 25777782 58640.2819 78542.8373
options(scipen = 0)
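A cleaner alternative to the regex extraction is sketched below: it reads the mean and variance directly from the distribution column with the accessors from the distributional package. This assumes the Count column is still stored as a distribution vector after bind_rows().

# Sketch: use mean() and variance() from the distributional package on the
# forecast distribution instead of parsing its printed representation.
df_forecasts %>%
  filter(month(Month) == 1) %>%
  mutate(
    dist_mean = mean(Count),                      # point forecast
    dist_var = distributional::variance(Count),   # forecast variance
    calculated_PI_lower = dist_mean - 1.96 * sqrt(dist_var),
    calculated_PI_upper = dist_mean + 1.96 * sqrt(dist_var)
  )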
Using Indonesia as the country to analyse, I plotted Exports over time. Exports as a percentage of GDP appear to increase until the late 1990s, with one major spike at that point, before declining steeply. Since Exports is expressed as a percentage of GDP, I also transformed the series to recover exports in $USD rather than as a share of GDP, then used CPI to adjust for inflation and plotted the result. On the inflation-adjusted scale, the biggest spike in exports was actually in 1980, followed by a decline and another peak in the late 1990s, with a further slight peak in the early 2010s.
global_economy_indonesia <- global_economy %>%
filter(Country == "Indonesia") %>%
mutate(exports_USD = Exports * GDP/100) %>%
mutate(adjusted_exports_USD = exports_USD / CPI * 100) %>%
drop_na()
global_economy_indonesia %>% autoplot(Exports)
global_economy_indonesia %>%
autoplot(adjusted_exports_USD) +
labs(x = "Time", y = "$USD",
title = "Inflation Adjusted Exports for Indonesia")
Forecasting the series with an ETS(A,N,N) model, the resulting forecast is similar to a naive forecast because the estimated smoothing parameter alpha is very high (0.9999).
ge_indonesia_model <- global_economy_indonesia %>%
model(ANN = ETS(adjusted_exports_USD ~ error("A")+trend("N")+season("N")))
global_economy_indonesia_temp <-
global_economy_indonesia %>%
select(adjusted_exports_USD)
fc <- ge_indonesia_model %>% forecast(h = 10)
fc %>% autoplot() + autolayer(global_economy_indonesia_temp) +
labs(x = "Time", y = "$USD",
title = "Inflation Adjusted Exports for Indonesia")
ge_indonesia_model %>% report()
## Series: adjusted_exports_USD
## Model: ETS(A,N,N)
## Smoothing parameters:
## alpha = 0.9999
##
## Initial states:
## l[0]
## 164269758014
##
## sigma^2: 1.591754e+21
##
## AIC AICc BIC
## 2694.258 2694.769 2700.054
I used the accuracy() function on the model to compute the RMSE on the training data. Since RMSE depends on the scale of the series, it is difficult to interpret on its own. As noted above, the forecast is similar to the naive forecast, and this is also evident in the RMSSE, which is just barely under 1. The scaled errors use naive forecasts to account for the scale of the series, so an RMSSE close to 1 indicates that the model performs about the same as the naive method on the training data. (A direct comparison against a NAIVE model is sketched after the accuracy output below.)
accuracy(ge_indonesia_model)
## # A tibble: 1 × 11
## Country .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <fct> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Indonesia ANN Traini… -3.68e8 3.91e10 2.79e10 -2.23 15.2 1.01 0.999 0.157
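As a quick check of the claim above, the sketch below fits a NAIVE model to the same series and lines its point forecasts up against the ETS(A,N,N) forecasts; with alpha near 1 the two should be nearly identical.

# Sketch: compare ETS(A,N,N) point forecasts with naive forecasts.
naive_fc <- global_economy_indonesia %>%
  model(Naive = NAIVE(adjusted_exports_USD)) %>%
  forecast(h = 10)
bind_rows(as_tibble(fc), as_tibble(naive_fc)) %>%
  select(.model, Year, .mean)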
For this dataset, the trended model also captures the general downward trend in exports. While capturing that long-run trend may be beneficial, the most recent observations exhibit a local upward trend, which the trended model's forecast does not reflect; this may be a disadvantage. Looking at the AICc, the untrended model also appears to be the better fit, as it has the lower information criterion.
ge_indonesia_model2 <- global_economy_indonesia %>%
model(ANN = ETS(adjusted_exports_USD ~ error("A")+trend("A")+season("N")))
fc2 <- ge_indonesia_model2 %>% forecast(h = 10)
fc2 %>% autoplot() + autolayer(global_economy_indonesia_temp) +
labs(x = "Time", y = "$USD",
title = "Inflation Adjusted Exports for Indonesia")
accuracy(ge_indonesia_model2)
## # A tibble: 1 × 11
## Country .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <fct> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Indonesia ANN Train… -6.58e9 4.14e10 2.97e10 -4.90 16.6 1.07 1.06 0.0544
ge_indonesia_model %>% report()
## Series: adjusted_exports_USD
## Model: ETS(A,N,N)
## Smoothing parameters:
## alpha = 0.9999
##
## Initial states:
## l[0]
## 164269758014
##
## sigma^2: 1.591754e+21
##
## AIC AICc BIC
## 2694.258 2694.769 2700.054
ge_indonesia_model2 %>% report()
## Series: adjusted_exports_USD
## Model: ETS(A,A,N)
## Smoothing parameters:
## alpha = 0.9998997
## beta = 0.07124248
##
## Initial states:
## l[0] b[0]
## 45614182461 22327694439
##
## sigma^2: 1.861234e+21
##
## AIC AICc BIC
## 2704.109 2705.443 2713.769
To assess the forecast accuracy of each method, I split the observations into a training set and a test set. Using the test data, I assessed the accuracy of each forecast with the accuracy() function. The forecast from the SES model (without trend) appeared to be the better one, as its test RMSE was lower.
global_economy_indonesia_train <-
global_economy_indonesia %>% filter(Year < 2008)
mod1 <- global_economy_indonesia_train %>%
model(ANN = ETS(adjusted_exports_USD ~ error("A")+trend("N")+season("N")))
SES_fc1 <- mod1 %>% forecast(h=10)
SES_fc1 %>% autoplot() +
autolayer(global_economy_indonesia_temp) +
labs(x = "Time", y = "$USD",
title = "Inflation Adjusted Exports for Indonesia")
mod2 <- global_economy_indonesia_train %>%
model(AAN = ETS(adjusted_exports_USD ~ error("A")+trend("A")+season("N")))
T_fc2 <- mod2 %>% forecast(h=10)
T_fc2 %>% autoplot() + autolayer(global_economy_indonesia_temp) +
labs(x = "Time", y = "$USD",
title = "Inflation Adjusted Exports for Indonesia")
global_economy_indonesia_test <-
global_economy_indonesia %>% filter(Year >= 2008)
SES_fc1 %>% accuracy(global_economy_indonesia_test)
## # A tibble: 1 × 11
## .model Country .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <fct> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 ANN Indonesia Test 1.47e10 3.32e10 2.77e10 5.87 15.5 NaN NaN 0.581
T_fc2 %>% accuracy(global_economy_indonesia_test)
## # A tibble: 1 × 11
## .model Country .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <fct> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 AAN Indonesia Test 2.19e10 3.60e10 2.83e10 10.5 15.2 NaN NaN 0.558
Using the accuracy() function on each model, I can find the training RMSE. Assuming that the distribution of future observations is normal, the 95% prediction interval for the one-step-ahead forecast can be expressed as the point forecast +/- 1.96 * RMSE, using the RMSE as an estimate of the standard deviation of the one-step forecast error. The 95% prediction interval calculated this way differs slightly from the one computed by R, but agrees to within about 5%. (The interval can also be read directly off the forecast distribution; a sketch follows the hilo output below.)
PI_fc <- ge_indonesia_model %>% forecast(h=1)
PI_fc2 <- ge_indonesia_model2 %>% forecast(h=1)
# training-set accuracy measures (RMSE is in column 5)
RMSE1 <- ge_indonesia_model %>% accuracy()
# column 5 of the fable is the point forecast .mean
lower_PI1 <- PI_fc[5] - 1.96 * RMSE1[5]
upper_PI1 <- PI_fc[5] + 1.96 * RMSE1[5]
lower_PI1
## .mean
## 1 68848073967
upper_PI1
## .mean
## 1 222146273620
PI_fc %>% hilo()
## # A tsibble: 1 x 7 [1Y]
## # Key: Country, .model [1]
## Country .model Year
## <fct> <chr> <dbl>
## 1 Indonesia ANN 2018
## # ℹ 4 more variables: adjusted_exports_USD <dist>, .mean <dbl>, `80%` <hilo>,
## # `95%` <hilo>
RMSE2 <-ge_indonesia_model2 %>% accuracy()
lower_PI2 <- PI_fc2[5] - 1.96 * RMSE2[5]
upper_PI2 <- PI_fc2[5] + 1.96 * RMSE2[5]
lower_PI2
## .mean
## 1 62730723393
upper_PI2
## .mean
## 1 2.2508e+11
PI_fc2%>% hilo()
## # A tsibble: 1 x 7 [1Y]
## # Key: Country, .model [1]
## Country .model Year
## <fct> <chr> <dbl>
## 1 Indonesia ANN 2018
## # ℹ 4 more variables: adjusted_exports_USD <dist>, .mean <dbl>, `80%` <hilo>,
## # `95%` <hilo>
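For a direct numeric comparison with the RMSE-based bounds, the interval limits can also be computed from the forecast distribution itself. A sketch, using quantile() on the distribution column (and relying on the normal forecast distribution of the fitted ETS model):

# Sketch: compute the 95% bounds directly from the forecast distribution
# rather than reading them out of the printed hilo column.
PI_fc %>%
  as_tibble() %>%
  mutate(lower_95 = quantile(adjusted_exports_USD, 0.025),
         upper_95 = quantile(adjusted_exports_USD, 0.975)) %>%
  select(.model, Year, .mean, lower_95, upper_95)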
I created three different forecasts with (1) a simple exponential smoothing model, (2) a trended model, and (3) a damped trend model. The simple exponential smoothing model behaves much like the naive forecasting method on this dataset. This is likely caused by the steep rise in China's GDP over the last 20 years, which places almost all of the weight on the most recent, highest observations. The trended model's forecast continues at a slope close to the most recent one, probably because the series has been climbing steeply at a fairly consistent rate. The damped trend model starts with a very similar slope but clearly tapers off towards the latter part of the forecast.
global_economy_china <- global_economy %>% filter(Country == "China")
ge_china_models <- global_economy_china %>%
model(SES = ETS(GDP ~ error("A")+trend("N")+season("N")),
Trend = ETS(GDP ~ error("A")+trend("A")+season("N")),
dampedTrend = ETS(GDP ~ error("A")+trend("Ad")+season("N"))
)
ge_china_models %>% forecast(h = 30) %>%
autoplot(level = NULL) + autolayer(global_economy_china)
After transforming the data with a log (equivalent to a Box-Cox transformation with lambda = 0; the Guerrero feature suggests a lambda close to 0), I created the same three forecasts again. Here, the effect of damping shows up more clearly, as the damped forecasts diverge from the trended model more quickly. The SES model again looks like the naive forecast. This is due to its high alpha value of 0.9999, which means almost all of the weight is on the latest observation; with no trend component, the result is a forecast that is almost identical to the naive forecast. (A sketch that instead specifies the transformation inside the model formula follows the model reports below.)
global_economy_china %>% features(GDP, features = guerrero)
## # A tibble: 1 × 2
## Country lambda_guerrero
## <fct> <dbl>
## 1 China -0.0345
global_economy_china_log <- global_economy_china %>%
mutate(logGDP = log(GDP))
ge_china_models_log <- global_economy_china_log %>%
model(SES = ETS(logGDP ~ error("A")+trend("N")+season("N")),
Trend = ETS(logGDP ~ error("A")+trend("A")+season("N")),
dampedTrend = ETS(logGDP ~ error("A")+trend("Ad")+season("N"))
)
global_economy_china_log <- global_economy_china_log %>% select(logGDP)
ge_china_models_log %>%
forecast(h = 30) %>%
autoplot(level = NULL) +
autolayer(global_economy_china_log)
global_economy_china_log %>%
model(SES = ETS(logGDP ~ error("A")+trend("N")+season("N"))) %>%
report()
## Series: logGDP
## Model: ETS(A,N,N)
## Smoothing parameters:
## alpha = 0.9999
##
## Initial states:
## l[0]
## 24.77468
##
## sigma^2: 0.0177
##
## AIC AICc BIC
## 5.470638 5.915082 11.651967
global_economy_china_log %>%
model(Trend = ETS(logGDP ~ error("A")+trend("A")+season("N"))) %>%
report()
## Series: logGDP
## Model: ETS(A,A,N)
## Smoothing parameters:
## alpha = 0.9999
## beta = 0.1079782
##
## Initial states:
## l[0] b[0]
## 24.77007 0.04320226
##
## sigma^2: 0.0088
##
## AIC AICc BIC
## -33.07985 -31.92600 -22.77763
global_economy_china_log %>%
model(dampedTrend = ETS(logGDP ~ error("A")+trend("Ad")+season("N"))) %>%
report()
## Series: logGDP
## Model: ETS(A,Ad,N)
## Smoothing parameters:
## alpha = 0.9998999
## beta = 0.2480956
## phi = 0.979999
##
## Initial states:
## l[0] b[0]
## 24.82881 0.003249391
##
## sigma^2: 0.0091
##
## AIC AICc BIC
## -30.46070 -28.81364 -18.09804
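As a side note (not part of the models fitted above), fable also accepts a transformation directly inside the model formula, in which case the forecasts are back-transformed to the original GDP scale automatically. A minimal sketch:

# Sketch: let fable handle the log transformation and the back-transformation.
global_economy_china %>%
  model(SES_log = ETS(log(GDP) ~ error("A") + trend("N") + season("N"))) %>%
  forecast(h = 30) %>%
  autoplot(global_economy_china, level = NULL)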
Multiplicative seasonality is necessary here because the seasonal pattern of the series changes over time: the seasonal fluctuations in the 1950s had a much smaller amplitude, and the amplitude grows steadily from around 1970. Damping the trend does not appear to have much effect, as the estimated damping parameter is fairly high at 0.98 (a damping parameter of 1 would mean no damping at all). The model with the undamped trend is the better fit to the data, with a lower AICc. (A side-by-side comparison via glance() is sketched after the damped model's report below.)
aus_production_gas <- aus_production %>% select(Gas)
gas_fit <- aus_production_gas %>% model(ETS(Gas))
gas_fc <- gas_fit %>% forecast(h = 40)
gas_fc %>% autoplot() + autolayer(aus_production_gas)
gas_fit %>% report()
## Series: Gas
## Model: ETS(M,A,M)
## Smoothing parameters:
## alpha = 0.6528545
## beta = 0.1441675
## gamma = 0.09784922
##
## Initial states:
## l[0] b[0] s[0] s[-1] s[-2] s[-3]
## 5.945592 0.07062881 0.9309236 1.177883 1.074851 0.8163427
##
## sigma^2: 0.0032
##
## AIC AICc BIC
## 1680.929 1681.794 1711.389
gas_fit_damped <- aus_production_gas %>%
model(ETS(Gas ~ error("M")+trend("Ad")+season("M")))
gas_fc_damped <- gas_fit_damped %>% forecast(h = 40)
gas_fc_damped %>% autoplot() + autolayer(aus_production_gas)
gas_fit_damped %>% report()
## Series: Gas
## Model: ETS(M,Ad,M)
## Smoothing parameters:
## alpha = 0.6489044
## beta = 0.1551275
## gamma = 0.09369372
## phi = 0.98
##
## Initial states:
## l[0] b[0] s[0] s[-1] s[-2] s[-3]
## 5.858941 0.09944006 0.9281912 1.177903 1.07678 0.8171255
##
## sigma^2: 0.0033
##
## AIC AICc BIC
## 1684.028 1685.091 1717.873
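The AICc comparison above can also be read off side by side by fitting both specifications in one mable and calling glance(). A short sketch:

# Sketch: compare information criteria for the damped and undamped fits.
aus_production_gas %>%
  model(
    MAM = ETS(Gas ~ error("M") + trend("A") + season("M")),
    MAdM = ETS(Gas ~ error("M") + trend("Ad") + season("M"))
  ) %>%
  glance() %>%
  select(.model, AIC, AICc, BIC)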
Multiplicative seasonality is necessary for this series for the same reason it was necessary in question 7: the amplitude of the seasonality changes over time.
set.seed(32)
myseries <- aus_retail |>
filter(`Series ID` == sample(aus_retail$`Series ID`,1))
autoplot(myseries, Turnover)
Using a damped trend, the forecasts appear to exhibit a decreasing trend almost immediately.
ms_fit <- myseries %>% model(ETS(Turnover))
ms_fc <- ms_fit %>% forecast(h=120)
ms_fc %>% autoplot(level = NULL) + autolayer(myseries)
ms_fit_damped <- myseries %>%
model(ETS(Turnover ~ error("M")+trend("Ad")+season("M")))
ms_fc_damped <- ms_fit_damped %>% forecast(h=120)
ms_fc_damped %>% autoplot(level = NULL) + autolayer(myseries)
Taking a training set consisting of all but the last observation, I calculated the RMSE of the one-step forecast for each method. The RMSE is lower for the damped trend method than for the undamped trend method. However, this only accounts for the error of a single point forecast. Since the corrected AIC indicated a better fit for the undamped trend model, I would still choose the undamped trend. (A cross-validation sketch that averages the one-step error over many origins follows the output below.)
myseries_train <- myseries %>% filter(ym(Month) < "2018-12-01")
myseries_train_fit <- myseries_train %>% model(ETS(Turnover))
myseries_train_fc <- myseries_train_fit %>% forecast(h=1)
myseries_test <- myseries %>% filter(ym(Month) == "2018-12-01")
myseries_train_fc %>% accuracy(myseries_test)
## Warning: 1 error encountered
## [1] subscript out of bounds
## # A tibble: 1 × 12
## .model State Industry .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 ETS(Turn… Sout… Pharmac… Test -2.08 2.08 2.08 -1.49 1.49 NaN NaN NA
myseries_train2 <- myseries %>% filter(ym(Month) < "2018-12-01")
myseries_train_fit2 <- myseries_train2 %>%
model(ETS(Turnover ~ error("M")+trend("Ad")+season("M")))
myseries_train_fc2 <- myseries_train_fit2 %>% forecast(h=1)
myseries_test2 <- myseries %>% filter(ym(Month) == "2018-12-01")
myseries_train_fc2 %>% accuracy(myseries_test2)
## Warning: 1 error encountered
## [1] subscript out of bounds
## # A tibble: 1 × 12
## .model State Industry .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 "ETS(Tur… Sout… Pharmac… Test -1.52 1.52 1.52 -1.09 1.09 NaN NaN NA
Looking at the training accuracy of each model, the RMSE for the damped model is also slightly higher than the RMSE for the undamped model.
accuracy(ms_fit)
## # A tibble: 1 × 12
## State Industry .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 South … Pharmac… ETS(T… Trai… 0.128 2.93 2.03 0.101 4.04 0.402 0.410 0.0545
accuracy(ms_fit_damped)
## # A tibble: 1 × 12
## State Industry .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 South … Pharmac… "ETS(… Trai… 0.165 2.94 2.03 0.290 4.03 0.403 0.413 0.0326
The residuals appear homoscedastic and their distribution looks roughly normal. However, several spikes in the ACF plot fall outside the significance bounds. Since these spikes clearly exceed the 5% bounds, the residuals are autocorrelated, which suggests the model is not capturing all of the structure in the data. Because of this, I also checked the residuals of the damped-trend model. The same issue appears: the residuals again look homoscedastic and roughly normal, but the autocorrelation bounds are again exceeded. (A Ljung-Box test on the residuals is sketched after the residual plots below.)
ms_fit %>% gg_tsresiduals()
ms_fit_damped %>% gg_tsresiduals()
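To quantify the autocorrelation seen in the ACF plots, a Ljung-Box test can be applied to the innovation residuals. A sketch using the fitted objects above (lag = 24 covers two years of monthly data):

# Sketch: Ljung-Box test on the innovation residuals of both fits.
ms_fit %>% augment() %>% features(.innov, ljung_box, lag = 24)
ms_fit_damped %>% augment() %>% features(.innov, ljung_box, lag = 24)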
Splitting the observations into a training set before 2011 and a test set from 2011 onward, I obtain an RMSE of 11.5. This beats the seasonal naive approach, which is also visible in the plots of the forecasts against the actual observations: in the seasonal naive plot the actual data falls outside both the 80% and 95% prediction intervals, while this is not the case for the Holt-Winters model.
myseries_train <- myseries %>% filter(year(Month) < 2011)
myseries_train_fit <- myseries_train %>% model(ETS(Turnover))
myseries_train_fc <- myseries_train_fit %>% forecast(h=96)
myseries_test <- myseries %>% filter(year(Month) >= 2011)
myseries_train_fc %>% accuracy(myseries_test)
## # A tibble: 1 × 12
## .model State Industry .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 ETS(Turn… Sout… Pharmac… Test 8.49 11.5 9.12 7.00 7.62 NaN NaN 0.776
myseries_train_fc %>% autoplot() + autolayer(myseries)
myseries_train_naive_fit <- myseries_train %>%
model(seasonal_naive = SNAIVE(Turnover))
myseries_train_naive_fc <- myseries_train_naive_fit %>% forecast(h=96)
myseries_train_naive_fc %>% accuracy(myseries_test)
## # A tibble: 1 × 12
## .model State Industry .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 seasonal… Sout… Pharmac… Test 18.5 22.7 18.8 15.1 15.5 NaN NaN 0.912
myseries_train_naive_fc %>% autoplot() + autolayer(myseries)
After applying an STL decomposition to the Box-Cox transformed series and modelling the seasonally adjusted data with ETS, the resulting forecasts had an RMSE of 0.216, much smaller than in the previous exercises, although this RMSE is on the Box-Cox transformed scale and is therefore not directly comparable to errors on the original turnover scale. The STL decomposition of the Box-Cox transformed series allowed the seasonality to be removed from the data before fitting. The RMSE stays low even when accuracy is assessed against the actual (Box-Cox transformed) observations rather than just the seasonally adjusted ones. (A more compact version of this workflow using decomposition_model() is sketched at the end.)
#break up training data again
myseries_train <- myseries %>%
filter(year(Month) < 2011)
#find ideal lambda
myseries_train %>% features(Turnover, features = guerrero)
## # A tibble: 1 × 3
## State Industry lambda_guerrero
## <chr> <chr> <dbl>
## 1 South Australia Pharmaceutical, cosmetic and toiletry goods r… 0.0611
#perform box_cox transform on the testing data, and rename
myseries_test <- myseries %>% filter(year(Month) >= 2011) %>%
mutate(box_cox = box_cox(Turnover,0.0611)) %>%
rename(season_adjust = box_cox) #rename to use as comparison for accuracy
#full series transform for graphing in autolayer
#could have done this step before breaking up the training/test
myseries_transformed_full <- myseries %>%
mutate(box_cox = box_cox(Turnover,0.0611)) %>%
select(box_cox)
comp_full <- myseries_transformed_full %>% model(stl = STL(box_cox))
myseries_decomp_full <- components(comp_full)
#perform box_cox transform on training data
myseries_transformed <- myseries_train %>%
mutate(box_cox = box_cox(Turnover,0.0611))
#STL decomposition
comp <- myseries_transformed %>% model(stl = STL(box_cox))
#find components
myseries_decomp <- components(comp)
#fit model to seasonally adjusted data
SeA_myseries_fit <- myseries_decomp %>%
model(ETS(season_adjust)) %>%
rename(.model1 = .model)
#forecast 8 years ahead
SeA_myseries_fc <- SeA_myseries_fit %>% forecast(h = 96)
#plot forecast with actual data
SeA_myseries_fc %>% autoplot() +
autolayer(myseries_transformed_full) +
labs(x = "Time", y = "Box-Cox Transformed Turnover",
title = "8 Year Forecast based on
Seasonally Adjusted Box-Cox Transformed Turnover")
#assess data against the actual observations
SeA_myseries_fc %>% accuracy(myseries_test)
## # A tibble: 1 × 12
## .model State Industry .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 ETS(sea… Sout… Pharmac… Test -0.154 0.216 0.171 -2.80 3.11 NaN NaN 0.641
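The same workflow can be expressed more compactly with decomposition_model(), which fits ETS to the seasonally adjusted series, forecasts the seasonal component (with SNAIVE by default), and re-seasonalises and back-transforms the forecasts automatically. A sketch, reusing the lambda of 0.0611 found above:

# Sketch: STL + ETS on the seasonally adjusted series in a single model spec.
dcmp_fit <- myseries_train %>%
  model(stl_ets = decomposition_model(
    STL(box_cox(Turnover, 0.0611)),
    ETS(season_adjust)
  ))
dcmp_fit %>%
  forecast(h = 96) %>%
  accuracy(myseries %>% filter(year(Month) >= 2011))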