library(readxl)
library(tidyquant)
library(fpp3)
library(moments)
library(tsibble)
library(tsibbledata)
library(ggplot2)
library(dplyr)
library(seasonal)
library(seasonalview)
library(fabletools)
library(fable)
library(ggfortify)
library(quantmod)
library(GGally)
library(feasts)
#PROBLEM 1A
aus_livestock
## # A tsibble: 29,364 x 4 [1M]
## # Key: Animal, State [54]
## Month Animal State Count
## <mth> <fct> <fct> <dbl>
## 1 1976 Jul Bulls, bullocks and steers Australian Capital Territory 2300
## 2 1976 Aug Bulls, bullocks and steers Australian Capital Territory 2100
## 3 1976 Sep Bulls, bullocks and steers Australian Capital Territory 2100
## 4 1976 Oct Bulls, bullocks and steers Australian Capital Territory 1900
## 5 1976 Nov Bulls, bullocks and steers Australian Capital Territory 2100
## 6 1976 Dec Bulls, bullocks and steers Australian Capital Territory 1800
## 7 1977 Jan Bulls, bullocks and steers Australian Capital Territory 1800
## 8 1977 Feb Bulls, bullocks and steers Australian Capital Territory 1900
## 9 1977 Mar Bulls, bullocks and steers Australian Capital Territory 2700
## 10 1977 Apr Bulls, bullocks and steers Australian Capital Territory 2300
## # … with 29,354 more rows
## # ℹ Use `print(n = ...)` to see more rows
aus_pigs <- aus_livestock %>%
filter(Animal == "Pigs") %>%
filter(State == "Victoria") %>%
filter(Month >= yearmonth(as.Date("2010-01-01")))
aus_pigs_expo <- aus_pigs %>%
model(ETS(Count ~ error("A") + trend("N") + season("N")))
report(aus_pigs_expo)
## Series: Count
## Model: ETS(A,N,N)
## Smoothing parameters:
## alpha = 0.225021
##
## Initial states:
## l[0]
## 67178.49
##
## sigma^2: 49463947
##
## AIC AICc BIC
## 2423.061 2423.292 2431.107
#I filtered the aus_livestock data set to only show us pigs slaughtered in Victoria from January 2010 and onwards. I then created a simple exponential smoothing model using the ETS function and ran a report to find the optimal smoothing patterns. The optimal value for alpha is equal to 0.225021 and the optimal value for l0 is equal to 67,178.49.
aus_pigs_expo %>%
forecast(h = 4)%>%
autoplot(aus_pigs) +
labs(title = "Four Month Forecast of Number of Pigs Slaughtered in Victoria") +
labs(subtitle = "Using Simple Exponential Smoothing")
#I created a four month forecast of the number of pigs slaughtered in Victoria using the simple exponential smoothing model.
#PROBLEM 1B
pigs_forecast <- aus_pigs_expo %>%
forecast(h = 4)
pigs_forecast
## # A fable: 4 x 6 [1M]
## # Key: Animal, State, .model [1]
## Animal State .model Month Count .mean
## <fct> <fct> <chr> <mth> <dist> <dbl>
## 1 Pigs Victoria "ETS(Count ~ error(\"A\") +… 2019 Jan N(95295, 4.9e+07) 95295.
## 2 Pigs Victoria "ETS(Count ~ error(\"A\") +… 2019 Feb N(95295, 5.2e+07) 95295.
## 3 Pigs Victoria "ETS(Count ~ error(\"A\") +… 2019 Mar N(95295, 5.4e+07) 95295.
## 4 Pigs Victoria "ETS(Count ~ error(\"A\") +… 2019 Apr N(95295, 5.7e+07) 95295.
sqrt(49463947)
## [1] 7033.061
95294.7 + 1.96*sqrt(49463947)
## [1] 109079.5
#Manually computed upper bound of the 95% prediction interval
95294.7 - 1.96*sqrt(49463947)
## [1] 81509.9
#Manually computed lower bound of the 95% prediction interval
pigs_forecast %>%
hilo(95)
## # A tsibble: 4 x 7 [1M]
## # Key: Animal, State, .model [1]
## Animal State .model Month Count .mean `95%`
## <fct> <fct> <chr> <mth> <dist> <dbl> <hilo>
## 1 Pigs Victor… "ETS(… 2019 Jan N(95295, 4.9e+07) 95295. [81510.15, 109079.2]95
## 2 Pigs Victor… "ETS(… 2019 Feb N(95295, 5.2e+07) 95295. [81165.48, 109423.9]95
## 3 Pigs Victor… "ETS(… 2019 Mar N(95295, 5.4e+07) 95295. [80829.01, 109760.4]95
## 4 Pigs Victor… "ETS(… 2019 Apr N(95295, 5.7e+07) 95295. [80500.19, 110089.2]95
#The manually prediction interval are slightly more precise than the prediction interval generated by the hilo function in R. #PROBLEM 2A
irish_econ <- global_economy %>%
filter(Country == "Ireland")
irish_train <- irish_econ %>%
filter(Year <= year(as.Date("2015-01-01")))
irish_econ %>%
autoplot(Exports) +
labs(title = "Irish Exports")
#I chose Ireland as my country because I have a lot of Irish ancestry in my Mom’s side of the family. I went ahead and created a training set for the data, leaving the the last two years to be forecasted. There is a very obious upward trend in the data that begins in roughly 1973. There doesn’t appear to be much seasonality. The big dip we see towards the end of the data likely corresponds to the Great Recession in 2008. I don’t imagine that Ireland (or anywhere else in the world) would have gotten through that time unscathed.
#Problem 2B
irish_econ_expo <- irish_train %>%
model(ETS(Exports ~ error("A") + trend("N") + season("N")))
irish_econ_expo %>%
forecast(h = 10)%>%
autoplot(irish_train) +
labs(title = "Ten Year Forecast of Irish Exports") +
labs(subtitle = "Using ETS(A,N,N)")
#I used an ANN model to forecast the next ten years of exports in Ireland. I don’t trust this forecast much at all because it does not incorporate any trend into the forecast.
#PROBLEM 2C
accuracy(irish_econ_expo)
## # A tibble: 1 × 11
## Country .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <fct> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Ireland "ETS(Exports ~ … Trai… 1.70 4.07 2.97 2.39 4.73 0.982 0.991 0.341
#I used the accuracy function to compute the RMSE score for my training data using the ANN model. My RMSE score is 4.072052.
#PROBLEM 2D
irish_econ_linear <- irish_train %>%
model(ETS(Exports ~ error("A") + trend("A") + season("N")))
accuracy(irish_econ_linear)
## # A tibble: 1 × 11
## Country .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <fct> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Ireland "ETS(Exports ~ … Trai… 0.553 3.72 2.74 0.501 4.47 0.907 0.904 0.314
#The RMSE score for the Linear Trend model is lower than the simple exponential model. I think this makes sense becuase of the heavy upward trend in the data. I think that because of the trend in the data, a model without a trend component doesn’t make any sense. Because of this, I don’t think the ANN model has any merit when it comes to forecasting this data. The Linear trend (AAN) model should be just a straight upgrade in terms of fit and accuracy based off of my knoweledge of the two models.
#PROBLEM 2E
irish_econ_linear %>%
forecast(h = 10)%>%
autoplot(irish_train) +
labs(title = "Ten Year Forecast of Irish Exports") +
labs(subtitle = "Using ETS(A,A,N)")
#The forecast using the Linear Trend model looks a lot better than the simple exponential model. Again, I think the AAN model is unambigiously better than the ANN model for forecasting this data.
#PROBLEM 3
china_econ <- global_economy %>%
filter(Country == "China")
autoplot(china_econ) +
labs(title = "Chinese GDP")
## Plot variable not specified, automatically selected `.vars = GDP`
#Filtered the global_econ data to pick out China. Then graphed the Chinese GDP using autoplot.
china_log <- china_econ %>%
mutate(GDP = log(GDP))
autoplot(china_log) +
labs(title = "Logged Chinese GDP")
## Plot variable not specified, automatically selected `.vars = GDP`
#In order to make life easier for myself, I created another data set of the Chinese economy, this time with GDP being set equal to the log of the regular Chinese GDP. The above graph shows the logged GDP of China.
china_ann <- china_econ %>%
model(ETS(GDP ~ error("A") + trend("N") + season("N")))
china_ann %>%
forecast(h = 20)%>%
autoplot(china_econ) +
labs(title = "Twenty Year Forecast of Chinese GDP") +
labs(subtitle = "Using ETS(A,N,N)")
#Forecasted the Chinese GDP twenty years into the future using an ANN model. I am willing to bet that a chimpanzee could tell you why this forecast looks bad. The gigantic upward trend is almost completely ignored by this model.
china_lann <- china_log %>%
model(ETS(GDP ~ error("A") + trend("N") + season("N")))
china_lann %>%
forecast(h = 20)%>%
autoplot(china_log) +
labs(title = "Twenty Year Forecast of Chinese Logged GDP") +
labs(subtitle = "Using ETS(A,N,N)")
#Did the same thing as last time, just with the logged GDP data. Still looks pretty bad in my opinion.
china_aan <- china_econ %>%
model(ETS(GDP ~ error("A") + trend("A") + season("N")))
china_aan %>%
forecast(h = 20)%>%
autoplot(china_econ) +
labs(title = "Twenty Year Forecast of Chinese GDP") +
labs(subtitle = "Using ETS(A,A,N)")
#I created a model and forecast of the Chinese GDP, this time using an A,A,N model, which incoporates a trend component. I am still not convinced that this model accurately forecasts the Chinese GDP, at least in the near future, because the original plot shows that GDP is growing exponentially. This model incorporates a LINEAR trend, meaning I think that the model is grossly underestimating just how much China’s GDP will grow over twenty years.
china_laan <- china_log %>%
model(ETS(GDP ~ error("A") + trend("A") + season("N")))
china_laan %>%
forecast(h = 20)%>%
autoplot(china_log) +
labs(title = "Twenty Year Forecast of Chinese Logged GDP") +
labs(subtitle = "Using ETS(A,A,N)")
#Exact same as before, just with the logged GDP data.
china_econ %>%
model(
`Damped Holt's method` = ETS(GDP ~ error("A") +
trend("Ad", phi = 0.9) + season("N"))) %>%
forecast(h = 20) %>%
autoplot(china_econ) +
labs(title = "Twenty Year Forecast of Chinese GDP") +
labs(subtitle = "Using ETS(A,A,N) with a Damping Component")
#Forecasted the Chinese GDP over the next twenty years, this time using a damped linear trend. This damping component just flattens out the forecasted values in the log run, which accounts for the fact that almost nothing continues linearly increasing forever. However, it still suffers from the same fundamental problem of the linear trend, that is that its being used to forecast exponentially growing data.
china_log %>%
model(
`Damped Holt's method` = ETS(GDP ~ error("A") +
trend("Ad", phi = 0.9) + season("N"))) %>%
forecast(h = 20) %>%
autoplot(china_log) +
labs(title = "Twenty Year Forecast of Chinese Logged GDP") +
labs(subtitle = "Using ETS(A,A,N) with a Damping Component")
#Same as above, just with the logged GDP data.
#Overall, I don’t think any of these models accurately forecast the Chinese GDP. None of them capture the exponential growth that we see in the data. However, this does mean that a box-cox transformation is useful, becasue a log basically just removes an exponent. Using the logged data, we would have a much easier time computing prediction intervals.
#PROBLEM 4
aus_gas <- aus_production %>%
select("Gas")
autoplot(aus_gas) +
labs(title = "Australian Gas Production")
## Plot variable not specified, automatically selected `.vars = Gas`
#Created a data set from the aus_production set with only the gas prices. Upon plotting the data, its obvious that we’ll need a model that incorporates a seasonality component. From the looks of the plot, I would also say we need multiplicative seasonality, becuase the seasonal variation is changing over time.
aus_hw <- aus_gas %>%
model(
hw = ETS(Gas ~ error("M") + trend("A") + season("M")),
hwdamped = ETS(Gas ~ error("M") + trend("Ad", phi = 0.9) + season("M")))%>%
forecast(h = 36) %>%
autoplot(aus_gas) +
labs(title = "Nine Year Forecast of Australian Gas Production") +
labs(subtitle = "Using Holt-Winters Multiplicative Method")
aus_hw
#I forecasted the Australian gas production using the Holt-Winters Multiplicative Method, both with and without a damping component.
fit_hw <- aus_gas %>%
model(
hw = ETS(Gas ~ error("M") + trend("A") + season("M")))
fit_hwdamped <- aus_gas %>%
model(
hw = ETS(Gas ~ error("M") + trend("Ad", phi = 0.9) + season("M")))
residuals(fit_hw)
## # A tsibble: 218 x 3 [1Q]
## # Key: .model [1]
## .model Quarter .resid
## <chr> <qtr> <dbl>
## 1 hw 1956 Q1 0.0181
## 2 hw 1956 Q2 -0.0958
## 3 hw 1956 Q3 0.0267
## 4 hw 1956 Q4 0.0901
## 5 hw 1957 Q1 -0.0387
## 6 hw 1957 Q2 0.0495
## 7 hw 1957 Q3 -0.0986
## 8 hw 1957 Q4 0.0358
## 9 hw 1958 Q1 -0.0350
## 10 hw 1958 Q2 0.0501
## # … with 208 more rows
## # ℹ Use `print(n = ...)` to see more rows
residuals(fit_hwdamped)
## # A tsibble: 218 x 3 [1Q]
## # Key: .model [1]
## .model Quarter .resid
## <chr> <qtr> <dbl>
## 1 hw 1956 Q1 0.0429
## 2 hw 1956 Q2 -0.0871
## 3 hw 1956 Q3 0.0237
## 4 hw 1956 Q4 0.0877
## 5 hw 1957 Q1 -0.0456
## 6 hw 1957 Q2 0.0483
## 7 hw 1957 Q3 -0.0925
## 8 hw 1957 Q4 0.0339
## 9 hw 1958 Q1 -0.0381
## 10 hw 1958 Q2 0.0577
## # … with 208 more rows
## # ℹ Use `print(n = ...)` to see more rows
#The damped HW model significantly decreased the the size of the residuals compared to the un-damped HW model. This means that the damped HW model is a better fit for our data. #PROBELM 5A
set.seed(0987654321)
myseries <- aus_retail %>%
filter(`Series ID` == sample(aus_retail$`Series ID`, 1))
myseries %>% autoplot(Turnover)
#Multiplicative seasonality is neede for this series because the seasonal variation is not constant, rather it is increasing in proportion to the level increase of the data.
#PROBLEM 5B
myseries_hw <- myseries %>%
model(
hw = ETS(Turnover ~ error("M") + trend("A") + season("M")))%>%
forecast(h = 36) %>%
autoplot(myseries) +
labs(title = "Three Year Forecast of Australian Retail Turnover") +
labs(subtitle = "Using Holt-Winters Multiplicative Method")
myseries_hw
myseries_hwdamped <- myseries %>%
model(
hwdamped = ETS(Turnover ~ error("M") + trend("Ad", phi = 0.9) + season("M"))) %>%
forecast(h = 36) %>%
autoplot(myseries) +
labs(title = "Three Year Forecast of Australian Retail Turnover") +
labs(subtitle = "Using Damped Holt-Winters Multiplicative Method")
myseries_hwdamped
#I forecasted Australian Retail Turnover using both the Holt-Winters
Multiplicative Method, both with and without a damping parameter.
#PROBLEM 5C
myseries %>%
stretch_tsibble(.init = 10) %>%
model(
hw = ETS(Turnover ~ error("M") + trend("A") + season("M")),
hwdamped = ETS(Turnover ~ error("M") + trend("Ad", phi = 0.9) + season("M")))%>%
forecast(h = 1) %>%
accuracy(myseries)
## Warning: 8 errors (2 unique) encountered for hw
## [3] A seasonal ETS model cannot be used for this data.
## [5] Not enough data to estimate this ETS model.
## Warning: 8 errors (2 unique) encountered for hwdamped
## [3] A seasonal ETS model cannot be used for this data.
## [5] Not enough data to estimate this ETS model.
## Warning: The future dataset is incomplete, incomplete out-of-sample data will be treated as missing.
## 1 observation is missing at 2019 Jan
## # A tibble: 2 × 12
## .model State Indus…¹ .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 hw Vict… Other … Test -0.0723 5.35 3.91 -0.310 5.80 0.700 0.743 0.335
## 2 hwdamp… Vict… Other … Test 0.379 5.43 3.87 0.309 5.72 0.693 0.754 0.241
## # … with abbreviated variable name ¹Industry
#The RMSE score for the un-damped and damped method are very similar, which lines up with how similar their forecasts are. I personally prefer the damped model, simply becuase I don’t think that in an industry that is as vital as retail, turover will continously grow. I think that there will always be people who need the money enough that they’d be willing to keep working their retail job.
#PORBLEM 5D
hwdamped <- myseries %>%
model(
hw = ETS(Turnover ~ error("M") + trend("Ad", phi = 0.9) + season("M")))
gg_tsresiduals(hwdamped)
#The residuals from the damped HW model do not resemble white noise according to the ACF plot.
#PROBLEM 5E
myseries %>%
filter(year(Month) < 2011) %>%
model(
snaive = SNAIVE(Turnover),
hw = ETS(Turnover ~ error("M") + trend("A") + season("M")),
hwdamped = ETS(Turnover ~ error("M") + trend("Ad", phi = 0.9) + season("M"))
) %>%
forecast(h = "7 years") %>%
accuracy(myseries)
## # A tibble: 3 × 12
## .model State Indus…¹ .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 hw Vict… Other … Test -18.9 21.4 19.0 -23.3 23.4 3.44 2.96 0.480
## 2 hwdamped Vict… Other … Test -9.32 11.5 9.62 -11.9 12.3 1.75 1.59 0.311
## 3 snaive Vict… Other … Test -7.57 12.8 9.72 -9.65 12.3 1.76 1.77 0.478
## # … with abbreviated variable name ¹Industry
#According to the test set RMSE, the HW damped model is more accurate than the SNAIVE approach.
#PROBLEM 6 A
aus_trips <- tourism %>%
summarise(Trips = sum(Trips))
aus_trips %>%
autoplot(Trips)
#I summarized the total number of trips across Australia using the tourism data set. The data appears to be highly seasonal, with a slight downward trend until 2010, which then becomes a sharp upward trend.
#Problem 6B
decomp <- aus_trips %>%
model(STL(Trips)) %>%
components()
decomp %>%
as_tsibble() %>%
autoplot(season_adjust)
#Decomposed the data with STL and plotted the seasonally adjusted data.
#PROBLEM 6C
stletsdamped <- decomposition_model(
STL(Trips),
ETS(season_adjust ~ error("A") + trend("Ad") + season("N"))
)
aus_trips %>%
model(dcmp_AAdN = stletsdamped) %>%
forecast(h = "2 years") %>%
autoplot(aus_trips) +
labs(title = "2 Year Forecast of Overnight Trips in Australia")
#Forecasted the next two years of overnight trips in Australia using an additive damped trend method.
#PROBLEM 6D
stl_holt <- decomposition_model(
STL(Trips),
ETS(season_adjust ~ error("A") + trend("A") + season("N"))
)
aus_trips %>%
model(dcmp_AAN = stl_holt) %>%
forecast(h = "2 years") %>%
autoplot(aus_trips) +
labs(title = "2 Year Forecast of Overnight Trips in Australia")
#Forecasted the next two years using a linear trend model.
#PROBLEM 6E
aus_trips %>%
model(ets = ETS(Trips)) %>%
forecast(h = "2 years") %>%
autoplot(aus_trips)
#PROBLEM 6F
fit <- aus_trips %>%
model(
dcmp_AAdN = stletsdamped,
dcmp_AAN = stl_holt,
ets = ETS(Trips)
)
accuracy(fit)
## # A tibble: 3 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 dcmp_AAdN Training 103. 763. 576. 0.367 2.72 0.607 0.629 -0.0174
## 2 dcmp_AAN Training 99.7 763. 574. 0.359 2.71 0.604 0.628 -0.0182
## 3 ets Training 105. 794. 604. 0.379 2.86 0.636 0.653 -0.00151
#The stl decomposition forecast using the additive trend model gives us a slightly better RMSE score.
#PROBLEM 6G
fit %>%
forecast(h = "2 years") %>%
autoplot(aus_trips, level = NULL)
#All three forecasts look very similar. Becuase it has the lowest RMSE score, I’d use the additive trend model.
best <- fit %>%
select(dcmp_AAN)
augment(best) %>% gg_tsdisplay(.resid, lag_max = 24, plot_type = "histogram")
## Warning: Removed 4 row(s) containing missing values (geom_path).
## Warning: Removed 4 rows containing missing values (geom_point).
## Warning: Removed 4 rows containing non-finite values (stat_bin).
augment(best) %>%
features(.innov, ljung_box, lag = 24, dof = 4)
## # A tibble: 1 × 3
## .model lb_stat lb_pvalue
## <chr> <dbl> <dbl>
## 1 dcmp_AAN 33.3 0.0311
augment(best) %>%
features(.innov, ljung_box, lag = 24, dof = 4)
## # A tibble: 1 × 3
## .model lb_stat lb_pvalue
## <chr> <dbl> <dbl>
## 1 dcmp_AAN 33.3 0.0311
#The residuals for our model have some autocorrelation, but not much. The null hypothesis of the lunch box test is rejected, meaning our residuals do not resemble white noise.
#PROBLEM 7A
nz_arrivals <- aus_arrivals %>%
filter(Origin == "NZ")
nz_arrivals %>%
autoplot(Arrivals / 1e3) + labs(y = "Thousands of people")
#Created a time plot of the arrivals to Australia from New Zealand. There is a clear upward trend begining around 1985 with some definite seasonality. The seasonal varatiation is growing with the level of the data, so multiplicative seasonal components need to be used.
#PROBLEM 7B
nz_train <- nz_arrivals %>%
slice(1:(n() - 8))
nz_train %>%
model(ETS(Arrivals ~ error("M") + trend("A") + season("M"))) %>%
forecast(h = "2 years") %>%
autoplot() +
autolayer(nz_arrivals, Arrivals)
#Plotted a two year forecast using a training set, using the HW multiplicative method.
#PROBLEM 7C #A multiplicative seasonal component is needed here because the seasonal variation is increasing in proportion to the level of the data.
#PROBLEM 7D
nz_fc <- nz_train %>%
model(
ets = ETS(Arrivals),
log_ets = ETS(log(Arrivals)),
snaive = SNAIVE(Arrivals),
stl = decomposition_model(STL(log(Arrivals)), ETS(season_adjust))
) %>%
forecast(h = "2 years")
nz_fc %>%
autoplot(level = NULL) +
autolayer(filter(nz_arrivals, year(Quarter) > 2000), Arrivals)
nz_fc %>%
autoplot(level = NULL) +
autolayer(nz_arrivals, Arrivals)
#PROBLEM 7G
nz_fc %>%
accuracy(nz_arrivals)
## # A tibble: 4 × 11
## .model Origin .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 ets NZ Test -3495. 14913. 11421. -0.964 3.78 0.768 0.771 -0.0260
## 2 log_ets NZ Test 2467. 13342. 11904. 1.03 4.03 0.800 0.689 -0.0786
## 3 snaive NZ Test 9709. 18051. 17156. 3.44 5.80 1.15 0.933 -0.239
## 4 stl NZ Test -12535. 22723. 16172. -4.02 5.23 1.09 1.17 0.109
#According to the RMSE scores, the log_ets model is the most accurate.
log_ets <- nz_train %>%
model(ETS(log(Arrivals)))
log_ets %>% gg_tsresiduals()
#The log_ets model also appears to pass the residual test based on the ACF.
augment(log_ets) %>%
features(.innov, ljung_box, lag = 12, dof = 6)
## # A tibble: 1 × 4
## Origin .model lb_stat lb_pvalue
## <chr> <chr> <dbl> <dbl>
## 1 NZ ETS(log(Arrivals)) 11.0 0.0887
#The log_ets model also passes the lunch box test. Our residuals resemble white noise.
#PROBLEM 7F
nz_cv <- nz_arrivals %>%
slice(1:(n() - 3)) %>%
stretch_tsibble(.init = 36, .step = 3)
nz_cv %>%
model(
ets = ETS(Arrivals),
log_ets = ETS(log(Arrivals)),
snaive = SNAIVE(Arrivals),
stl = decomposition_model(STL(log(Arrivals)), ETS(season_adjust))
) %>%
forecast(h = 3) %>%
accuracy(nz_arrivals)
## # A tibble: 4 × 11
## .model Origin .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 ets NZ Test 4627. 15327. 11799. 2.23 6.45 0.793 0.797 0.283
## 2 log_ets NZ Test 4388. 15047. 11566. 1.99 6.36 0.778 0.782 0.268
## 3 snaive NZ Test 8244. 18768. 14422. 3.83 7.76 0.970 0.976 0.566
## 4 stl NZ Test 4252. 15618. 11873. 2.04 6.25 0.798 0.812 0.244
#The log_ets model still seems to be the best when using time series cross validation. It still has the lowest RMSE score.
#PROBLEM 8A
cement_cv <- aus_production %>%
slice(1:(n() - 4)) %>%
stretch_tsibble(.init = 5 * 4)
cement_fc <- cement_cv %>%
model(ETS(Cement), SNAIVE(Cement)) %>%
forecast(h = "1 year")