Homework

library(readxl)
library(tidyquant)
library(fpp3)
library(moments)
library(tsibble)
library(tsibbledata)
library(ggplot2)
library(dplyr)
library(seasonal)
library(seasonalview)
library(fabletools)
library(fable)
library(ggfortify)
library(quantmod)
library(GGally)
library(feasts)

#PROBLEM 1A

aus_livestock

## # A tsibble: 29,364 x 4 [1M]
## # Key:       Animal, State [54]
##       Month Animal                     State                        Count
##       <mth> <fct>                      <fct>                        <dbl>
##  1 1976 Jul Bulls, bullocks and steers Australian Capital Territory  2300
##  2 1976 Aug Bulls, bullocks and steers Australian Capital Territory  2100
##  3 1976 Sep Bulls, bullocks and steers Australian Capital Territory  2100
##  4 1976 Oct Bulls, bullocks and steers Australian Capital Territory  1900
##  5 1976 Nov Bulls, bullocks and steers Australian Capital Territory  2100
##  6 1976 Dec Bulls, bullocks and steers Australian Capital Territory  1800
##  7 1977 Jan Bulls, bullocks and steers Australian Capital Territory  1800
##  8 1977 Feb Bulls, bullocks and steers Australian Capital Territory  1900
##  9 1977 Mar Bulls, bullocks and steers Australian Capital Territory  2700
## 10 1977 Apr Bulls, bullocks and steers Australian Capital Territory  2300
## # … with 29,354 more rows
## # ℹ Use `print(n = ...)` to see more rows

aus_pigs <- aus_livestock %>%
  filter(Animal == "Pigs") %>%
  filter(State == "Victoria") %>%
  filter(Month >= yearmonth(as.Date("2010-01-01")))
  
aus_pigs_expo <- aus_pigs %>%
  model(ETS(Count ~ error("A") + trend("N") + season("N")))

report(aus_pigs_expo)

## Series: Count 
## Model: ETS(A,N,N) 
##   Smoothing parameters:
##     alpha = 0.225021 
## 
##   Initial states:
##      l[0]
##  67178.49
## 
##   sigma^2:  49463947
## 
##      AIC     AICc      BIC 
## 2423.061 2423.292 2431.107

#I filtered the aus_livestock data set to only show us pigs slaughtered in Victoria from January 2010 and onwards. I then created a simple exponential smoothing model using the ETS function and ran a report to find the optimal smoothing patterns. The optimal value for alpha is equal to 0.225021 and the optimal value for l0 is equal to 67,178.49.

aus_pigs_expo %>%
  forecast(h = 4)%>%
  autoplot(aus_pigs) +
  labs(title = "Four Month Forecast of Number of Pigs Slaughtered in Victoria") +
  labs(subtitle = "Using Simple Exponential Smoothing")

#I created a four month forecast of the number of pigs slaughtered in Victoria using the simple exponential smoothing model.

#PROBLEM 1B

pigs_forecast <- aus_pigs_expo %>%
  forecast(h = 4)
pigs_forecast

## # A fable: 4 x 6 [1M]
## # Key:     Animal, State, .model [1]
##   Animal State    .model                          Month             Count  .mean
##   <fct>  <fct>    <chr>                           <mth>            <dist>  <dbl>
## 1 Pigs   Victoria "ETS(Count ~ error(\"A\") +… 2019 Jan N(95295, 4.9e+07) 95295.
## 2 Pigs   Victoria "ETS(Count ~ error(\"A\") +… 2019 Feb N(95295, 5.2e+07) 95295.
## 3 Pigs   Victoria "ETS(Count ~ error(\"A\") +… 2019 Mar N(95295, 5.4e+07) 95295.
## 4 Pigs   Victoria "ETS(Count ~ error(\"A\") +… 2019 Apr N(95295, 5.7e+07) 95295.

sqrt(49463947)

## [1] 7033.061

95294.7 + 1.96*sqrt(49463947)

## [1] 109079.5

#Manually computed upper bound of the 95% prediction interval

95294.7 - 1.96*sqrt(49463947)

## [1] 81509.9

#Manually computed lower bound of the 95% prediction interval

pigs_forecast %>%
  hilo(95)

## # A tsibble: 4 x 7 [1M]
## # Key:       Animal, State, .model [1]
##   Animal State   .model    Month             Count  .mean                  `95%`
##   <fct>  <fct>   <chr>     <mth>            <dist>  <dbl>                 <hilo>
## 1 Pigs   Victor… "ETS(… 2019 Jan N(95295, 4.9e+07) 95295. [81510.15, 109079.2]95
## 2 Pigs   Victor… "ETS(… 2019 Feb N(95295, 5.2e+07) 95295. [81165.48, 109423.9]95
## 3 Pigs   Victor… "ETS(… 2019 Mar N(95295, 5.4e+07) 95295. [80829.01, 109760.4]95
## 4 Pigs   Victor… "ETS(… 2019 Apr N(95295, 5.7e+07) 95295. [80500.19, 110089.2]95

#The manually prediction interval are slightly more precise than the prediction interval generated by the hilo function in R. #PROBLEM 2A

irish_econ <- global_economy %>%
  filter(Country == "Ireland")

irish_train <- irish_econ %>%
  filter(Year <= year(as.Date("2015-01-01")))


irish_econ %>%
  autoplot(Exports) +
  labs(title = "Irish Exports")

#I chose Ireland as my country because I have a lot of Irish ancestry in my Mom’s side of the family. I went ahead and created a training set for the data, leaving the the last two years to be forecasted. There is a very obious upward trend in the data that begins in roughly 1973. There doesn’t appear to be much seasonality. The big dip we see towards the end of the data likely corresponds to the Great Recession in 2008. I don’t imagine that Ireland (or anywhere else in the world) would have gotten through that time unscathed.

#Problem 2B

irish_econ_expo <- irish_train %>%
  model(ETS(Exports ~ error("A") + trend("N") + season("N")))


irish_econ_expo %>%
  forecast(h = 10)%>%
  autoplot(irish_train) +
  labs(title = "Ten Year Forecast of Irish Exports") +
  labs(subtitle = "Using ETS(A,N,N)")

#I used an ANN model to forecast the next ten years of exports in Ireland. I don’t trust this forecast much at all because it does not incorporate any trend into the forecast.

#PROBLEM 2C

accuracy(irish_econ_expo)

## # A tibble: 1 × 11
##   Country .model           .type    ME  RMSE   MAE   MPE  MAPE  MASE RMSSE  ACF1
##   <fct>   <chr>            <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Ireland "ETS(Exports ~ … Trai…  1.70  4.07  2.97  2.39  4.73 0.982 0.991 0.341

#I used the accuracy function to compute the RMSE score for my training data using the ANN model. My RMSE score is 4.072052.

#PROBLEM 2D

irish_econ_linear <- irish_train %>%
  model(ETS(Exports ~ error("A") + trend("A") + season("N")))

accuracy(irish_econ_linear)

## # A tibble: 1 × 11
##   Country .model           .type    ME  RMSE   MAE   MPE  MAPE  MASE RMSSE  ACF1
##   <fct>   <chr>            <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Ireland "ETS(Exports ~ … Trai… 0.553  3.72  2.74 0.501  4.47 0.907 0.904 0.314

#The RMSE score for the Linear Trend model is lower than the simple exponential model. I think this makes sense becuase of the heavy upward trend in the data. I think that because of the trend in the data, a model without a trend component doesn’t make any sense. Because of this, I don’t think the ANN model has any merit when it comes to forecasting this data. The Linear trend (AAN) model should be just a straight upgrade in terms of fit and accuracy based off of my knoweledge of the two models.

#PROBLEM 2E

irish_econ_linear %>%
  forecast(h = 10)%>%
  autoplot(irish_train) +
  labs(title = "Ten Year Forecast of Irish Exports") +
  labs(subtitle = "Using ETS(A,A,N)")

#The forecast using the Linear Trend model looks a lot better than the simple exponential model. Again, I think the AAN model is unambigiously better than the ANN model for forecasting this data.

#PROBLEM 3

china_econ <- global_economy %>%
  filter(Country == "China")

autoplot(china_econ) +
  labs(title = "Chinese GDP")

## Plot variable not specified, automatically selected `.vars = GDP`

#Filtered the global_econ data to pick out China. Then graphed the Chinese GDP using autoplot.

china_log <- china_econ %>%
  mutate(GDP = log(GDP))

autoplot(china_log) +
  labs(title = "Logged Chinese GDP")

## Plot variable not specified, automatically selected `.vars = GDP`

#In order to make life easier for myself, I created another data set of the Chinese economy, this time with GDP being set equal to the log of the regular Chinese GDP. The above graph shows the logged GDP of China.

china_ann <- china_econ %>%
  model(ETS(GDP ~ error("A") + trend("N") + season("N")))

china_ann %>%
  forecast(h = 20)%>%
  autoplot(china_econ) +
  labs(title = "Twenty Year Forecast of Chinese GDP") +
  labs(subtitle = "Using ETS(A,N,N)")

#Forecasted the Chinese GDP twenty years into the future using an ANN model. I am willing to bet that a chimpanzee could tell you why this forecast looks bad. The gigantic upward trend is almost completely ignored by this model.

china_lann <- china_log %>%
  model(ETS(GDP ~ error("A") + trend("N") + season("N")))

china_lann %>%
  forecast(h = 20)%>%
  autoplot(china_log) +
  labs(title = "Twenty Year Forecast of Chinese Logged GDP") +
  labs(subtitle = "Using ETS(A,N,N)")

#Did the same thing as last time, just with the logged GDP data. Still looks pretty bad in my opinion.

china_aan <- china_econ %>%
  model(ETS(GDP ~ error("A") + trend("A") + season("N")))

china_aan %>%
  forecast(h = 20)%>%
  autoplot(china_econ) +
  labs(title = "Twenty Year Forecast of Chinese GDP") +
  labs(subtitle = "Using ETS(A,A,N)")

#I created a model and forecast of the Chinese GDP, this time using an A,A,N model, which incoporates a trend component. I am still not convinced that this model accurately forecasts the Chinese GDP, at least in the near future, because the original plot shows that GDP is growing exponentially. This model incorporates a LINEAR trend, meaning I think that the model is grossly underestimating just how much China’s GDP will grow over twenty years.

china_laan <- china_log %>%
  model(ETS(GDP ~ error("A") + trend("A") + season("N")))

china_laan %>%
  forecast(h = 20)%>%
  autoplot(china_log) +
  labs(title = "Twenty Year Forecast of Chinese Logged GDP") +
  labs(subtitle = "Using ETS(A,A,N)")

#Exact same as before, just with the logged GDP data.

china_econ %>%
  model(
    `Damped Holt's method` = ETS(GDP ~ error("A") +
                       trend("Ad", phi = 0.9) + season("N"))) %>%
  forecast(h = 20) %>%
  autoplot(china_econ) +
  labs(title = "Twenty Year Forecast of Chinese GDP") +
  labs(subtitle = "Using ETS(A,A,N) with a Damping Component")

#Forecasted the Chinese GDP over the next twenty years, this time using a damped linear trend. This damping component just flattens out the forecasted values in the log run, which accounts for the fact that almost nothing continues linearly increasing forever. However, it still suffers from the same fundamental problem of the linear trend, that is that its being used to forecast exponentially growing data.

china_log %>%
  model(
    `Damped Holt's method` = ETS(GDP ~ error("A") +
                       trend("Ad", phi = 0.9) + season("N"))) %>%
  forecast(h = 20) %>%
  autoplot(china_log) +
  labs(title = "Twenty Year Forecast of Chinese Logged GDP") +
  labs(subtitle = "Using ETS(A,A,N) with a Damping Component")

#Same as above, just with the logged GDP data.

#Overall, I don’t think any of these models accurately forecast the Chinese GDP. None of them capture the exponential growth that we see in the data. However, this does mean that a box-cox transformation is useful, becasue a log basically just removes an exponent. Using the logged data, we would have a much easier time computing prediction intervals.

#PROBLEM 4

aus_gas <- aus_production %>%
  select("Gas")

autoplot(aus_gas) +
  labs(title = "Australian Gas Production")

## Plot variable not specified, automatically selected `.vars = Gas`

#Created a data set from the aus_production set with only the gas prices. Upon plotting the data, its obvious that we’ll need a model that incorporates a seasonality component. From the looks of the plot, I would also say we need multiplicative seasonality, becuase the seasonal variation is changing over time.

aus_hw <- aus_gas %>%
  model(
    hw = ETS(Gas ~ error("M") + trend("A") + season("M")),
    hwdamped = ETS(Gas ~ error("M") + trend("Ad", phi = 0.9) + season("M")))%>%
  forecast(h = 36) %>%
  autoplot(aus_gas) +
  labs(title = "Nine Year Forecast of Australian Gas Production") +
  labs(subtitle = "Using Holt-Winters Multiplicative Method") 
aus_hw

#I forecasted the Australian gas production using the Holt-Winters Multiplicative Method, both with and without a damping component.

fit_hw <- aus_gas %>%
  model(
    hw = ETS(Gas ~ error("M") + trend("A") + season("M")))

fit_hwdamped <- aus_gas %>%
  model(
    hw = ETS(Gas ~ error("M") + trend("Ad", phi = 0.9) + season("M")))
residuals(fit_hw)

## # A tsibble: 218 x 3 [1Q]
## # Key:       .model [1]
##    .model Quarter  .resid
##    <chr>    <qtr>   <dbl>
##  1 hw     1956 Q1  0.0181
##  2 hw     1956 Q2 -0.0958
##  3 hw     1956 Q3  0.0267
##  4 hw     1956 Q4  0.0901
##  5 hw     1957 Q1 -0.0387
##  6 hw     1957 Q2  0.0495
##  7 hw     1957 Q3 -0.0986
##  8 hw     1957 Q4  0.0358
##  9 hw     1958 Q1 -0.0350
## 10 hw     1958 Q2  0.0501
## # … with 208 more rows
## # ℹ Use `print(n = ...)` to see more rows

residuals(fit_hwdamped)

## # A tsibble: 218 x 3 [1Q]
## # Key:       .model [1]
##    .model Quarter  .resid
##    <chr>    <qtr>   <dbl>
##  1 hw     1956 Q1  0.0429
##  2 hw     1956 Q2 -0.0871
##  3 hw     1956 Q3  0.0237
##  4 hw     1956 Q4  0.0877
##  5 hw     1957 Q1 -0.0456
##  6 hw     1957 Q2  0.0483
##  7 hw     1957 Q3 -0.0925
##  8 hw     1957 Q4  0.0339
##  9 hw     1958 Q1 -0.0381
## 10 hw     1958 Q2  0.0577
## # … with 208 more rows
## # ℹ Use `print(n = ...)` to see more rows

#The damped HW model significantly decreased the the size of the residuals compared to the un-damped HW model. This means that the damped HW model is a better fit for our data. #PROBELM 5A

set.seed(0987654321)
myseries <- aus_retail %>%
  filter(`Series ID` == sample(aus_retail$`Series ID`, 1))
myseries %>% autoplot(Turnover)

#Multiplicative seasonality is neede for this series because the seasonal variation is not constant, rather it is increasing in proportion to the level increase of the data.

#PROBLEM 5B

myseries_hw <- myseries %>% 
  model(
    hw = ETS(Turnover ~ error("M") + trend("A") + season("M")))%>%
  forecast(h = 36) %>%
  autoplot(myseries) +
  labs(title = "Three Year Forecast of Australian Retail Turnover") +
  labs(subtitle = "Using Holt-Winters Multiplicative Method") 
myseries_hw

myseries_hwdamped <- myseries %>% 
  model(
    hwdamped = ETS(Turnover ~ error("M") + trend("Ad", phi = 0.9) + season("M"))) %>%
    forecast(h = 36) %>%
  autoplot(myseries) +
  labs(title = "Three Year Forecast of Australian Retail Turnover") +
  labs(subtitle = "Using Damped Holt-Winters Multiplicative Method") 
myseries_hwdamped

#I forecasted Australian Retail Turnover using both the Holt-Winters Multiplicative Method, both with and without a damping parameter.

#PROBLEM 5C

myseries %>%
  stretch_tsibble(.init = 10) %>%
  model(
    hw = ETS(Turnover ~ error("M") + trend("A") + season("M")),
    hwdamped = ETS(Turnover ~ error("M") + trend("Ad", phi = 0.9) + season("M")))%>%
  forecast(h = 1) %>%
  accuracy(myseries)

## Warning: 8 errors (2 unique) encountered for hw
## [3] A seasonal ETS model cannot be used for this data.
## [5] Not enough data to estimate this ETS model.

## Warning: 8 errors (2 unique) encountered for hwdamped
## [3] A seasonal ETS model cannot be used for this data.
## [5] Not enough data to estimate this ETS model.

## Warning: The future dataset is incomplete, incomplete out-of-sample data will be treated as missing. 
## 1 observation is missing at 2019 Jan

## # A tibble: 2 × 12
##   .model  State Indus…¹ .type      ME  RMSE   MAE    MPE  MAPE  MASE RMSSE  ACF1
##   <chr>   <chr> <chr>   <chr>   <dbl> <dbl> <dbl>  <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 hw      Vict… Other … Test  -0.0723  5.35  3.91 -0.310  5.80 0.700 0.743 0.335
## 2 hwdamp… Vict… Other … Test   0.379   5.43  3.87  0.309  5.72 0.693 0.754 0.241
## # … with abbreviated variable name ¹Industry

#The RMSE score for the un-damped and damped method are very similar, which lines up with how similar their forecasts are. I personally prefer the damped model, simply becuase I don’t think that in an industry that is as vital as retail, turover will continously grow. I think that there will always be people who need the money enough that they’d be willing to keep working their retail job.

#PORBLEM 5D

hwdamped <- myseries %>%
  model(
    hw = ETS(Turnover ~ error("M") + trend("Ad", phi = 0.9) + season("M")))

gg_tsresiduals(hwdamped)

#The residuals from the damped HW model do not resemble white noise according to the ACF plot.

#PROBLEM 5E

myseries %>%
  filter(year(Month) < 2011) %>%
  model(
    snaive = SNAIVE(Turnover),
    hw = ETS(Turnover ~ error("M") + trend("A") + season("M")),
    hwdamped = ETS(Turnover ~ error("M") + trend("Ad", phi = 0.9) + season("M"))
  ) %>%
  forecast(h = "7 years") %>%
  accuracy(myseries)

## # A tibble: 3 × 12
##   .model   State Indus…¹ .type     ME  RMSE   MAE    MPE  MAPE  MASE RMSSE  ACF1
##   <chr>    <chr> <chr>   <chr>  <dbl> <dbl> <dbl>  <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 hw       Vict… Other … Test  -18.9   21.4 19.0  -23.3   23.4  3.44  2.96 0.480
## 2 hwdamped Vict… Other … Test   -9.32  11.5  9.62 -11.9   12.3  1.75  1.59 0.311
## 3 snaive   Vict… Other … Test   -7.57  12.8  9.72  -9.65  12.3  1.76  1.77 0.478
## # … with abbreviated variable name ¹Industry

#According to the test set RMSE, the HW damped model is more accurate than the SNAIVE approach.

#PROBLEM 6 A

aus_trips <- tourism %>%
  summarise(Trips = sum(Trips))
aus_trips %>%
  autoplot(Trips)

#I summarized the total number of trips across Australia using the tourism data set. The data appears to be highly seasonal, with a slight downward trend until 2010, which then becomes a sharp upward trend.

#Problem 6B

decomp <- aus_trips %>%
  model(STL(Trips)) %>%
  components()
decomp %>%
  as_tsibble() %>%
  autoplot(season_adjust)

#Decomposed the data with STL and plotted the seasonally adjusted data.

#PROBLEM 6C

stletsdamped <- decomposition_model(
  STL(Trips),
  ETS(season_adjust ~ error("A") + trend("Ad") + season("N"))
)
aus_trips %>%
  model(dcmp_AAdN = stletsdamped) %>%
  forecast(h = "2 years") %>%
  autoplot(aus_trips) +
  labs(title = "2 Year Forecast of Overnight Trips in Australia")

#Forecasted the next two years of overnight trips in Australia using an additive damped trend method.

#PROBLEM 6D

stl_holt <- decomposition_model(
  STL(Trips),
  ETS(season_adjust ~ error("A") + trend("A") + season("N"))
)
aus_trips %>%
  model(dcmp_AAN = stl_holt) %>%
  forecast(h = "2 years") %>%
  autoplot(aus_trips) +
  labs(title = "2 Year Forecast of Overnight Trips in Australia")

#Forecasted the next two years using a linear trend model.

#PROBLEM 6E

aus_trips %>%
  model(ets = ETS(Trips)) %>%
  forecast(h = "2 years") %>%
  autoplot(aus_trips)

#PROBLEM 6F

fit <- aus_trips %>%
  model(
    dcmp_AAdN = stletsdamped,
    dcmp_AAN = stl_holt,
    ets = ETS(Trips)
  )
accuracy(fit)

## # A tibble: 3 × 10
##   .model    .type       ME  RMSE   MAE   MPE  MAPE  MASE RMSSE     ACF1
##   <chr>     <chr>    <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>    <dbl>
## 1 dcmp_AAdN Training 103.   763.  576. 0.367  2.72 0.607 0.629 -0.0174 
## 2 dcmp_AAN  Training  99.7  763.  574. 0.359  2.71 0.604 0.628 -0.0182 
## 3 ets       Training 105.   794.  604. 0.379  2.86 0.636 0.653 -0.00151

#The stl decomposition forecast using the additive trend model gives us a slightly better RMSE score.

#PROBLEM 6G

fit %>%
  forecast(h = "2 years") %>%
  autoplot(aus_trips, level = NULL)

#All three forecasts look very similar. Becuase it has the lowest RMSE score, I’d use the additive trend model.

best <- fit %>%
  select(dcmp_AAN)
augment(best) %>% gg_tsdisplay(.resid, lag_max = 24, plot_type = "histogram")

## Warning: Removed 4 row(s) containing missing values (geom_path).

## Warning: Removed 4 rows containing missing values (geom_point).

## Warning: Removed 4 rows containing non-finite values (stat_bin).

augment(best) %>%
  features(.innov, ljung_box, lag = 24, dof = 4)

## # A tibble: 1 × 3
##   .model   lb_stat lb_pvalue
##   <chr>      <dbl>     <dbl>
## 1 dcmp_AAN    33.3    0.0311

augment(best) %>%
  features(.innov, ljung_box, lag = 24, dof = 4)

## # A tibble: 1 × 3
##   .model   lb_stat lb_pvalue
##   <chr>      <dbl>     <dbl>
## 1 dcmp_AAN    33.3    0.0311

#The residuals for our model have some autocorrelation, but not much. The null hypothesis of the lunch box test is rejected, meaning our residuals do not resemble white noise.

#PROBLEM 7A

nz_arrivals <- aus_arrivals %>% 
  filter(Origin == "NZ")
nz_arrivals %>% 
  autoplot(Arrivals / 1e3) + labs(y = "Thousands of people")

#Created a time plot of the arrivals to Australia from New Zealand. There is a clear upward trend begining around 1985 with some definite seasonality. The seasonal varatiation is growing with the level of the data, so multiplicative seasonal components need to be used.

#PROBLEM 7B

nz_train <- nz_arrivals %>%
  slice(1:(n() - 8))
nz_train %>%
  model(ETS(Arrivals ~ error("M") + trend("A") + season("M"))) %>%
  forecast(h = "2 years") %>%
  autoplot() +
  autolayer(nz_arrivals, Arrivals)

#Plotted a two year forecast using a training set, using the HW multiplicative method.

#PROBLEM 7C #A multiplicative seasonal component is needed here because the seasonal variation is increasing in proportion to the level of the data.

#PROBLEM 7D

nz_fc <- nz_train %>%
  model(
    ets = ETS(Arrivals),
    log_ets = ETS(log(Arrivals)),
    snaive = SNAIVE(Arrivals),
    stl = decomposition_model(STL(log(Arrivals)), ETS(season_adjust))
  ) %>%
  forecast(h = "2 years")
nz_fc %>%
  autoplot(level = NULL) +
  autolayer(filter(nz_arrivals, year(Quarter) > 2000), Arrivals)

nz_fc %>%
  autoplot(level = NULL) +
  autolayer(nz_arrivals, Arrivals)

#PROBLEM 7G

nz_fc %>%
  accuracy(nz_arrivals)

## # A tibble: 4 × 11
##   .model  Origin .type      ME   RMSE    MAE    MPE  MAPE  MASE RMSSE    ACF1
##   <chr>   <chr>  <chr>   <dbl>  <dbl>  <dbl>  <dbl> <dbl> <dbl> <dbl>   <dbl>
## 1 ets     NZ     Test   -3495. 14913. 11421. -0.964  3.78 0.768 0.771 -0.0260
## 2 log_ets NZ     Test    2467. 13342. 11904.  1.03   4.03 0.800 0.689 -0.0786
## 3 snaive  NZ     Test    9709. 18051. 17156.  3.44   5.80 1.15  0.933 -0.239 
## 4 stl     NZ     Test  -12535. 22723. 16172. -4.02   5.23 1.09  1.17   0.109

#According to the RMSE scores, the log_ets model is the most accurate.

log_ets <- nz_train %>%
  model(ETS(log(Arrivals)))
log_ets %>% gg_tsresiduals()

#The log_ets model also appears to pass the residual test based on the ACF.

augment(log_ets) %>%
  features(.innov, ljung_box, lag = 12, dof = 6)

## # A tibble: 1 × 4
##   Origin .model             lb_stat lb_pvalue
##   <chr>  <chr>                <dbl>     <dbl>
## 1 NZ     ETS(log(Arrivals))    11.0    0.0887

#The log_ets model also passes the lunch box test. Our residuals resemble white noise.

#PROBLEM 7F

nz_cv <- nz_arrivals %>%
  slice(1:(n() - 3)) %>%
  stretch_tsibble(.init = 36, .step = 3)
nz_cv %>%
  model(
    ets = ETS(Arrivals),
    log_ets = ETS(log(Arrivals)),
    snaive = SNAIVE(Arrivals),
    stl = decomposition_model(STL(log(Arrivals)), ETS(season_adjust))
  ) %>%
  forecast(h = 3) %>%
  accuracy(nz_arrivals)

## # A tibble: 4 × 11
##   .model  Origin .type    ME   RMSE    MAE   MPE  MAPE  MASE RMSSE  ACF1
##   <chr>   <chr>  <chr> <dbl>  <dbl>  <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 ets     NZ     Test  4627. 15327. 11799.  2.23  6.45 0.793 0.797 0.283
## 2 log_ets NZ     Test  4388. 15047. 11566.  1.99  6.36 0.778 0.782 0.268
## 3 snaive  NZ     Test  8244. 18768. 14422.  3.83  7.76 0.970 0.976 0.566
## 4 stl     NZ     Test  4252. 15618. 11873.  2.04  6.25 0.798 0.812 0.244

#The log_ets model still seems to be the best when using time series cross validation. It still has the lowest RMSE score.

#PROBLEM 8A

cement_cv <- aus_production %>%
  slice(1:(n() - 4)) %>%
  stretch_tsibble(.init = 5 * 4)
cement_fc <- cement_cv %>%
  model(ETS(Cement), SNAIVE(Cement)) %>%
  forecast(h = "1 year")

Homework_6

Peter Cook

2022-11-07