library(fpp3)
## Registered S3 method overwritten by 'tsibble':
##   method               from 
##   as_tibble.grouped_df dplyr
## ── Attaching packages ──────────────────────────────────────────── fpp3 1.0.0 ──
## ✔ tibble      3.2.1     ✔ tsibble     1.1.5
## ✔ dplyr       1.1.4     ✔ tsibbledata 0.4.1
## ✔ tidyr       1.3.1     ✔ feasts      0.3.2
## ✔ lubridate   1.9.3     ✔ fable       0.3.4
## ✔ ggplot2     3.5.1     ✔ fabletools  0.4.2
## ── Conflicts ───────────────────────────────────────────────── fpp3_conflicts ──
## ✖ lubridate::date()    masks base::date()
## ✖ dplyr::filter()      masks stats::filter()
## ✖ tsibble::intersect() masks base::intersect()
## ✖ tsibble::interval()  masks lubridate::interval()
## ✖ dplyr::lag()         masks stats::lag()
## ✖ tsibble::setdiff()   masks base::setdiff()
## ✖ tsibble::union()     masks base::union()
library(ggplot2)
library(fable)
library(fabletools)
library(feasts)
library(tsibble)
library(dplyr)

\[8.1\]

#A.

victoria_pigs_slaughtered <- aus_livestock %>%
  filter(State == "Victoria" & Animal == "Pigs")

victoria_pigs_slaughtered_fit <- victoria_pigs_slaughtered %>%
  model(ETS(Count ~ error("A") + trend("N") + season("N")))

victoria_pigs_slaughtered_forecast <- victoria_pigs_slaughtered_fit %>%
  forecast(h = 4)

victoria_pigs_slaughtered_forecast %>%
  autoplot(victoria_pigs_slaughtered %>% filter(Month >= yearmonth("2010 Jan"))) +
  labs(y = "Count", title = "Victoria Pigs Slaughtered") +
  guides(colour = "none")

report(victoria_pigs_slaughtered_fit)
## Series: Count 
## Model: ETS(A,N,N) 
##   Smoothing parameters:
##     alpha = 0.3221247 
## 
##   Initial states:
##      l[0]
##  100646.6
## 
##   sigma^2:  87480760
## 
##      AIC     AICc      BIC 
## 13737.10 13737.14 13750.07

The output above shows that the estimated smoothing parameter is α = 0.3221247, and the estimate of the initial level ℓ0 is 100646.6.
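
As a quick check, the same estimates can also be pulled out of the fitted model programmatically with tidy():

# tidy() returns the estimated parameters (alpha and the initial level) as a tibble.
tidy(victoria_pigs_slaughtered_fit)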

#B.

resid_std <- sd(augment(victoria_pigs_slaughtered_fit)$.resid)

sprintf(
  "prediction interval: [%f, %f]", 
  victoria_pigs_slaughtered_forecast$.mean[1] - (resid_std * 1.96),
  victoria_pigs_slaughtered_forecast$.mean[1] + (resid_std * 1.96)
  )
## [1] "prediction interval: [76871.012478, 113502.102384]"

The output above shows that the 95% prediction interval for the first forecast is roughly 76871.01 to 113502.10 when computed with the ŷ ± 1.96s formula, where s is the standard deviation of the residuals. This is very close to the interval R computes below with hilo(); the small difference comes from using the sample standard deviation of the residuals rather than the model's estimated variance.

hilo(victoria_pigs_slaughtered_forecast$Count, 95)
## <hilo[4]>
## [1] [76854.79, 113518.3]95 [75927.17, 114445.9]95 [75042.22, 115330.9]95
## [4] [74194.54, 116178.6]95
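
A minimal sketch that should reproduce R's hilo() interval up to rounding, using the model's estimated variance (the sigma^2 shown by report() and glance()) instead of the raw residual standard deviation; sigma_hat is a helper name introduced here:

# For an ETS(A,N,N) model the one-step forecast variance equals sigma^2,
# so 1.96 * sqrt(sigma2) around the point forecast gives the 95% interval.
sigma_hat <- sqrt(glance(victoria_pigs_slaughtered_fit)$sigma2)
victoria_pigs_slaughtered_forecast$.mean[1] + c(-1, 1) * 1.96 * sigma_hat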

\[8.5\]

#A.
aus_economy <- global_economy %>%
  filter(Country == "Australia")

aus_economy %>%
  autoplot(Exports)

## The plot shows us an increasing trend in the series. The trend looks like it has begun to taper off starting in 2010.

#B.
aus_economy %>%
  model(
    SES = ETS(Exports ~ error("A") + trend("N") + season("N"))
  ) %>%
  forecast(h = 15) %>%
  autoplot(aus_economy) +
  labs(title = "Australian population") +
  guides(colour = guide_legend(title = "Forecast"))

#C.

aus_economy %>%
  stretch_tsibble(.init = 10) %>%
  model(
    SES = ETS(Exports ~ error("A") + trend("N") + season("N"))
  ) %>%
  forecast(h = 15) %>%
  accuracy(aus_economy)
## Warning: The future dataset is incomplete, incomplete out-of-sample data will be treated as missing. 
## 15 observations are missing between 2018 and 2032
## # A tibble: 1 × 11
##   .model Country   .type    ME  RMSE   MAE   MPE  MAPE  MASE RMSSE  ACF1
##   <chr>  <fct>     <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 SES    Australia Test   1.55  2.38  1.90  8.43  10.4  1.93  1.93 0.667

The output above shows an RMSE of 2.381248 for the SES model. Because accuracy() is evaluating the stretch_tsibble() cross-validation forecasts against the withheld observations, it is reported as a Test error; an in-sample (training) RMSE is sketched below for comparison.
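
A short sketch of the training-set RMSE, obtained by calling accuracy() on the fitted model itself rather than on cross-validated forecasts:

# Training accuracy of the SES model fitted to the full Exports series.
aus_economy %>%
  model(SES = ETS(Exports ~ error("A") + trend("N") + season("N"))) %>%
  accuracy()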

#D.
aus_economy %>%
  model(
    SES = ETS(Exports ~ error("A") + trend("N") + season("N")),
    Holt = ETS(Exports ~ error("A") + trend("A") + season("N"))
  ) %>%
  forecast(h = 15) %>%
  autoplot(aus_economy, level = NULL) +
  labs(title = "Australian population") +
  guides(colour = guide_legend(title = "Forecast"))

The increasing trend in this data makes an SES model inappropriate here. Holt's method looks like a much better fit, since it can capture the upward trend in exports over the years.

#E.
aus_economy %>%
  stretch_tsibble(.init = 10) %>%
  model(
    SES = ETS(Exports ~ error("A") + trend("N") + season("N")),
    Holt = ETS(Exports ~ error("A") + trend("A") + season("N"))
  ) %>%
  forecast(h = 15) %>%
  accuracy(aus_economy)
## Warning: The future dataset is incomplete, incomplete out-of-sample data will be treated as missing. 
## 15 observations are missing between 2018 and 2032
## # A tibble: 2 × 11
##   .model Country   .type    ME  RMSE   MAE   MPE  MAPE  MASE RMSSE  ACF1
##   <chr>  <fct>     <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Holt   Australia Test  0.932  2.30  1.84  5.47  10.3  1.87  1.86 0.776
## 2 SES    Australia Test  1.55   2.38  1.90  8.43  10.4  1.93  1.93 0.667
aus_economy_model <- aus_economy %>%
  model(
    SES = ETS(Exports ~ error("A") + trend("N") + season("N")),
    Holt = ETS(Exports ~ error("A") + trend("A") + season("N"))
  )

aus_economy_forecasts <- aus_economy_model %>%
  forecast(h = 15)

aus_economy_forecasts %>%
  group_by(.model) %>%
  mutate(interval = hilo(Exports, 95)) %>%
  filter(row_number() == 1)
## # A tsibble: 2 x 6 [1Y]
## # Key:       Country, .model [2]
## # Groups:    .model [2]
##   Country   .model  Year    Exports .mean               interval
##   <fct>     <chr>  <dbl>     <dist> <dbl>                 <hilo>
## 1 Australia SES     2018 N(21, 1.4)  20.6 [18.31970, 22.89462]95
## 2 Australia Holt    2018 N(21, 1.3)  20.8 [18.57028, 23.10700]95

If we go by RMSE, Holt's method fits the cross-validation data better than SES. If we go by the 95% prediction interval for the first forecast, Holt's method is still slightly better (its interval is marginally narrower). Therefore, Holt's method is the better of the two.

#F.
aus_economy_augment <- aus_economy_model %>%
  augment()

resid_std_ses <- sd((aus_economy_augment %>% filter(.model == "SES"))$.resid)
resid_std_holt <- sd((aus_economy_augment %>% filter(.model == "Holt"))$.resid)

sprintf(
  "Calculated prediction interval (SES): [%f, %f]", 
  aus_economy_forecasts$.mean[1] - (resid_std_ses * 1.96),
  aus_economy_forecasts$.mean[1] + (resid_std_ses * 1.96)
  )
## [1] "Calculated prediction interval (SES): [18.386715, 22.827603]"

\[8.6\]

china_economy <- global_economy %>% 
  filter(Country == "China") %>%
  mutate(GDP_per_capita = GDP/Population)

china_economy %>%
  autoplot(GDP_per_capita)

china_economy %>%
  model(
    `SES` = ETS(GDP_per_capita ~ error("A") + trend("N") + season("N")),
    `Damped Holt's method (phi = 0.8)` = ETS(GDP_per_capita ~ error("A") + trend("Ad", phi = 0.8) + season("N")),
    `Damped Holt's method (phi = 0.85)` = ETS(GDP_per_capita ~ error("A") + trend("Ad", phi = 0.85) + season("N")),
    `Damped Holt's method (phi = 0.9)` = ETS(GDP_per_capita ~ error("A") + trend("Ad", phi = 0.9) + season("N")),
    `Damped Holt's method (phi = 0.98)` = ETS(GDP_per_capita ~ error("A") + trend("Ad", phi = 0.98) + season("N")),
  ) %>%
  forecast(h = 15) %>%
  autoplot(china_economy, level = NULL) +
  labs(title = "China GDP Per Capita") +
  guides(colour = guide_legend(title = "Forecast"))

## As we decrease the damping parameter ϕ, the forecasts level off to a constant earlier. Forecasting 15 years into the future, the tapering is most pronounced at ϕ=0.8 and becomes less evident as ϕ increases.
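
Rather than fixing ϕ by hand, ETS() can also estimate the damping parameter; a brief sketch:

# When phi is not specified, the damped-trend model estimates it from the data;
# report() then shows the fitted phi alongside alpha and beta.
china_economy %>%
  model(ETS(GDP_per_capita ~ error("A") + trend("Ad") + season("N"))) %>%
  report()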

lambda <- china_economy %>%
  features(GDP_per_capita, features = guerrero) |>
  pull(lambda_guerrero)

china_economy %>%
  model(
    `SES` = ETS(box_cox(GDP_per_capita, lambda) ~ error("A") + trend("N") + season("N")),
    `Damped Holt's method (phi = 0.8)` = ETS(box_cox(GDP_per_capita, lambda) ~ error("A") + trend("Ad", phi = 0.8) + season("N")),
    `Damped Holt's method (phi = 0.85)` = ETS(box_cox(GDP_per_capita, lambda) ~ error("A") + trend("Ad", phi = 0.85) + season("N")),
    `Damped Holt's method (phi = 0.9)` = ETS(box_cox(GDP_per_capita, lambda) ~ error("A") + trend("Ad", phi = 0.9) + season("N")),
    `Damped Holt's method (phi = 0.98)` = ETS(box_cox(GDP_per_capita, lambda) ~ error("A") + trend("Ad", phi = 0.98) + season("N")),
  ) %>%
  forecast(h = 15) %>%
  autoplot(china_economy, level = NULL) +
  labs(title = "China GDP Per Capita") +
  guides(colour = guide_legend(title = "Forecast"))

## Applying a Box-Cox transformation gives the forecasts shown above. As we increase ϕ, the slopes of the forecasts increase. With that said, current UN projections suggest that the population of China will stagnate by 2029, so these steep forecasts from the Box-Cox transformed data should not be reported.

\[8.7\]

aus_production %>%
  autoplot(Gas)

aus_gas <- aus_production %>%
  select(Gas)

fit <- aus_gas %>%
  model(ETS(Gas))

report(fit)
## Series: Gas 
## Model: ETS(M,A,M) 
##   Smoothing parameters:
##     alpha = 0.6528545 
##     beta  = 0.1441675 
##     gamma = 0.09784922 
## 
##   Initial states:
##      l[0]       b[0]      s[0]    s[-1]    s[-2]     s[-3]
##  5.945592 0.07062881 0.9309236 1.177883 1.074851 0.8163427
## 
##   sigma^2:  0.0032
## 
##      AIC     AICc      BIC 
## 1680.929 1681.794 1711.389
fit %>%
  accuracy()
## # A tibble: 1 × 10
##   .model   .type        ME  RMSE   MAE   MPE  MAPE  MASE RMSSE    ACF1
##   <chr>    <chr>     <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>   <dbl>
## 1 ETS(Gas) Training -0.115  4.60  3.02 0.199  4.08 0.542 0.606 -0.0131
fit %>% 
  forecast(h = 15) %>%
  autoplot(aus_gas) +
  labs(title = "Australia Gas Production (Original Data)") +
  guides(colour = guide_legend(title = "Forecast"))

lambda <- aus_gas |>
  features(Gas, features = guerrero) |>
  pull(lambda_guerrero)

aus_gas <- aus_gas %>%
  mutate(aus_gas_transformed = box_cox(Gas, lambda))

fit_box_cox <- aus_gas %>%
  model(ETS(aus_gas_transformed))

report(fit_box_cox)
## Series: aus_gas_transformed 
## Model: ETS(A,A,A) 
##   Smoothing parameters:
##     alpha = 0.81796 
##     beta  = 0.1216283 
##     gamma = 0.0001000577 
## 
##   Initial states:
##      l[0]       b[0]       s[0]     s[-1]     s[-2]      s[-3]
##  1.888944 0.03198818 -0.1065298 0.2508721 0.1003452 -0.2446875
## 
##   sigma^2:  0.0059
## 
##      AIC     AICc      BIC 
## 63.28945 64.15484 93.74991
fit_box_cox %>%
  accuracy()
## # A tibble: 1 × 10
##   .model         .type       ME   RMSE    MAE      MPE  MAPE  MASE RMSSE    ACF1
##   <chr>          <chr>    <dbl>  <dbl>  <dbl>    <dbl> <dbl> <dbl> <dbl>   <dbl>
## 1 ETS(aus_gas_t… Trai… -9.34e-4 0.0751 0.0570 -0.00928  1.52 0.471 0.402 -0.0283
fit_box_cox %>% 
  forecast(h = 15) %>%
  autoplot(aus_gas) +
  labs(title = "Australia Gas Production (Box Cox)") +
  guides(colour = guide_legend(title = "Forecast"))

## After transforming the data with Box-Cox, the reported RMSE is much smaller, although it is measured on the transformed scale and so is not directly comparable to the RMSE of the original model. The Box-Cox transformed data is fitted with an additive seasonality model, while the original data is fitted with a multiplicative seasonality model: the heights of the seasonal peaks grow over time in the original data, which is why multiplicative seasonality is chosen there.
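
One way to make the two models directly comparable (a sketch, reusing the lambda computed above) is to put the Box-Cox transformation inside the model formula; fable then back-transforms the fitted values and forecasts automatically, so the accuracy measures should be on the original Gas scale:

# The transformation is part of the model formula, so fitted values and
# forecasts are back-transformed to the original units before accuracy() runs.
aus_gas %>%
  model(ETS(box_cox(Gas, lambda))) %>%
  accuracy()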

damped_fit <- aus_gas %>%
  model(fit = ETS(Gas  ~ trend('Ad', phi = 0.92)))

damped_fit %>% 
  forecast(h = 15) %>%
  autoplot(aus_gas) +
  labs(title = "Australia Gas Production (Damped Model)") +
  guides(colour = guide_legend(title = "Forecast"))

\[8.8\]

#A.
set.seed(237)
myseries <- aus_retail %>%
  filter(`Series ID` == sample(aus_retail$`Series ID`,1))
myseries %>%
  autoplot(Turnover)

## Multiplicative seasonality is necessary for this dataset since the heights of the seasonal peaks change as the years go by.

#B.
fit <- myseries |>
  model(
    Multiplicative = ETS(Turnover ~ error("M") + trend("A") + season("M")),
    Damped = ETS(Turnover ~ error("M") + trend("Ad") + season("M"))
  )
fc <- fit |> forecast(h = "3 years")
fc |>
  autoplot(myseries, level = NULL) +
  labs(title="Turnover Time Series",
       y="turnover") +
  guides(colour = guide_legend(title = "Forecast"))

#C.
fit %>%
  accuracy()
## # A tibble: 2 × 12
##   State       Industry .model .type     ME  RMSE   MAE     MPE  MAPE  MASE RMSSE
##   <chr>       <chr>    <chr>  <chr>  <dbl> <dbl> <dbl>   <dbl> <dbl> <dbl> <dbl>
## 1 South Aust… Liquor … Multi… Trai… 0.0387  1.44  1.04 -0.508   5.01 0.503 0.535
## 2 South Aust… Liquor … Damped Trai… 0.115   1.46  1.04  0.0640  4.98 0.507 0.541
## # ℹ 1 more variable: ACF1 <dbl>

The training RMSEs of the two methods are nearly identical (1.44 for the Multiplicative model vs 1.46 for the Damped model), so there is little to choose between them; the Damped model is used for the residual diagnostics below.

#D.
fit %>%
  select(Damped) %>%
  gg_tsresiduals()

## The innovation residuals look like white noise and their variance appears roughly constant over time, so the assumptions of uncorrelated, homoscedastic residuals seem reasonable. A more formal check is sketched below.
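
A sketch of that check, applying a Ljung-Box test to the innovation residuals of the Damped model (lag = 24 is a common choice for monthly data; the degrees-of-freedom adjustment is left at its default here):

# Large p-values are consistent with the residuals being white noise.
fit %>%
  select(Damped) %>%
  augment() %>%
  features(.innov, ljung_box, lag = 24)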

#E.
myseries_train <- myseries %>%
  filter(year(Month) < 2011)

myseries_test <- myseries %>%
  filter(year(Month) >= 2011)
myseries_fit <- myseries_train %>%
  model(
    Multiplicative = ETS(Turnover ~ error("M") + trend("A") + season("M")),
    Damped = ETS(Turnover ~ error("M") + trend("Ad", phi = 0.8) + season("M"))
  )

myseries_forecasts <- myseries_fit |>
  forecast(h = nrow(myseries_test))

myseries_forecasts |>
  autoplot(
    myseries,
    level = NULL
  ) +
  labs(
    y = "Turnover ($ million AUD)",
    title = "Retail turnover forecasts"
  ) +
  guides(colour = guide_legend(title = "Forecast"))

accuracy(myseries_forecasts, myseries_test)
## # A tibble: 2 × 12
##   .model    State Industry .type    ME  RMSE   MAE   MPE  MAPE  MASE RMSSE  ACF1
##   <chr>     <chr> <chr>    <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Damped    Sout… Liquor … Test   5.34  6.79  5.53 10.5  11.0    NaN   NaN 0.594
## 2 Multipli… Sout… Liquor … Test   1.20  2.66  2.03  2.07  4.10   NaN   NaN 0.556

The Multiplicative method has a much lower test RMSE (2.66) than the Damped method (6.79). Relative to the seasonal naive approach from the earlier exercise, the Damped model does worse, but the Multiplicative model has the lowest RMSE overall and beats the seasonal naive benchmark. (The MASE and RMSSE columns are NaN because accuracy() was given only the test data, which does not contain the training observations needed to scale those measures.) A direct seasonal naive comparison is sketched below.
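
A sketch that refits the seasonal naive benchmark on the same training period and evaluates it on the same test observations (passing the full myseries so the scaled errors MASE and RMSSE can be computed):

# Seasonal naive benchmark over the same forecast horizon as the ETS models.
myseries_train %>%
  model(`Seasonal naive` = SNAIVE(Turnover)) %>%
  forecast(h = nrow(myseries_test)) %>%
  accuracy(myseries)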

\[8.9\]

lambda <- myseries_train |>
  features(Turnover, features = guerrero) |>
  pull(lambda_guerrero)

myseries_transformed <- myseries %>%
  mutate(Turnover_transformed = box_cox(Turnover, lambda))

myseries_train <- myseries_train %>%
  mutate(Turnover_transformed = box_cox(Turnover, lambda))

myseries_test <- myseries_test %>%
  mutate(Turnover_transformed = box_cox(Turnover, lambda))

myseries_stl_components <- myseries_transformed %>%
  model(
    STL(Turnover_transformed ~ trend(window = 7) +
                   season(window = "periodic"),
    robust = TRUE)) %>%
  components()

myseries_stl_components_train <- myseries_train %>%
  model(
    STL(Turnover_transformed ~ trend(window = 7) +
                   season(window = "periodic"),
    robust = TRUE)) %>%
  components()

myseries_stl_components_test <- myseries_test %>%
  model(
    STL(Turnover_transformed ~ trend(window = 7) +
                   season(window = "periodic"),
    robust = TRUE)) %>%
  components()

myseries_stl_components_train %>%
  autoplot()

head(myseries_stl_components_train)
## # A dable: 6 x 9 [1M]
## # Key:     State, Industry, .model [1]
## # :        Turnover_transformed = trend + season_year + remainder
##   State          Industry .model    Month Turnover_transformed trend season_year
##   <chr>          <chr>    <chr>     <mth>                <dbl> <dbl>       <dbl>
## 1 South Austral… Liquor … "STL(… 1982 Apr                 1.67  1.69     -0.0450
## 2 South Austral… Liquor … "STL(… 1982 May                 1.58  1.68     -0.0679
## 3 South Austral… Liquor … "STL(… 1982 Jun                 1.55  1.68     -0.106 
## 4 South Austral… Liquor … "STL(… 1982 Jul                 1.61  1.68     -0.0837
## 5 South Austral… Liquor … "STL(… 1982 Aug                 1.63  1.69     -0.0591
## 6 South Austral… Liquor … "STL(… 1982 Sep                 1.64  1.70     -0.0403
## # ℹ 2 more variables: remainder <dbl>, season_adjust <dbl>
myseries_season_adjust <- myseries_stl_components %>%
  select(season_adjust)

myseries_season_adjust_test <- myseries_stl_components_test %>%
  select(season_adjust)

myseries_season_adjust_train <- myseries_stl_components_train %>%
  select(season_adjust)

myseries_season_adjust_fit <- myseries_season_adjust_train %>%
  model(ETS(season_adjust))

myseries_forecasts <- myseries_season_adjust_fit |>
  forecast(h = nrow(myseries_test))

report(myseries_season_adjust_fit)
## Series: season_adjust 
## Model: ETS(A,A,A) 
##   Smoothing parameters:
##     alpha = 0.39537 
##     beta  = 0.0001136495 
##     gamma = 0.2052473 
## 
##   Initial states:
##      l[0]        b[0]       s[0]      s[-1]        s[-2]      s[-3]       s[-4]
##  1.630384 0.004084654 0.04326809 0.01886194 -0.003070051 -0.1434877 -0.03335487
##         s[-5]      s[-6]        s[-7]      s[-8]      s[-9]     s[-10]
##  -0.007240666 0.02416459 -0.008748813 0.01096883 0.05125872 0.03391985
##      s[-11]
##  0.01346011
## 
##   sigma^2:  0.004
## 
##      AIC     AICc      BIC 
## 131.1259 132.9974 196.4661
accuracy(myseries_forecasts, myseries_season_adjust_test)
## # A tibble: 1 × 10
##   .model             .type     ME  RMSE   MAE   MPE  MAPE  MASE RMSSE  ACF1
##   <chr>              <chr>  <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 ETS(season_adjust) Test  -0.109 0.134 0.111 -3.27  3.33   NaN   NaN 0.791
myseries_forecasts %>%
  autoplot(myseries_season_adjust)

## The test RMSE is 0.1335117, which is much lower than the RMSEs in the previous parts, although it is computed on the seasonally adjusted, Box-Cox transformed scale and so is not directly comparable to the earlier RMSEs on the original turnover scale. The selected model is ETS(A,A,A) rather than the ETS(A,A,N) (Holt's linear method) we might expect after seasonal adjustment; the adjusted series evidently retains a small residual seasonal signal, although the estimated initial seasonal states are all close to zero. On its own scale, this model fits the data well. A fit forcing the seasonal component off is sketched below.
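
Since the seasonality has already been removed by the STL step, it would also be reasonable to fit Holt's linear method directly to the seasonally adjusted training data; a sketch:

# ETS(A,A,N): additive errors, additive trend, no seasonal component.
myseries_season_adjust_train %>%
  model(Holt = ETS(season_adjust ~ error("A") + trend("A") + season("N"))) %>%
  report()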