HW#5

library(fpp3)

## ── Attaching packages ──────────────────────────────────────────── fpp3 0.4.0 ──

## ✔ tibble      3.1.8     ✔ tsibble     1.1.2
## ✔ dplyr       1.0.9     ✔ tsibbledata 0.4.0
## ✔ tidyr       1.2.0     ✔ feasts      0.2.2
## ✔ lubridate   1.8.0     ✔ fable       0.3.1
## ✔ ggplot2     3.3.6

## ── Conflicts ───────────────────────────────────────────────── fpp3_conflicts ──
## ✖ lubridate::date()    masks base::date()
## ✖ dplyr::filter()      masks stats::filter()
## ✖ tsibble::intersect() masks base::intersect()
## ✖ tsibble::interval()  masks lubridate::interval()
## ✖ dplyr::lag()         masks stats::lag()
## ✖ tsibble::setdiff()   masks base::setdiff()
## ✖ tsibble::union()     masks base::union()

Question 1

a.)

pigs <- aus_livestock %>%
  filter(State == "Victoria", Animal == "Pigs")

autoplot(pigs)

## Plot variable not specified, automatically selected `.vars = Count`

pigsets <- pigs %>%
  model(ETS(Count ~ error("A") + trend("N") + season("N"))) 

report(pigsets)

## Series: Count 
## Model: ETS(A,N,N) 
##   Smoothing parameters:
##     alpha = 0.3221247 
## 
##   Initial states:
##      l[0]
##  100646.6
## 
##   sigma^2:  87480760
## 
##      AIC     AICc      BIC 
## 13737.10 13737.14 13750.07

components(pigsets) %>%
  autoplot()

## Warning: Removed 1 row(s) containing missing values (geom_path).

pigsforecast <- pigsets %>%
  forecast(h = 4) 

pigs2 <- pigs %>%
  filter(Month >= yearmonth(as.Date("2010-01-01")))

pigsforecast %>%
  autoplot(pigs2)+
  labs(title = "Number of Pigs in Victoria Forecast 4 Months")

b.)

Question 2

a.)

italy <- global_economy %>%
  filter(Country == "Italy") %>%
  select(Year, Exports)

italy %>%
  ggplot(aes(x = Year, y = Exports)) +
  geom_line()+ labs(title = "Italian Exports")

b.)

italianestforecast <- italy %>%
  model(ETS(Exports ~ error("A") + trend("N") + season("N"))) %>%
  forecast(h = 10) 
italianestforecast %>%
  autoplot(italy)+ labs(title = "Italian Exports Forecasted 10 Years")

c.)

train <- italy %>%
  filter(Year < 2002)

test <- italy %>%
  filter(Year > 2002)

trainetsforecast <- train %>%
  model(ETS(Exports ~ error("A") + trend("N") + season("N"))) %>%
  forecast(h = 10)

accuracy(trainetsforecast, test, list(rmse = RMSE))

## Warning: The future dataset is incomplete, incomplete out-of-sample data will be treated as missing. 
## 1 observation is missing at 2002

## # A tibble: 1 × 3
##   .model                                                       .type  rmse
##   <chr>                                                        <chr> <dbl>
## 1 "ETS(Exports ~ error(\"A\") + trend(\"N\") + season(\"N\"))" Test   1.72

d.)

italianestforecastaan <- italy %>%
  model(ETS(Exports ~ error("A") + trend("A") + season("N"))) %>%
  forecast(h = 10) 

trainetsforecastaan <- train %>%
  model(ETS(Exports ~ error("A") + trend("A") + season("N"))) %>%
  forecast(h = 10)

accuracy(trainetsforecastaan, test, list(rmse = RMSE))

## Warning: The future dataset is incomplete, incomplete out-of-sample data will be treated as missing. 
## 1 observation is missing at 2002

## # A tibble: 1 × 3
##   .model                                                       .type  rmse
##   <chr>                                                        <chr> <dbl>
## 1 "ETS(Exports ~ error(\"A\") + trend(\"A\") + season(\"N\"))" Test   2.87

e.)

italianestforecastaan %>%
  autoplot(italy)+ labs(title = "Italian Exports Forecasted 10 Years, AAN")

Question 3

china <- global_economy %>%
  filter(Country == 'China') %>%
  mutate(GDP = GDP/1e9) %>%
  select(c(Country, Code, Year, GDP))

(china)

## # A tsibble: 58 x 4 [1Y]
## # Key:       Country [1]
##    Country Code   Year   GDP
##    <fct>   <fct> <dbl> <dbl>
##  1 China   CHN    1960  59.7
##  2 China   CHN    1961  50.1
##  3 China   CHN    1962  47.2
##  4 China   CHN    1963  50.7
##  5 China   CHN    1964  59.7
##  6 China   CHN    1965  70.4
##  7 China   CHN    1966  76.7
##  8 China   CHN    1967  72.9
##  9 China   CHN    1968  70.8
## 10 China   CHN    1969  79.7
## # … with 48 more rows

china %>%
  autoplot(GDP) +
  labs(y="Billions $USD", title="GDP: China") +
  guides(colour = "none")

fitc <- china %>%
  model(
    SES = ETS(GDP ~ error("A") + trend("N") + season("N")),
    Holt = ETS(GDP ~ error("A") + trend("A") + season("N")),
    Damped = ETS(GDP ~ error("A") + trend("Ad") + season("N"))
  )

gdpforecast <- fitc %>% forecast(h = 20)

gdpforecast %>%
  autoplot(china, level=NULL) +
  labs(y="Billions $USD", title="GDP: China") +
  guides(colour = guide_legend(title = "Forecast"))

lambdac <- china %>%
  features(GDP, features = guerrero) %>%
  pull(lambda_guerrero)

fit_bc <- china %>%
  model(
    SES = ETS(box_cox(GDP, lambdac) ~ error("A") + trend("N") + season("N")),
    Holt = ETS(box_cox(GDP, lambdac) ~ error("A") + trend("A") + season("N")),
    Damped = ETS(box_cox(GDP, lambdac) ~ error("A") + trend("Ad") + season("N"))
  )


boxcoxfc <- fit_bc %>% forecast(h = 20)

boxcoxfc %>%
  autoplot(china, level=NULL) +
  labs(title = "Transformed China GDP") +
  guides(colour = guide_legend(title = "Forecast"))

Question 4

gas <- aus_production %>%
  select(Quarter, Gas)

gas %>%
  autoplot(Gas)

fitg <- gas %>%
  model(
    add = ETS(Gas ~ error("A") + trend("A") + season("N")),
    mult = ETS(Gas ~ error("M") + trend("A") + season("N")),
    add_sea = ETS(Gas ~ error("A") + trend("A") + season("A")),
    mult_sea = ETS(Gas ~ error("M") + trend("A") + season("M")),
    mult_sea_damp = ETS(Gas ~ error("M") + trend("Ad") + season("M")))

fc <- fitg %>% forecast(h = 36)

fc %>%
  autoplot(gas, level=NULL) +
  labs(y="Petajoules", title="Gas Production: Australia") +
  guides(colour = guide_legend(title = "Forecast"))

Question 5

set.seed(478456)

myseries <- aus_retail %>%
  filter(`Series ID` == sample(aus_retail$`Series ID`,1))

autoplot(myseries)

## Plot variable not specified, automatically selected `.vars = Turnover`

a.) The variation in the seasonal in the data increases as the trend increases. Since it varies, the multiplicative approach necessary since additive cannot handle seasonal variation.

b.) The damped and linear forecast are nearly identical with the damped method being slightly lower. I believe this is do to the trend of the data leveling off near the end.

fitms <- myseries %>%
  model(
    M_S = ETS(Turnover ~ error("M") + trend("A") + season("M")),
    M_S_D = ETS(Turnover ~ error("M") + trend("Ad") + season("M"))
  )

msforecast <- fitms %>% forecast(h = 36)

msforecast %>%
  autoplot(myseries, level=NULL) +
  labs(y="AUD (Millions)", title="Forecasted Retail Turnover in Australia")

c.) The damped model has a slightly lower RSME suggesting it should be the preferred method.

accuracy <- accuracy(fitms)

accuracy

## # A tibble: 2 × 12
##   State  Indus…¹ .model .type     ME  RMSE   MAE    MPE  MAPE  MASE RMSSE   ACF1
##   <chr>  <chr>   <chr>  <chr>  <dbl> <dbl> <dbl>  <dbl> <dbl> <dbl> <dbl>  <dbl>
## 1 Queen… Depart… M_S    Trai… -0.139  9.50  7.17 -0.109  3.80 0.660 0.684 -0.138
## 2 Queen… Depart… M_S_D  Trai…  0.785  9.43  7.11  0.386  3.77 0.654 0.679 -0.116
## # … with abbreviated variable name ¹Industry

d.) Even though the residual graph appears to be white noise, there is autocorrelation according to the ACF plot.

resid <- myseries %>%
  model(ETS(Turnover ~ error("M") + trend("Ad") + season("M")))

resid %>% gg_tsresiduals()

e.)

myseriestest <- myseries %>%
  filter(year(Month) < 2011) %>%
  model(
    snaive = SNAIVE(Turnover),
    `Holt-Winter` = ETS(Turnover ~ error("M") + trend("A") + season("M")),
    `Holt-Winter Damped` = ETS(Turnover ~ error("M") + trend("Ad", phi = 0.9) + season("M"))
  ) %>%
  forecast(h =36) %>%
  accuracy(myseries)

Question 6

a.)

trips <- tourism %>%
  summarise(Trips = sum(Trips))

trips %>% autoplot(Trips)

b.)

trips %>%
  model(
    STL(Trips ~ trend(window = 4) + season(window = "periodic"),
        robust = TRUE)) %>%
  components() %>%
  autoplot(season_adjust)

c.)

stletsdamped <- decomposition_model(
  STL(Trips),
  ETS(season_adjust ~ error("A") + trend("Ad") + season("N")))

trips %>%
  model(dcmp_AAdN = stletsdamped) %>%
  forecast(h =8) %>%
  autoplot(trips)

d.)

fittrip <- trips %>%
  model(
    "Holt Linear Method" = ETS(Trips ~ error("A") + trend("A") + season("N")))

tripfc <- fittrip %>% forecast ( h =8)
tripfc %>%
  autoplot(trips)

e.)

fittrip <- trips %>%
  model(ETS(Trips))

etsfc <- fittrip %>% forecast(h = 8)

etsfc %>% autoplot(trips)

f.)

aus_trip <- trips %>%  
  model(
    "Holt Linear Method" = ETS(`Trips` ~ error("A") +  trend("Ad") + season("A")),
    "Holt Multiplicative" = ETS(`Trips` ~ error("A") +  trend("Ad") + season("N")),
    "ETS" = ETS(`Trips`))

fc <- aus_trip %>% forecast(h = 8)

accuracy(aus_trip)

## # A tibble: 3 × 10
##   .model              .type      ME  RMSE   MAE   MPE  MAPE  MASE RMSSE     ACF1
##   <chr>               <chr>   <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>    <dbl>
## 1 Holt Linear Method  Traini…  103.  795.  606. 0.357  2.86 0.638 0.654  4.34e-4
## 2 Holt Multiplicative Traini…  135. 1215. 1000. 0.354  4.66 1.05  1.00   9.53e-2
## 3 ETS                 Traini…  105.  794.  604. 0.379  2.86 0.636 0.653 -1.51e-3

g.)

h.)

Question 7

a.)

nz_arrivals <- aus_arrivals %>% 
  filter(Origin == "NZ")
autoplot(nz_arrivals, Arrivals) +
  ggtitle("Arrivals from New Zealand")

b.)

trainnz <- nz_arrivals %>%
  slice(1:(nrow(nz_arrivals)-(4*2)))   

trainnz %>%
  model(
    ETS(Arrivals ~ error("M") +
          trend("A") +
          season("M"))
  ) %>%
  forecast(h="2 years") %>%
  autoplot(level = NULL) + 
  autolayer(nz_arrivals, Arrivals) +
  labs(title = "Train forecast comparison to the actual data.")

c.)

d.)

fit <- trainnz %>%
  model(
    'ETS' = ETS(Arrivals),
    'ETS Log' = ETS(log(Arrivals) ~ error("A") + trend("A") + season("A")),
    'Snaive' = SNAIVE(Arrivals),
    'LogSTL' = decomposition_model(STL(log(Arrivals)),ETS(season_adjust))) 

fc <- fit %>%
  forecast(h= 8) 

fc %>%
  autoplot(level = NULL) +
  autolayer(filter(nz_arrivals, year(Quarter) > 2000), Arrivals)+
  guides(colour=guide_legend(title="Forecast")) +
  labs(title = "New Zealand Visitors Train Forecast Comparison to the Actual Data.")

e.)

fc %>% accuracy(nz_arrivals)

## # A tibble: 4 × 11
##   .model  Origin .type      ME   RMSE    MAE    MPE  MAPE  MASE RMSSE    ACF1
##   <chr>   <chr>  <chr>   <dbl>  <dbl>  <dbl>  <dbl> <dbl> <dbl> <dbl>   <dbl>
## 1 ETS     NZ     Test   -3495. 14913. 11421. -0.964  3.78 0.768 0.771 -0.0260
## 2 ETS Log NZ     Test   -7093. 17765. 12851. -2.17   4.20 0.864 0.918  0.0352
## 3 LogSTL  NZ     Test  -12535. 22723. 16172. -4.02   5.23 1.09  1.17   0.109 
## 4 Snaive  NZ     Test    9709. 18051. 17156.  3.44   5.80 1.15  0.933 -0.239

etsresid <- fit %>% select('ETS')
etsresid %>%
  gg_tsresiduals()

augment(etsresid) %>%
  features(.resid, ljung_box)

## # A tibble: 1 × 3
##   .model lb_stat lb_pvalue
##   <chr>    <dbl>     <dbl>
## 1 ETS       1.29     0.256

f.)

cross <- nz_arrivals %>%
  slice(1:(n() - 3)) %>%
  stretch_tsibble(.init = 36, .step = 3)

cross %>% 
  model(
    'ETS' = ETS(Arrivals),
    'ETS Log' = ETS(log(Arrivals) ~ error("A") + trend("A")+ season("A")),
    'Snaive' = SNAIVE(Arrivals),
    'LogSTL' = decomposition_model(STL(log(Arrivals)), ETS(season_adjust))) %>%
  forecast(h = 3) %>%
  accuracy(nz_arrivals)

## # A tibble: 4 × 11
##   .model  Origin .type    ME   RMSE    MAE   MPE  MAPE  MASE RMSSE  ACF1
##   <chr>   <chr>  <chr> <dbl>  <dbl>  <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 ETS     NZ     Test  4627. 15327. 11799. 2.23   6.45 0.793 0.797 0.283
## 2 ETS Log NZ     Test  -375. 14347. 11021. 0.200  5.83 0.741 0.746 0.309
## 3 LogSTL  NZ     Test  4252. 15618. 11873. 2.04   6.25 0.798 0.812 0.244
## 4 Snaive  NZ     Test  8244. 18768. 14422. 3.83   7.76 0.970 0.976 0.566

Question 8

a.)

b.)

Extra

HW#5

Nathan Snow

2022-11-10