Load packages and data

library(fpp3)

Questions

Exercise 1

miss.piggy <- aus_livestock %>%
  filter(State == "Victoria" & (Animal == "Pigs") )

fit.piggy <- miss.piggy %>%
  model(ets = ETS(Count)) %>%
  report()
## Series: Count 
## Model: ETS(A,N,A) 
##   Smoothing parameters:
##     alpha = 0.3579401 
##     gamma = 0.0001000139 
## 
##   Initial states:
##     l[0]     s[0]    s[-1]     s[-2]     s[-3]     s[-4]     s[-5]   s[-6]
##  95487.5 2066.235 6717.311 -2809.562 -1341.299 -7750.173 -8483.418 5610.89
##      s[-7]    s[-8]     s[-9]   s[-10]   s[-11]
##  -579.8107 1215.464 -2827.091 1739.465 6441.989
## 
##   sigma^2:  60742898
## 
##      AIC     AICc      BIC 
## 13545.38 13546.26 13610.24
fit.piggy
## # A mable: 1 x 3
## # Key:     Animal, State [1]
##   Animal State             ets
##   <fct>  <fct>         <model>
## 1 Pigs   Victoria <ETS(A,N,A)>
forecast.piggy <- fit.piggy %>%
  forecast(h = 4) %>%
  autoplot(miss.piggy)

fc.piggy <- fit.piggy %>%
  forecast(h=4)

forecast.piggy

#b

firstfore <- fc.piggy %>%
  pull(.mean) %>%
  head(1)

standard.dev <- augment(fit.piggy) %>%
  pull(.resid) %>%
  sd()

bottom <- firstfore - 1.96 * standard.dev
top <- firstfore + 1.96 * standard.dev
results <- c(bottom, top)
names(results) <- c('Lower', 'Upper')
results
#Roughly 69328 (Lower) and 99521 (Upper).
hilo(fc.piggy$Count, 95)
## <hilo[4]>
## [1] [69149.19,  99700.22]95 [68933.39, 101382.56]95 [74445.66, 108687.93]95
## [4] [72125.42, 108071.45]95
#The manual interval is slightly narrower than the 95% interval R produces with hilo(), because the residual standard deviation is a little smaller than the model's forecast standard deviation.
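#As a cross-check, the interval can also be rebuilt from the model's estimated sigma^2 rather than the residual standard deviation; that should essentially reproduce hilo()'s first interval above. A minimal sketch (mu1 and sigma.check are just illustrative names; glance() is used here to pull sigma2 out of the mable):

mu1 <- fc.piggy %>%
  pull(.mean) %>%
  head(1)
sigma.check <- sqrt(glance(fit.piggy)$sigma2)
c(Lower = mu1 - 1.96 * sigma.check, Upper = mu1 + 1.96 * sigma.check)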

Exercise 2

#a
iceland <- global_economy %>%
  filter(Country == "Iceland") %>%
  select("Exports")
autoplot(iceland, Exports)

#There is a lot of fluctuation in the data and no clear trend, despite the massive spike at the end. I don't spot any clear seasonality or cycle.

#b
icelandfit <- iceland %>%
  model(ETS(Exports ~ error("A") + trend("N") + season("N")))
  
icelandfc <- icelandfit %>%
  forecast(h=10) %>%
  autoplot(iceland)

icelandfc

#c
accuracy(icelandfit)
## # A tibble: 1 x 10
##   .model                 .type     ME  RMSE   MAE    MPE  MAPE  MASE RMSSE  ACF1
##   <chr>                  <chr>  <dbl> <dbl> <dbl>  <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 "ETS(Exports ~ error(~ Trai~ 0.0980  3.47  2.61 -0.192  6.88 0.983 0.991 0.257
#RMSE = 3.47

#d
icelandfit2 <- iceland %>%
  model(ETS(Exports ~ error("A") + trend("A") + season("N")))
accuracy(icelandfit2)
## # A tibble: 1 x 10
##   .model                 .type     ME  RMSE   MAE    MPE  MAPE  MASE RMSSE  ACF1
##   <chr>                  <chr>  <dbl> <dbl> <dbl>  <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 "ETS(Exports ~ error(~ Trai~ 0.0929  3.50  2.67 -0.222  7.03  1.01 0.999 0.254
#RMSE = 3.50, which is higher than the RMSE from the A,N,N model. Adding a trend component actually made the in-sample fit slightly worse, which suggests there isn't a clear trend. So we'll go with the A,N,N model.


#e
icelandfc2 <- icelandfit2 %>%
  forecast(h=10) %>%
  autoplot(iceland)
icelandfc2

#The forecast from the A,A,N model has a slight upward slope, while the forecast from the A,N,N model is flat (constant). This is of course because the A,A,N model accounts for a trend in the data while the A,N,N model does not.
#The A,N,N model yields a lower RMSE and MAE, though it's hard to believe exports will stay constant given how much the series fluctuates. Still, I trust the A,N,N forecast more because there isn't really a clear trend in the data.
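#Since the two models are nearly tied on training accuracy, AICc is another way to arbitrate. A minimal sketch, fitting both specifications in one model() call so glance() lines them up (the ANN/AAN labels are just illustrative):

iceland %>%
  model(
    ANN = ETS(Exports ~ error("A") + trend("N") + season("N")),
    AAN = ETS(Exports ~ error("A") + trend("A") + season("N"))
  ) %>%
  glance() %>%
  select(.model, AICc)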

Exercise 3

china <- global_economy %>%
  filter(Country == "China") %>%
  mutate(GDP = GDP/1e9) %>%
  select(GDP)
autoplot(china)
## Plot variable not specified, automatically selected `.vars = GDP`

#There is a clear upward trend with no seasonality. So we want an ETS that accounts for trend but no seasonality. 

fit.china <- china %>%
  model(ets = ETS(GDP))
fit.china
## # A mable: 1 x 1
##            ets
##        <model>
## 1 <ETS(M,A,N)>
# M,A,N is the proposed ETS model; let's look at it.

fit.china.man <- china %>%
  model(ETS(GDP ~ error("M") + trend("A") + season("N")))

fit.china.manfc <- fit.china.man %>%
  forecast(h= 20) %>%
  autoplot(china)
fit.china.manfc

damp.chinese.man <- china %>%
  model(hw = ETS(GDP ~ error("M") + trend("Ad") + season("N"))) %>%
  forecast(h = 20) %>%
  autoplot(china)
damp.chinese.man

chinese.fits <- china %>%
  model(
    SES = ETS(GDP ~ error("A") + trend("N") + season("N")),
    Holt = ETS(GDP ~ error("A") + trend("A") + season("N")),
    Damped = ETS(GDP ~ error("A") + trend("Ad") + season("N"))
  )

chinese.fits.fc <- chinese.fits %>%
  forecast(h=20) %>%
  autoplot(china)
chinese.fits.fc

accuracy(chinese.fits)
## # A tibble: 3 x 10
##   .model .type       ME  RMSE   MAE   MPE  MAPE  MASE RMSSE     ACF1
##   <chr>  <chr>    <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>    <dbl>
## 1 SES    Training 210.   416. 213.   8.25 10.9  0.983 0.991  0.789  
## 2 Holt   Training  23.6  190.  96.0  1.36  7.72 0.443 0.453  0.00882
## 3 Damped Training  29.4  190.  94.8  1.86  7.56 0.437 0.454 -0.00684
china %>%
  features(GDP, features = guerrero)
## # A tibble: 1 x 1
##   lambda_guerrero
##             <dbl>
## 1         -0.0345
#Lambda = -0.0345

lambda <- china %>%
  features(GDP, features = guerrero) %>%
  pull(lambda_guerrero)

china.box.cox <- china %>%
  model(
    SES = ETS(box_cox(GDP, lambda) ~ error("A") + trend("N") + season("N")),
    Holt = ETS(box_cox(GDP, lambda) ~ error("A") + trend("A") + season("N")),
    Damped = ETS(box_cox(GDP, lambda) ~ error("A") + trend("Ad") + season("N"))
  )
china.box.coxfc <- china.box.cox %>%
  forecast(h=20) %>%
  autoplot(china)

china.box.coxfc
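#As a rough numeric check on the Box-Cox variants, their training accuracy can still be compared; fable should report errors on the original GDP scale because it back-transforms the fitted values. A minimal sketch:

accuracy(china.box.cox) %>%
  select(.model, RMSE, MAE)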

Exercise 4

aus_production
## # A tsibble: 218 x 7 [1Q]
##    Quarter  Beer Tobacco Bricks Cement Electricity   Gas
##      <qtr> <dbl>   <dbl>  <dbl>  <dbl>       <dbl> <dbl>
##  1 1956 Q1   284    5225    189    465        3923     5
##  2 1956 Q2   213    5178    204    532        4436     6
##  3 1956 Q3   227    5297    208    561        4806     7
##  4 1956 Q4   308    5681    197    570        4418     6
##  5 1957 Q1   262    5577    187    529        4339     5
##  6 1957 Q2   228    5651    214    604        4811     7
##  7 1957 Q3   236    5317    227    603        5259     7
##  8 1957 Q4   320    6152    222    582        4735     6
##  9 1958 Q1   272    5758    199    554        4608     5
## 10 1958 Q2   233    5641    229    620        5196     7
## # ... with 208 more rows
## # i Use `print(n = ...)` to see more rows
gas <- aus_production %>%
  select(Gas)
autoplot(gas)
## Plot variable not specified, automatically selected `.vars = Gas`

fit.gas <- gas %>%
  model(ets = ETS(Gas))
fit.gas
## # A mable: 1 x 1
##            ets
##        <model>
## 1 <ETS(M,A,M)>
# M,A,M is the recommended ETS model.


fit.gas2 <- gas %>%
  model(ETS( Gas ~ error("M") + trend("A") + season("M")))
accuracy(fit.gas2)
## # A tibble: 1 x 10
##   .model                .type     ME  RMSE   MAE   MPE  MAPE  MASE RMSSE    ACF1
##   <chr>                 <chr>  <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>   <dbl>
## 1 "ETS(Gas ~ error(\"M~ Trai~ -0.115  4.60  3.02 0.199  4.08 0.542 0.606 -0.0131
#rmse = 4.60

fit.gas.fc <- fit.gas2 %>%
  forecast(h=36) %>%
  autoplot(gas)
fit.gas.fc  

#Multiplicative seasonality is necessary here because the seasonal variation grows with the level of the series, which is clearly visible in the plot of Gas.
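#To back that up numerically, an additive-seasonal fit can be compared directly against the multiplicative one. A minimal sketch using training accuracy (the additive/multiplicative labels are just illustrative):

gas %>%
  model(
    additive = ETS(Gas ~ error("A") + trend("A") + season("A")),
    multiplicative = ETS(Gas ~ error("M") + trend("A") + season("M"))
  ) %>%
  accuracy() %>%
  select(.model, RMSE, MAE)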

damp.gas <- gas %>%
  model(ETS(Gas ~ error("M") + trend("Ad") + season("M")))
accuracy(damp.gas)
## # A tibble: 1 x 10
##   .model              .type       ME  RMSE   MAE   MPE  MAPE  MASE RMSSE    ACF1
##   <chr>               <chr>    <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>   <dbl>
## 1 "ETS(Gas ~ error(\~ Trai~ -0.00439  4.59  3.03 0.326  4.10 0.544 0.606 -0.0217
#rmse = 4.59
#The rmse from the damped trend model is lower than that of the non-damped, but the mae is higher. 

damp.gas.fc <- damp.gas %>%
  forecast(h=36) %>%
  autoplot(gas)

damp.gas.fc

#I would say the forecasts are nearly identical and the RMSE values are very close to one another, so I would not say damping improves the forecast here.
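#With the error metrics this close, AICc can also be compared by refitting the undamped and damped specifications side by side. A minimal sketch (the MAM/MAdM labels are just illustrative):

gas %>%
  model(
    MAM = ETS(Gas ~ error("M") + trend("A") + season("M")),
    MAdM = ETS(Gas ~ error("M") + trend("Ad") + season("M"))
  ) %>%
  glance() %>%
  select(.model, AICc)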

Exercise 5

set.seed(241241241)
myseries <- aus_retail %>%
  filter(`Series ID` == sample(aus_retail$`Series ID`, 1))
myseries %>% autoplot(Turnover)

#a

#Multiplicative seasonality will be necessary because the seasonal variation grows with the level of the series over time.
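#A season plot is a quick way to check this; a minimal sketch (gg_season() comes from feasts, which fpp3 loads):

myseries %>%
  gg_season(Turnover)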


#b

multi.model <- myseries %>%
  model(Multiplicative = ETS(Turnover ~ error("M") + trend("A") + season("M")))

accuracy(multi.model)
## # A tibble: 1 x 12
##   State   Indus~1 .model .type    ME  RMSE   MAE   MPE  MAPE  MASE RMSSE    ACF1
##   <chr>   <chr>   <chr>  <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>   <dbl>
## 1 Victor~ Other ~ Multi~ Trai~  1.05  20.2  14.0 0.205  2.91 0.379 0.424 -0.0892
## # ... with abbreviated variable name 1: Industry
#rmse = 20.2 

multi.model.fc <- multi.model %>%
  forecast(h=36) %>%
  autoplot(myseries)
multi.model.fc

multi.model.damp <- myseries %>%
  model(Multiplicative = ETS(Turnover ~ error("M") + trend("Ad") + season("M")))
accuracy(multi.model.damp)
## # A tibble: 1 x 12
##   State    Indus~1 .model .type    ME  RMSE   MAE   MPE  MAPE  MASE RMSSE   ACF1
##   <chr>    <chr>   <chr>  <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>  <dbl>
## 1 Victoria Other ~ Multi~ Trai~  1.73  20.3  14.1 0.314  2.92 0.382 0.425 -0.115
## # ... with abbreviated variable name 1: Industry
#rmse = 20.3

multi.damp.fc <- multi.model.damp %>%
  forecast(h=36) %>%
  autoplot(myseries)
multi.damp.fc

#c

#The RMSE and MAE are lower without damping than with it, so I'll go with the undamped model.


#d

gg_tsresiduals(multi.model)

#The autocorrelation is significant. A Ljung-Box test would confirm this, though it may not even be necessary given how blatant the ACF spikes are.
#The innovation residuals are not bad: there is no clear trend and they are close to homoskedastic. The residuals appear roughly normally distributed, perhaps with a slight skew to the right.
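#Here is that Ljung-Box test, as a sketch (assuming lag = 24, i.e. two years of monthly data; a very small p-value would confirm the residual autocorrelation):

augment(multi.model) %>%
  features(.innov, ljung_box, lag = 24)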


#e 


train.turnover <- myseries %>%
  filter(year(Month) < 2011)

test.turnover <- myseries %>%
  filter(year(Month) > 2010)


turnover.fit <- train.turnover %>%
  model(
    "Holt-Winters' Damped" = ETS(Turnover ~ error("M") + 
                                   trend("Ad") +
                                   season("M")),
    "Holt-Winters' Multiplicative" = ETS(Turnover ~ error("M") + 
                                           trend("A") +
                                           season("M")),
    "Seasonal Naïve Forecast" = SNAIVE(Turnover)
  )
fc.turnover <- turnover.fit %>%
  forecast(h = 36)

accuracy(fc.turnover, test.turnover)
## # A tibble: 3 x 12
##   .model     State Indus~1 .type    ME  RMSE   MAE   MPE  MAPE  MASE RMSSE  ACF1
##   <chr>      <chr> <chr>   <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Holt-Wint~ Vict~ Other ~ Test  -34.6  72.2  57.0 -4.63  7.15   NaN   NaN 0.672
## 2 Holt-Wint~ Vict~ Other ~ Test  -71.2 111.   82.8 -9.20 10.5    NaN   NaN 0.731
## 3 Seasonal ~ Vict~ Other ~ Test   17.6  60.6  51.1  2.06  6.23   NaN   NaN 0.653
## # ... with abbreviated variable name 1: Industry
#No, I could not. The seasonal naïve model produced the lowest RMSE and MAE on the test set, so I did not beat the seasonal naïve benchmark.

Exercise 6

#a
aus.trips <- tourism %>%
  summarise(Trips = sum(Trips))

autoplot(aus.trips)
## Plot variable not specified, automatically selected `.vars = Trips`

#There is a small declining trend until around 2010, after which the series increases pretty dramatically. There is definitely seasonality in the data: trips spike in Q1 and then decline until the next Q1.
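#A subseries plot makes the Q1 peak easy to verify; a minimal sketch (gg_subseries() comes from feasts, which fpp3 loads):

aus.trips %>%
  gg_subseries(Trips)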


#b

stl.trips <- aus.trips %>%
  model(STL(Trips)) %>%
  components() %>%
  as_tsibble() %>%
  autoplot(season_adjust)
stl.trips

#c

aus.aadn <- aus.trips %>%
  model(
    decomposition_model(STL(Trips),
                        ETS(season_adjust ~ error("A") + 
                              trend("Ad") + 
                              season("N")
                        )
    )
  )
accuracy(aus.aadn)
## # A tibble: 1 x 10
##   .model                 .type    ME  RMSE   MAE   MPE  MAPE  MASE RMSSE    ACF1
##   <chr>                  <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>   <dbl>
## 1 "decomposition_model(~ Trai~  103.  763.  576. 0.367  2.72 0.607 0.629 -0.0174
#rmse = 763, mae = 576


aus.aadn.fc <- aus.aadn %>%
  forecast(h = "2 years") %>%
  autoplot(aus.trips) + labs(title = "A,Ad,N Forecast")
aus.aadn.fc

#d

aus.aan <- aus.trips %>%
  model(
    decomposition_model(STL(Trips),
                        ETS(season_adjust ~ error("A") + 
                              trend("A") + 
                              season("N")
                        )
    )
  )
accuracy(aus.aan)
## # A tibble: 1 x 10
##   .model                 .type    ME  RMSE   MAE   MPE  MAPE  MASE RMSSE    ACF1
##   <chr>                  <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>   <dbl>
## 1 "decomposition_model(~ Trai~  99.7  763.  574. 0.359  2.71 0.604 0.628 -0.0182
#rmse = 763, mae = 574

aus.aan.fc <- aus.aan %>%
  forecast(h = "2 years") %>%
  autoplot(aus.trips) + labs( title = "A,A,N Forecast")
aus.aan.fc

#e

aus.ets.model <- aus.trips %>%
  model(ETS(Trips))
aus.ets.model 
## # A mable: 1 x 1
##   `ETS(Trips)`
##        <model>
## 1 <ETS(A,A,A)>
#A,A,A
accuracy(aus.ets.model)
## # A tibble: 1 x 10
##   .model     .type       ME  RMSE   MAE   MPE  MAPE  MASE RMSSE     ACF1
##   <chr>      <chr>    <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>    <dbl>
## 1 ETS(Trips) Training  105.  794.  604. 0.379  2.86 0.636 0.653 -0.00151
#rmse = 794
  
aus.ets.fc <- aus.ets.model %>%
  forecast(h = "2 years") %>%
  autoplot(aus.trips) + labs(title = "A,A,A Forecast")
aus.ets.fc

#f
#The A,A,N model gives the best in-sample fit. It has the same RMSE as the A,Ad,N model but a lower MAE.
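#Putting the three training accuracy tables side by side makes this comparison easier to read. A minimal sketch (the model labels are just re-assigned by hand for readability):

bind_rows(
  accuracy(aus.aadn) %>% mutate(.model = "STL + ETS(A,Ad,N)"),
  accuracy(aus.aan) %>% mutate(.model = "STL + ETS(A,A,N)"),
  accuracy(aus.ets.model) %>% mutate(.model = "ETS(A,A,A)")
) %>%
  select(.model, RMSE, MAE)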


#g

#The forecasts are honestly really similar for all three, so I'm going to stick with the one with the best in-sample fit: the A,A,N model.

#h
gg_tsresiduals(aus.aan)
## Warning: Removed 4 row(s) containing missing values (geom_path).
## Warning: Removed 4 rows containing missing values (geom_point).
## Warning: Removed 4 rows containing non-finite values (stat_bin).

#The autocorrelation looks good with the exception of lag 14, but that's pretty far out, so it's still acceptable. The residuals are definitely skewed to the right, but they look close to white noise apart from a few questionable points.

Exercise 7

new.zeal <- aus_arrivals %>% 
  filter(Origin == 'NZ')

#a 

autoplot(new.zeal)
## Plot variable not specified, automatically selected `.vars = Arrivals`

#It looks like the seasonal variation increases slightly before levelling off around 2005. An upward trend is definitely present.


#b

zeal.training <- new.zeal %>%
  filter(year(Quarter) < 2011)

zeal.test <- new.zeal %>%
  filter(year(Quarter) > 2010)

zeal.training %>%
  model(
    ETS(Arrivals ~ error("M") +
          trend("A") +
          season("M"))
  ) %>%
  forecast(h="2 years") %>%
  autoplot(level = NULL) + 
  autolayer(new.zeal, Arrivals) +
  labs(title = "Train forecast comparison to the actual data.")

#c

#Multiplicative seasonality is necessary because the seasonal variation increases with the level of the series.


#d

fit.zeal <- zeal.training %>%
  model(
    'ETS model' = ETS(Arrivals),
    'Additive to a Log Transformed' = ETS(log(Arrivals) ~ error("A") +
                                            trend("A") +
                                            season("A")),
    'Seasonal Naïve Method' = SNAIVE(Arrivals),
    'STL decomposition applied to the log transformed' = decomposition_model(STL(log(Arrivals)), 
                                                                             ETS(season_adjust))
  ) 

zeal.fc <- fit.zeal %>%
  forecast(h= "2 years")

zeal.fc %>%
  autoplot(level = NULL) + 
  autolayer(zeal.test, Arrivals) +
  guides(colour=guide_legend(title="Forecast")) +
  labs(title = "New Zealand Visitors Train Forecast Comparison to the Actual Data.")

#e

accuracy(zeal.fc, zeal.test)
## Warning: The future dataset is incomplete, incomplete out-of-sample data will be treated as missing. 
## 1 observation is missing at 2012 Q4
## # A tibble: 4 x 11
##   .model      Origin .type      ME   RMSE    MAE   MPE  MAPE  MASE RMSSE    ACF1
##   <chr>       <chr>  <chr>   <dbl>  <dbl>  <dbl> <dbl> <dbl> <dbl> <dbl>   <dbl>
## 1 Additive t~ NZ     Test  -20920. 27624. 20920. -6.78  6.78   NaN   NaN -0.0169
## 2 ETS model   NZ     Test  -10467. 18302. 12746. -3.27  4.16   NaN   NaN -0.0857
## 3 Seasonal N~ NZ     Test    5692. 14962. 14203.  2.27  4.97   NaN   NaN -0.116 
## 4 STL decomp~ NZ     Test  -22350. 28963. 22350. -7.28  7.28   NaN   NaN -0.0118
#The seasonal naive method produced the lowest rmse. 

Snaive.zeal <- zeal.training %>%
  model(SNAIVE(Arrivals))

gg_tsresiduals(Snaive.zeal)
## Warning: Removed 4 row(s) containing missing values (geom_path).
## Warning: Removed 4 rows containing missing values (geom_point).
## Warning: Removed 4 rows containing non-finite values (stat_bin).

#We have autocorrelation from the first lags onward. The residuals do not appear to be normally distributed around 0, and they do not resemble white noise. This model does not look great, so I would say it does not pass the residual checks.
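#A Ljung-Box test makes that formal; a sketch (assuming lag = 8, i.e. two years of quarterly data; a tiny p-value would confirm the residuals are not white noise):

augment(Snaive.zeal) %>%
  features(.innov, ljung_box, lag = 8)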


#f

zeal.cv <- new.zeal %>%
  slice(1:(n() - 3)) %>%
  stretch_tsibble(.init = 36, .step = 3)

zeal.cv %>% 
  model(
    'ETS model' = ETS(Arrivals),
    'Additive to a Log Transformed' = ETS(log(Arrivals) ~ error("A") +
                                            trend("A") +
                                            season("A")),
    'Seasonal Naïve Method' = SNAIVE(Arrivals),
    'STL decomposition applied to the log transformed' = decomposition_model(STL(log(Arrivals)), 
                                                                             ETS(season_adjust))
  ) %>%
  forecast(h = 3) %>%
  accuracy(new.zeal)
## # A tibble: 4 x 11
##   .model          Origin .type    ME   RMSE    MAE   MPE  MAPE  MASE RMSSE  ACF1
##   <chr>           <chr>  <chr> <dbl>  <dbl>  <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Additive to a ~ NZ     Test  -375. 14347. 11021. 0.200  5.83 0.741 0.746 0.309
## 2 ETS model       NZ     Test  4627. 15327. 11799. 2.23   6.45 0.793 0.797 0.283
## 3 Seasonal Naïve~ NZ     Test  8244. 18768. 14422. 3.83   7.76 0.970 0.976 0.566
## 4 STL decomposit~ NZ     Test  4252. 15618. 11873. 2.04   6.25 0.798 0.812 0.244
#Under cross-validation, the additive ETS fitted to the log-transformed data gives the lowest RMSE instead of the seasonal naïve method.

Exercise 8

#a

cement <- aus_production %>%
  select(Quarter, Cement) %>%
  as_tsibble(index = Quarter)

autoplot(cement)
## Plot variable not specified, automatically selected `.vars = Cement`

cement.cv <- cement %>%
  slice(1:(n()-4)) %>%                             
  stretch_tsibble(.init = 5, .step = 1)                     

cement.model <- cement.cv %>%
  model(ETS(Cement),
        SNAIVE(Cement)) %>%
  forecast(h = "1 year")

#b

cement.cv2 <- cement %>%
  stretch_tsibble(.init = 5, .step = 4)
  
cement.model2 <- cement.cv2 %>%
  model(ETS(Cement),
        SNAIVE(Cement)) %>%
  forecast(h= "1 year")

cement.accuracy <- accuracy(cement.model2, cement) %>%
  mutate(MSE = (RMSE)^2) %>%
  select(MSE, RMSE)
## Warning: The future dataset is incomplete, incomplete out-of-sample data will be treated as missing. 
## 3 observations are missing between 2010 Q3 and 2011 Q1
cement.accuracy
## # A tibble: 2 x 2
##      MSE  RMSE
##    <dbl> <dbl>
## 1  9962.  99.8
## 2 18028. 134.
#Looks like the ETS model has the smaller MSE and RMSE, so we'll go with that one. Looking at the data, that makes sense: the seasonal naïve method can't account for the clear upward trend.
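#Keeping the .model labels makes it explicit which row belongs to which model; a minimal sketch repeating the same accuracy calculation:

accuracy(cement.model2, cement) %>%
  mutate(MSE = (RMSE)^2) %>%
  select(.model, MSE, RMSE)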