HW 5

Questions

Exercise 1

# Pigs slaughtered in Victoria:

pigs <- aus_livestock %>%
  as_tsibble(index = Month) %>% 
  filter(Animal == "Pigs", State == "Victoria")
pigs

## # A tsibble: 558 x 4 [1M]
## # Key:       Animal, State [1]
##       Month Animal State     Count
##       <mth> <fct>  <fct>     <dbl>
##  1 1972 Jul Pigs   Victoria  94200
##  2 1972 Aug Pigs   Victoria 105700
##  3 1972 Sep Pigs   Victoria  96500
##  4 1972 Oct Pigs   Victoria 117100
##  5 1972 Nov Pigs   Victoria 104600
##  6 1972 Dec Pigs   Victoria 100500
##  7 1973 Jan Pigs   Victoria  94700
##  8 1973 Feb Pigs   Victoria  93900
##  9 1973 Mar Pigs   Victoria  93200
## 10 1973 Apr Pigs   Victoria  78000
## # … with 548 more rows
## # ℹ Use `print(n = ...)` to see more rows

# a. 

# Here is the SES model with respect to the number of pigs slaughtered in 
# Victoria 

fit_pigs <- pigs %>% 
  model(SES = ETS(Count ~ error("A") + trend("N") + season("N")))

# The SES model is fitted automatically to minimize the SSE. Thus, 
# the optimal value of the weight and the level can found below:

tidy(fit_pigs)

## # A tibble: 2 × 5
##   Animal State    .model term    estimate
##   <fct>  <fct>    <chr>  <chr>      <dbl>
## 1 Pigs   Victoria SES    alpha      0.322
## 2 Pigs   Victoria SES    l[0]  100647.

# The optimal weight is 0.322 and the optimal level value is 
# 100,647. An extremely large number, but I guess it makes sense 
# considering the data. 

# Now, generating the forecasts for the next four months. The data 
# is already in year-month intervals, so h = 4. 

fc_pigs <- fit_pigs %>% 
  forecast(h = 4)
View(fc_pigs)

# All four point forecasts have the same value which makes sense since 
# it acts like the naive method (since this method is used for time series
# that has no clear seasonal patterns or trend).

# To plot the foreacst, I am going to start the data from 2010. 

pigs1 <- pigs %>% 
  filter(year(Month) >= 2010)

fc_pigs1 <- pigs1 %>% 
  model(SES = ETS(Count ~ error("A") + trend("N") + season("N"))) %>% 
  forecast(h = 4) %>% 
  autoplot(pigs1) + 
  labs(y = "Number of Pigs Slaughtered", title = "Victorian Pigs Slaughtered forecast for the Next Four Months")
fc_pigs1

# b. 


# The prediction interval at the 95% level calculated by R for the first forecast 
# is found from the hilo() function 

pi <- fc_pigs %>% 
  hilo(level = 95)
View(pi)

# which is [76,854.79, 113,518.3].

# Calculating the prediction interval by hand consisted of finding the variance 
# of the residuals from the report() function


report(fit_pigs)

## Series: Count 
## Model: ETS(A,N,N) 
##   Smoothing parameters:
##     alpha = 0.3221247 
## 
##   Initial states:
##      l[0]
##  100646.6
## 
##   sigma^2:  87480760
## 
##      AIC     AICc      BIC 
## 13737.10 13737.14 13750.07

# and plugging it into the Forecast variance formula for the (A,N,N) model from 
# slide 90/95. Since for the first forecast h = 1, the value for the Variance 
# of the forecast ends up being equivalent to the variance of the residuals. You
# then take the square root of the value: 


std <- sqrt(87480760)
std

## [1] 9353.115

# Then, multiply this value by 1.96 (the c value for the 95 percent level) which
# equals 

int <- (std)*(1.96)
int

## [1] 18332.11

# Finally, you add and subtract the value to the first forecast. 

y <- 95186.56

sub <- y - int

add <- y + int

# Therefore, the interval calculated by hand is [76,854.45, 113,518.67] which 
# is basically the approximate value found by R!

Exercise 2

# I am going to choose Argentina as my country to analyze. I also
# convert it into a tsibble.

arg <- global_economy %>% 
  filter(Country == "Argentina") %>% 
  select(Year, Country, Exports) %>% 
  as_tsibble(index = Year)
arg

## # A tsibble: 58 x 3 [1Y]
## # Key:       Country [1]
##     Year Country   Exports
##    <dbl> <fct>       <dbl>
##  1  1960 Argentina    7.60
##  2  1961 Argentina    5.99
##  3  1962 Argentina    4.69
##  4  1963 Argentina    7.89
##  5  1964 Argentina    5.56
##  6  1965 Argentina    6.23
##  7  1966 Argentina    6.65
##  8  1967 Argentina    7.50
##  9  1968 Argentina    6.48
## 10  1969 Argentina    6.40
## # … with 48 more rows
## # ℹ Use `print(n = ...)` to see more rows

# a.



autoplot(arg)

## Plot variable not specified, automatically selected `.vars = Exports`

# There is no clear seasonal patterns within the time series, and
# there is an random incline within the data at the start of the year 
# 2000. 

# I am going to use STL decomposition to examine the components different
# components of the data.

arg %>% 
  model(STL(Exports ~ trend() + season(), robust = TRUE)) %>% 
  components() %>% 
  autoplot()

# It isn't posting the seasonal component, so I'm anticipating that there 
# doesn't exist any seasonality. Therefore, Holt's linear trend or SES method 
# is applicable. 



# b.



# Using a SES model with cross-validation to forecast the next 10 years.

# The fit of the model

fit <- arg %>% 
  model(SES = ETS(Exports ~ error("A") + trend("N") + season("N")))
 

# Forecasting it for the next 10 years so h = 10

fc <- fit %>% 
  forecast(h = 10)

# Now plotting it

fc %>% 
  autoplot(arg)

# A really big prediction interval that is normally distributed. The point 
# forecast also seems to have a mean of zero. 



# c. 



# Using the accuracy function to compute the RMSE values for the training data.

# First, I must create the training set data and my test set.

train <- arg %>% 
  filter(Year <= 2002)

test <- arg %>% 
  filter(Year > 2002)

# The training set is about 70% of the entire data set. 

# The forecast of the training set:

fc1 <- train %>% 
  model(SES = ETS(Exports ~ error("A") + trend("N") + season("N"))) %>% 
  forecast(h = 15)

# And now the RMSE values for the training data:

accuracy(fc1, arg)

## # A tibble: 1 × 11
##   .model Country   .type    ME  RMSE   MAE   MPE  MAPE  MASE RMSSE  ACF1
##   <chr>  <fct>     <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 SES    Argentina Test  -8.38  9.65  8.38 -57.0  57.0  4.82  3.10 0.786

# which is 9.65, a very high number for RMSE values.



# d. 



# To compare, I am going to create a forecast using Holt's linear trend
# method with the model below. This method is best for use when the time 
# series contains a trend. For Holt's linear trend, it is an additive 
# trend method.

# The fit:

fit2 <- arg %>% 
  model(Holt = ETS(Exports ~ error("A") + trend("A") + season("N")))

# The forecast

fc2 <- fit2 %>% 
  forecast(h = 10)

# And now the plot

fc2 %>% 
  autoplot(arg)

# I now am creating a forecast using the training data to compute the RMSE

fc3 <- train %>% 
  model(Holt = ETS(Exports ~ error("A") + trend("A") + season("N"))) %>% 
  forecast(h = 15)

# Finally, the RMSE

accuracy(fc3, arg)

## # A tibble: 1 × 11
##   .model Country   .type    ME  RMSE   MAE   MPE  MAPE  MASE RMSSE  ACF1
##   <chr>  <fct>     <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Holt   Argentina Test  -10.3  12.3  10.3 -71.4  71.5  5.95  3.95 0.795

# The RMSE Holt's training data is higher than the SES method. 


# e. 


# From the STL decomposition, there seems to be a slight increasing trend in the data. 
# Thus, Holt's linear method seems to be the winner even though the huge random incline 
# caused the RMSE to be larger than the SES method. The SES method is best for whenever there
# isn't any seasonality or trend. Therefore, Holt's linear method is the best out of 
# these two forecasts.

Exercise 3

# Chinese GDP time series

china <- global_economy %>% 
  filter(Country == "China") %>% 
  select(Year, Country, GDP) %>% 
  as_tsibble(index = Year)
china

## # A tsibble: 58 x 3 [1Y]
## # Key:       Country [1]
##     Year Country          GDP
##    <dbl> <fct>          <dbl>
##  1  1960 China   59716467625.
##  2  1961 China   50056868958.
##  3  1962 China   47209359006.
##  4  1963 China   50706799903.
##  5  1964 China   59708343489.
##  6  1965 China   70436266147.
##  7  1966 China   76720285970.
##  8  1967 China   72881631327.
##  9  1968 China   70846535056.
## 10  1969 China   79705906247.
## # … with 48 more rows
## # ℹ Use `print(n = ...)` to see more rows

# To forecast the data, I am going to visualize the data 
# first 

autoplot(china)

## Plot variable not specified, automatically selected `.vars = GDP`

# There is no seasonality or cyclic patterns in any sense. 
# There is a nonlinear increasing trend though. Thus, I am going 
# to employ methods that don't involve any seasonality:
# SES, additive damped trend, and Holt's linear trend. 


# I am going to plot the auto and then add various options within the ETS 
# model to compare. 

fc <- china %>% 
  model(auto = ETS(GDP)) %>% 
  forecast(h = 20) %>% 
  autoplot(china)
fc

# The graph visually changed, its more flat compared to the original graph. 
# I'm guessing it's because the graph zoomed out so much from the prediction 
# interval. 

# Now, I am adding various options within the ETS method to create a damped trend,
# Holt's linear trend, and an SES (since all three methods don't require seasonality)
# to see the differences amongst them.

# First, the Damped trend (additive)

china %>% 
  model(damp = ETS(GDP ~ error("A") + trend("Ad") + season("N"))) %>% 
  forecast(h = 20) %>% 
  autoplot(china)

# The damped trend is exactly what it sound like - an trend that dips down (increasing
# at a decreasing rate). It seems to fit the data pretty well, but I would have to know 
# more information to know if the data would damp like that in the future. 

# Next, Holt's linear trend method:

china %>% 
  model(holt = ETS(GDP ~ error("A") + trend("A") + season("N"))) %>% 
  forecast(h = 20) %>% 
  autoplot(china)

# Holt's linear method produces a linear point forecast with a trend. This seems to fit
# the data's predicted forecast pretty nicely. 

# Now, I want to employ an SES method to the data. 

china %>% 
  model(SES = ETS(GDP ~ error("A") + trend("N") + season("N"))) %>% 
  forecast(h = 20) %>% 
  autoplot(china)

# The SES method is like Holt's linear trend method, except it doesn't have an inc/dec 
# trend. Thus, this forecast doesn't seem to fit well. 

# Now, I am going to find the correct lambda to find the correct transformation to balance the 
# variance, if there is one. 

china %>% 
  features(GDP, features = guerrero)

## # A tibble: 1 × 2
##   Country lambda_guerrero
##   <fct>             <dbl>
## 1 China           -0.0345

# The value is pretty close to zero. Thus, log is the appropriate transformation to 
# employ. 

china1 <- china %>% 
  mutate(GDP = log(GDP))

# Let's check the graph.

autoplot(china1)

## Plot variable not specified, automatically selected `.vars = GDP`

# The variance is indeed more balanced through out the data. I am now going 
# to use this data to forecast with an ETS model

china1 %>% 
  model(auto = ETS(GDP)) %>% 
  forecast(h = 20) %>% 
  autoplot(china1)

# The forecast seems pretty nice with a more balanced variance! 

# I want to check the accuracy of each method to see which one has the 
# lowest RMSE. 

china %>% 
  stretch_tsibble(.init = 10) %>% 
  model(SES = ETS(GDP ~ error("A") + trend("N") + season("N")),
        Holt = ETS(GDP ~ error("A") + trend("A") + season("N")),
        Damp = ETS(GDP ~ error("A") + trend("Ad") + season("N"))) %>% 
  forecast(h = 20) %>% 
  accuracy(china)

## Warning: The future dataset is incomplete, incomplete out-of-sample data will be treated as missing. 
## 20 observations are missing between 2018 and 2037

## # A tibble: 3 × 11
##   .model Country .type      ME    RMSE     MAE   MPE  MAPE  MASE RMSSE  ACF1
##   <chr>  <fct>   <chr>   <dbl>   <dbl>   <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Damp   China   Test  1.37e12 2.74e12 1.45e12  38.1  41.3  6.69  6.55 0.869
## 2 Holt   China   Test  1.28e12 2.61e12 1.38e12  35.9  39.3  6.37  6.22 0.867
## 3 SES    China   Test  1.95e12 3.45e12 1.95e12  55.2  55.4  9.01  8.23 0.878

# It seems that Holt's linear trend method works best out of these 
# three. I am also going to check the auto ETS method:

china %>% 
  model(ETS(GDP)) %>% 
  accuracy()

## # A tibble: 1 × 11
##   Country .model   .type        ME    RMSE     MAE   MPE  MAPE  MASE RMSSE  ACF1
##   <fct>   <chr>    <chr>     <dbl>   <dbl>   <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 China   ETS(GDP) Traini… 4.13e10 2.00e11 9.66e10  2.11  7.72 0.446 0.477 0.268

# Of course the auto method has the lowest RMSE. 

# What about the auto ETS model with the box-cox transformation?

china1 %>% 
  model(ETS(GDP)) %>% 
  accuracy()

## # A tibble: 1 × 11
##   Country .model   .type        ME   RMSE    MAE    MPE  MAPE  MASE RMSSE  ACF1
##   <fct>   <chr>    <chr>     <dbl>  <dbl>  <dbl>  <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 China   ETS(GDP) Training 0.0116 0.0906 0.0722 0.0434 0.272 0.620 0.687 0.179

# Look how low the RMSE is! This is definitely the best forecast overall!

Exercise 4

# Gas data from aus_production 

gas <- aus_production %>% 
  select(Quarter, Gas) %>% 
  as_tsibble(index = Quarter)
gas

## # A tsibble: 218 x 2 [1Q]
##    Quarter   Gas
##      <qtr> <dbl>
##  1 1956 Q1     5
##  2 1956 Q2     6
##  3 1956 Q3     7
##  4 1956 Q4     6
##  5 1957 Q1     5
##  6 1957 Q2     7
##  7 1957 Q3     7
##  8 1957 Q4     6
##  9 1958 Q1     5
## 10 1958 Q2     7
## # … with 208 more rows
## # ℹ Use `print(n = ...)` to see more rows

# To find an ETS model that fits best, I need to visualize the 
# data

autoplot(gas)

## Plot variable not specified, automatically selected `.vars = Gas`

# There is apparent seasonality and an increasing trend. The seasonal
# patterns' variations are changing proportional to the level of the
# series. Thus, the season component should be multiplicative. Since there
# is an increasing trend, that maybe has a slight damp, the trend component 
# should have either additive or damped in it. I will do methods
# that deal with seasonality with different trend options to compare.

# First, Holt-Winters' multiplicative method:

# before I forecast it, I am going to check the residuals

fit1 <- gas %>% 
  model(ETS(Gas ~ error("A") + trend("A") + season("M")))
fit1

## # A mable: 1 x 1
##   `ETS(Gas ~ error("A") + trend("A") + season("M"))`
##                                              <model>
## 1                                       <ETS(A,A,M)>

gg_tsresiduals(fit1)

# The variance of the residuals is not constant at all, causing the 
# calculation of the prediction interval hard to be made. There is also
# some prevalent autocorrelation. The mean is centered around 0 which is 
# good. To check is the residuals are distinguishable from white noise, I 
# am going to check the significance using the ljung-box test

# Since the seasonality is occuring about once a year, lag = 2m = 
# 2(1) = 2.

aug <- augment(fit1)

aug %>% 
  features(.innov, ljung_box, lag = 2, dof = 0)

## # A tibble: 1 × 3
##   .model                                                   lb_stat lb_pvalue
##   <chr>                                                      <dbl>     <dbl>
## 1 "ETS(Gas ~ error(\"A\") + trend(\"A\") + season(\"M\"))"    2.12     0.346

# Since p value > 0.05, the residuals are not significant at the 
# 5 percent level. Thus, the residuals are not distinguishable from 
# a white noise which is what we want. 

gas %>% 
  model(ETS(Gas ~ error("A") + trend("A") + season("M"))) %>% 
  forecast(h = 36) %>% 
  autoplot(gas)

report(fit1)

## Series: Gas 
## Model: ETS(A,A,M) 
##   Smoothing parameters:
##     alpha = 0.6132245 
##     beta  = 0.007863848 
##     gamma = 0.2244134 
## 
##   Initial states:
##      l[0]      b[0]      s[0]    s[-1]    s[-2]     s[-3]
##  3.620487 0.6098927 0.9796171 1.147619 1.075495 0.7972686
## 
##   sigma^2:  18.2237
## 
##      AIC     AICc      BIC 
## 1816.462 1817.328 1846.923

# These are the parameters estimated for Holt-Winters' multiplicative 
# method (with an additive error term).

# This forecast looks nice!

# Now, Holt-Winters' damped method:

# Checking the residuals

fit2 <- gas %>% 
  model(ETS(Gas ~ error("A") + trend("Ad") + season("M")))

gg_tsresiduals(fit2)

# The residuals seems to follow the same patterns from Holt-Winters' 
# multiplicative method. To see if the residuals are distinguishable from 
# white noise:

aug1 <- augment(fit2)

aug1 %>% 
  features(.innov, ljung_box, lag = 2, dof = 0)

## # A tibble: 1 × 3
##   .model                                                    lb_stat lb_pvalue
##   <chr>                                                       <dbl>     <dbl>
## 1 "ETS(Gas ~ error(\"A\") + trend(\"Ad\") + season(\"M\"))"    2.99     0.224

# The p value is greater than 0.05, so it's not distinguishable from 
# white noise, however it's less than the p value from Holt-Winters' 
# multiplicative method. Thus, the last method extracted the data 
# from the residuals more than this one. 

gas %>% 
  model(ETS(Gas ~ error("A") + trend("Ad") + season("M"))) %>% 
  forecast(h = 36) %>% 
  autoplot(gas)

# This forecast looks so nice! I think the damped trend improve the forecast
# from a visual standpoint.

# I am going to allow the ETS() function select the model by minimising the AICc

fit3 <- gas %>% 
  model(ETS(Gas))

report(fit3)

## Series: Gas 
## Model: ETS(M,A,M) 
##   Smoothing parameters:
##     alpha = 0.6528545 
##     beta  = 0.1441675 
##     gamma = 0.09784922 
## 
##   Initial states:
##      l[0]       b[0]      s[0]    s[-1]    s[-2]     s[-3]
##  5.945592 0.07062881 0.9309236 1.177883 1.074851 0.8163427
## 
##   sigma^2:  0.0032
## 
##      AIC     AICc      BIC 
## 1680.929 1681.794 1711.389

# The model chosen was ETS(M,A,M). Gamma is smaller in this example
# than from Holt-Winters' multi with the additive error term. This means 
# that for the ETS(M,A,M) model, the seasonality doesn't change us much 
# overtime. 

fit4 <- gas %>% 
  model(ETS(Gas ~ error("M") + trend("A") + season("M")))

fc4 <- fit4 %>% 
  forecast(h = 36)

fc4 %>% 
  autoplot(gas)

# This forecast definitely seems the most appropriate. I am going to compare the 
# components from the additive error term to the multi.

# Additive error term:

components(fit1) %>% 
  autoplot()

## Warning: Removed 4 row(s) containing missing values (geom_path).

# Multi error term:

components(fit4) %>% 
  autoplot()

## Warning: Removed 4 row(s) containing missing values (geom_path).

# The trend didn't change by much, but the seasonal patterns do not change us much 
# for the multi errors compared to the additive. The slope is also very different.

# Now, I'm checking the residuals. Note that since the model has multi errors,
# the innovation residuals are not equivalent to the regular residuals.

aug3 <- augment(fit4)

# residuals:

ggplot(aug3, aes(x = Quarter, y = .resid)) +
  geom_line()

# innovative residuals:

ggplot(aug3, aes(x = Quarter, y = .innov)) +
  geom_line()

# The innovative residuals have more of a constant variance even though it still 
# isn't very balanced. 

gg_tsresiduals(fit4)

# There seems to be a lot of autocorrelation which is not good. 
# The mean is centered around zero which is good. 

# Even though the ETS function chose the appropriate model, I am 
# going to employ a damped trend to the ETS(M,A,M) model.

fit5 <- gas %>% 
  model(ETS(Gas ~ error("M") + trend("Ad") + season("M")))

fc5 <- fit5 %>% 
  forecast(h = 36)

fc5 %>% 
  autoplot(gas)

# The forecast looks the same except with a damped trend. I am going to check
# the parameters and AICc.

report(fit5)

## Series: Gas 
## Model: ETS(M,Ad,M) 
##   Smoothing parameters:
##     alpha = 0.6489044 
##     beta  = 0.1551275 
##     gamma = 0.09369372 
##     phi   = 0.98 
## 
##   Initial states:
##      l[0]       b[0]      s[0]    s[-1]   s[-2]     s[-3]
##  5.858941 0.09944006 0.9281912 1.177903 1.07678 0.8171255
## 
##   sigma^2:  0.0033
## 
##      AIC     AICc      BIC 
## 1684.028 1685.091 1717.873

# The AICc only went up by 5 units. The parameters are very similar to 
# one another. Now, I am going to check the residuals. 

gg_tsresiduals(fit5)

# It seems to fail to the same axioms as before: not a constant variance in the 
# residuals and autocorrelation. The mean is centered around the mean. Thus, the 
# damped trend did not improve the forecast in this case. Therefore, I am sticking
# with the ETS(M,A,M) model!

Exercise 5

# My set seed is 8 (my number in soccer)

set.seed(8)
myseries <- aus_retail %>%
  filter(`Series ID` == sample(aus_retail$`Series ID`, 1))
myseries

## # A tsibble: 441 x 5 [1M]
## # Key:       State, Industry [1]
##    State           Industry                  `Series ID`    Month Turnover
##    <chr>           <chr>                     <chr>          <mth>    <dbl>
##  1 New South Wales Household goods retailing A3349397X   1982 Apr     211.
##  2 New South Wales Household goods retailing A3349397X   1982 May     224.
##  3 New South Wales Household goods retailing A3349397X   1982 Jun     216.
##  4 New South Wales Household goods retailing A3349397X   1982 Jul     226.
##  5 New South Wales Household goods retailing A3349397X   1982 Aug     217.
##  6 New South Wales Household goods retailing A3349397X   1982 Sep     213.
##  7 New South Wales Household goods retailing A3349397X   1982 Oct     224.
##  8 New South Wales Household goods retailing A3349397X   1982 Nov     244.
##  9 New South Wales Household goods retailing A3349397X   1982 Dec     393.
## 10 New South Wales Household goods retailing A3349397X   1983 Jan     220.
## # … with 431 more rows
## # ℹ Use `print(n = ...)` to see more rows

myseries %>% autoplot(Turnover)

View(myseries)


# a. 


# Multiplicative seasonality is necessary since the proportionality of seasonal
# patterns change to different levels of time.



# b. 


# Applying Holt-Winters' multiplicative method to the data. 

fit <- myseries %>% 
  model(multi = ETS(Turnover ~ error("A") + trend("A") + season("M")))

fc <- fit %>% 
  forecast(h = 36)

fc %>% 
  autoplot(myseries)

# The forecast looks very nice. Now, experimenting with the ETS model
# by making the trend damped. 

fit1 <- myseries %>% 
  model(damp = ETS(Turnover ~ error("A") + trend("Ad") + season("M")))

fc1 <- fit1 %>% 
  forecast(h = 36)

fc1 %>% 
  autoplot(myseries)

# This forecast has a damping trend which seems applicable for this type
# of time series. 


# c. 


# I am now comparing the RMSE of the one-step forecasts from the two methods
# with cross-validation.

# My data for the Holt-Winters' multi:

# myseries %>% 
#   model(ETS(Turnover ~ error("A") + trend("A") + season("M"))) %>% 
#   forecast(h = 1) %>% 
#   accuracy(myseries)

# For the damped Holt-Winters' multi:

# myseries %>% 
#   model(ETS(Turnover ~ error("A") + trend("Ad") + season("M))) %>% 
#   forecast(h = 1) %>% 
#   accuracy(myseries)


# My RMSE wouldn't load, so I am going to have to take a 
# Loss here :(




# d. 



# Checking the residuals of the ETS(A,Ad,M) model.

gg_tsresiduals(fit1)

# The variance of the innovative residuals seem constant with a few outliers. 
# There exists some autocorrelation and the mean is centered around 0. I am 
# using the ljung-box test to check the significance of my residuals. lag = 2(m) = 
# 2(12) = 24 and dof = 2.

augment(fit1) %>% 
  features(.innov, ljung_box, lag = 24, dof = 2)

## # A tibble: 1 × 5
##   State           Industry                  .model lb_stat   lb_pvalue
##   <chr>           <chr>                     <chr>    <dbl>       <dbl>
## 1 New South Wales Household goods retailing damp      71.7 0.000000356

# Since the p value is greater than 0.05, the residuals are not significant at 
# the 5 percent level, meaning the residuals are not distinguishable from white 
# noise. 

aug4 <- augment(fit1)

ggplot(aug4, aes(x = Month, y = .resid)) +
  geom_line()

# The data seems to look like white noise with a constant variance across with
# the exception of a few outliers.


# e. 



# I am now comparing the RMSE between the two methods with training 
# and test sets.

series <- myseries %>% 
  as_tsibble(index = Month)
series

## # A tsibble: 441 x 5 [1M]
## # Key:       State, Industry [1]
##    State           Industry                  `Series ID`    Month Turnover
##    <chr>           <chr>                     <chr>          <mth>    <dbl>
##  1 New South Wales Household goods retailing A3349397X   1982 Apr     211.
##  2 New South Wales Household goods retailing A3349397X   1982 May     224.
##  3 New South Wales Household goods retailing A3349397X   1982 Jun     216.
##  4 New South Wales Household goods retailing A3349397X   1982 Jul     226.
##  5 New South Wales Household goods retailing A3349397X   1982 Aug     217.
##  6 New South Wales Household goods retailing A3349397X   1982 Sep     213.
##  7 New South Wales Household goods retailing A3349397X   1982 Oct     224.
##  8 New South Wales Household goods retailing A3349397X   1982 Nov     244.
##  9 New South Wales Household goods retailing A3349397X   1982 Dec     393.
## 10 New South Wales Household goods retailing A3349397X   1983 Jan     220.
## # … with 431 more rows
## # ℹ Use `print(n = ...)` to see more rows

train <- series %>% 
  filter(year(Month) <= 2010)

test <- series %>% 
  filter(year(Month) > 2010)


fc_train <- train %>% 
  model(multi = ETS(Turnover ~ error("A") + trend("A") + season("M"))) %>% 
  forecast(h = 8)

accuracy(fc_train, series)

## # A tibble: 1 × 12
##   .model State   Indus…¹ .type    ME  RMSE   MAE   MPE  MAPE  MASE RMSSE    ACF1
##   <chr>  <chr>   <chr>   <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>   <dbl>
## 1 multi  New So… Househ… Test  -29.2  31.5  29.2 -2.81  2.81 0.715 0.617 0.00532
## # … with abbreviated variable name ¹Industry

# The RMSE for the residuals for the training set for the ETS(A,A,M) model is 
# 31.3. Now, for the ETS(A,Ad,M) model. 


fc_train1 <- train %>% 
  model(damp = ETS(Turnover ~ error("A") + trend("Ad") + season("M"))) %>% 
  forecast(h = 8)

accuracy(fc_train1, series)

## # A tibble: 1 × 12
##   .model State   Indus…¹ .type    ME  RMSE   MAE   MPE  MAPE  MASE RMSSE    ACF1
##   <chr>  <chr>   <chr>   <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>   <dbl>
## 1 damp   New So… Househ… Test  -18.1  22.8  21.2 -1.76  2.04 0.520 0.445 -0.0618
## # … with abbreviated variable name ¹Industry

# The RMSE for this model is 22.6 which is slightly lower than the first one, 
# so obviously I prefer the ETS(A,Ad,M) model based on the test set. 


# The seasonal naive approach is below.

fit2 <- train %>% 
  model(SN = SNAIVE(Turnover ~ lag(12)))

fc2 <- fit2 %>% 
  forecast(h = 8)

accuracy(fc2, series)

## # A tibble: 1 × 12
##   .model State     Indus…¹ .type    ME  RMSE   MAE   MPE  MAPE  MASE RMSSE  ACF1
##   <chr>  <chr>     <chr>   <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 SN     New Sout… Househ… Test  -21.7  28.5  21.7 -2.04  2.04 0.532 0.558 0.137
## # … with abbreviated variable name ¹Industry

# The RMSE is 28.5, so the ETS(A,Ad,M) model beat the seasonal
# naive approach! 

series %>% 
  model(damp = ETS(Turnover ~ error("A") + trend("Ad") + season("M"))) %>% 
  forecast(h = 36) %>% 
  autoplot(series)

# The forecast seems reasonable!!

Exercise 6

# The total domestic overnight tips across Australia 


trips <- tourism %>% 
  summarise(Trips = sum(Trips)) %>% 
  as_tsibble(index = Quarter)
trips

## # A tsibble: 80 x 2 [1Q]
##    Quarter  Trips
##      <qtr>  <dbl>
##  1 1998 Q1 23182.
##  2 1998 Q2 20323.
##  3 1998 Q3 19827.
##  4 1998 Q4 20830.
##  5 1999 Q1 22087.
##  6 1999 Q2 21458.
##  7 1999 Q3 19914.
##  8 1999 Q4 20028.
##  9 2000 Q1 22339.
## 10 2000 Q2 19941.
## # … with 70 more rows
## # ℹ Use `print(n = ...)` to see more rows

# a. 


# plotting the data 


autoplot(trips)

## Plot variable not specified, automatically selected `.vars = Trips`

# The seasonality doesn't seem to be clear. There is an constant trend in the data
# and then a sudden increase in the trend. This could indicate some cyclic behaviuor.


# b. 


# Now, I am decomposing the series to observe the different components
# and collect the seasonally adjusted data (y - s).


stl <- trips %>% 
  model(STL(Trips ~ season() + trend())) %>% 
  components()
View(stl)

# Now selecting the seasonally adjusted data 

adj <- stl %>% 
  select(season_adjust)
adj

## # A tsibble: 80 x 2 [1Q]
##    season_adjust Quarter
##            <dbl>   <qtr>
##  1        21939. 1998 Q1
##  2        20670. 1998 Q2
##  3        20566. 1998 Q3
##  4        20989. 1998 Q4
##  5        20843. 1999 Q1
##  6        21801. 1999 Q2
##  7        20652. 1999 Q3
##  8        20196. 1999 Q4
##  9        21093. 2000 Q1
## 10        20280. 2000 Q2
## # … with 70 more rows
## # ℹ Use `print(n = ...)` to see more rows

# I am plotting it just to observe.

adj %>% 
  autoplot()

## Plot variable not specified, automatically selected `.vars = season_adjust`

# It hasn't changed much from the original graph, indicating that there wasn't
# much seasonality within it to begin with.



# c.



# The code from the homework wasn't working, so I'm creating the ETS model using 
# the seasonally adjust data.

stl_fit <- adj %>% 
  model(ETS(season_adjust ~ error("A") + trend("Ad") + season("N")))
stl_fit

## # A mable: 1 x 1
##   `ETS(season_adjust ~ error("A") + trend("Ad") + season("N"))`
##                                                         <model>
## 1                                                 <ETS(A,Ad,N)>

# Now forecasting the model when h = 8 (8 quarters)

stl_fc <- stl_fit %>% 
  forecast(h = 8)

# I want to see how the forecast looks on the graph 

stl_fc %>% 
  autoplot(adj)

# It looks reasonable.

# d. 

# Now creating a Holt linear model based on the seasonally adjusted data. 

stl_fit1 <- adj %>% 
  model(ETS(season_adjust ~ error("A") + trend("A") + season("N")))

stl_fc1 <- stl_fit1 %>% 
  forecast(h = 8)

# Now plotting the forecast

stl_fc1 %>% 
  autoplot(adj)

# This forecast is very similar to the ETS model from before except the trend
# is not damped. 


# e. 


# To find a seasonal model, I am going to use the original data instead of the 
# seasonally adjusted data. 

trip_fit <- trips %>% 
  model(ETS(Trips)) %>% 
  report()

## Series: Trips 
## Model: ETS(A,A,A) 
##   Smoothing parameters:
##     alpha = 0.4495675 
##     beta  = 0.04450178 
##     gamma = 0.0001000075 
## 
##   Initial states:
##      l[0]      b[0]      s[0]     s[-1]     s[-2]    s[-3]
##  21689.64 -58.46946 -125.8548 -816.3416 -324.5553 1266.752
## 
##   sigma^2:  699901.4
## 
##      AIC     AICc      BIC 
## 1436.829 1439.400 1458.267

# It returned an ETS(A,A,A) model (so additive within each component) which is 
# called Additive Holt-Winters' method.



# f. 

# ETS model

trips %>% 
  model(ETS(Trips)) %>% 
  accuracy()

## # A tibble: 1 × 10
##   .model     .type       ME  RMSE   MAE   MPE  MAPE  MASE RMSSE     ACF1
##   <chr>      <chr>    <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>    <dbl>
## 1 ETS(Trips) Training  105.  794.  604. 0.379  2.86 0.636 0.653 -0.00151

# The RMSE seems to be very high at 794. Let's compare.

# Season_adj model

adj %>% 
  model(
    damp = ETS(season_adjust ~ error("A") + trend("Ad") + season("N")),
    Holt = ETS(season_adjust ~ error("A") + trend("A") + season("N"))
  ) %>% 
  accuracy()

## # A tibble: 2 × 10
##   .model .type       ME  RMSE   MAE   MPE  MAPE  MASE RMSSE     ACF1
##   <chr>  <chr>    <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>    <dbl>
## 1 damp   Training  87.6  746.  566. 0.301  2.67 0.594 0.618 -0.00831
## 2 Holt   Training  83.7  746.  563. 0.289  2.66 0.590 0.617 -0.00833

# The damped trend has a RMSE of 746 and Holt's linear method has an RMSE
# of 746 too. Since they both have the same RMSE value, we can now choose based
# on the MAE score. Holt's linear method has the lowest MAE sore of the two. Thus,
# this would be the prefferred model over all three methods so far.



# g. 



# Now, I am comparing the forecasts of the three by computing the RMSE values 
# and analyzing the three separate plots of the forecast. 

# I have already plotted the forecasts of the seasonally adjusted data using 
# Holt's linear method and with a damped trend, so I am now going to plot the 
# ETS() generated model (ETS(A,A,A)).

trips %>% 
  model(ETS(Trips)) %>% 
  forecast(h = 8) %>% 
  autoplot(trips)

# Honestly, graphically wise, this model looks the best out of all three. To be 
# precise in my decision making, I am going to compute the RMSE of all three 
# forecast using the accuracy() function. Cross-validation is also used in the 
# process.

trips %>% 
  stretch_tsibble(.init = 10) %>% 
  model(ETS(Trips)) %>% 
  forecast(h = 8) %>% 
  accuracy(trips)

## Warning: The future dataset is incomplete, incomplete out-of-sample data will be treated as missing. 
## 8 observations are missing between 2018 Q1 and 2019 Q4

## # A tibble: 1 × 10
##   .model     .type    ME  RMSE   MAE   MPE  MAPE  MASE RMSSE  ACF1
##   <chr>      <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 ETS(Trips) Test   466. 1403. 1118.  1.72  5.11  1.18  1.16 0.659

# The RMSE for this forecast is 1403. Seem pretty high but let's compare.

adj %>% 
  stretch_tsibble(.init = 10) %>% 
  model(
    damp = ETS(season_adjust ~ error("A") + trend("Ad") + season("N")),
    Holt = ETS(season_adjust ~ error("A") + trend("A") + season("N"))
    ) %>% 
  forecast(h = 8) %>% 
  accuracy(adj)

## Warning: The future dataset is incomplete, incomplete out-of-sample data will be treated as missing. 
## 8 observations are missing between 2018 Q1 and 2019 Q4

## # A tibble: 2 × 10
##   .model .type    ME  RMSE   MAE   MPE  MAPE  MASE RMSSE  ACF1
##   <chr>  <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 damp   Test   419. 1315. 1052.  1.61  4.83  1.10  1.09 0.694
## 2 Holt   Test   517. 1474. 1164.  2.11  5.39  1.22  1.22 0.736

# The RMSE is 1315 for the damped trend method and 1474 for Holt's linear method.
# Thus, according to the RMSE, the damped trend method is best for the forecast
# when it's seasonally adjusted data. I could agree with this based on the graph.
# I am going to choose the damped trend for my overall forecast since it has the 
# lowest RMSE for the forecast, tied for the lowest RMSE for the fit, and represents
# the data's forecast best graphically. 


# h. 


# I am now checking the residuals of the damped trend method with the seasonally 
# adjusted data. 

gg_tsresiduals(stl_fit)

# The residuals overall have a constant variance with a few outliers, there seems
# to be a little autocorrelation, and the mean is centered around zero with an 
# outlier. I'm going to run the ljung-box test to see if the residuals are 
# significant. Lag = 10 since there seems to be no seasonality. 

augment(stl_fit) %>% 
  features(.innov, ljung_box, lag = 10, dof = 0)

## # A tibble: 1 × 3
##   .model                                                         lb_stat lb_pv…¹
##   <chr>                                                            <dbl>   <dbl>
## 1 "ETS(season_adjust ~ error(\"A\") + trend(\"Ad\") + season(\"…    9.08   0.525
## # … with abbreviated variable name ¹lb_pvalue

# The p value is greater than 0.05 so the residuals are not significant at the 
# 5 percent level, aka the residuals are indistinguishable from white noise!

Exercise 7

# NZ to Australia 

nz <- aus_arrivals %>% 
  filter(Origin == "NZ") %>% 
  as_tsibble(index = Quarter)
View(nz)



# a. 



autoplot(nz)

## Plot variable not specified, automatically selected `.vars = Arrivals`

# Yearly seasonality seems to roughly exist. There is also an increasing trend. 



# b. 



# Creating a training set that consists of all years except for the last two 

train <- nz %>% 
  filter(year(Quarter) <= 2010)
View(train)

test <- nz %>% 
  filter(year(Quarter) > 2010)

recent <- nz %>% 
  filter(year(Quarter) >= 1981)

# Now I am forecasting the test set (and comparing) using the Holt-Winters' 
# multiplicative method 

fit <- train %>% 
  model(ETS(Arrivals ~ error("M") + trend("A") + season("M")))

fc <- fit %>% 
  forecast(h = 8)

# I am now plotting it to view it graphically 


fc %>% 
  autoplot(nz)

# The forecast seems to fit the test set pretty accurately. 


# c. 


# The variance of the seasonality is not constant throught out the data, for it
# proportionally changes as time continues. Therefore, multiplicative seasonality 
# is necessary for the forecast. 


# d. 



# I am now forecasting the two year test set along with the actual data for the 
# following methods (without the PI):

# i) an ETS model (the auto method?)

train %>% 
  model(ETS(Arrivals)) %>% 
  report()

## Series: Arrivals 
## Model: ETS(M,A,M) 
##   Smoothing parameters:
##     alpha = 0.6481867 
##     beta  = 0.000100016 
##     gamma = 0.2271947 
## 
##   Initial states:
##      l[0]     b[0]     s[0]    s[-1]    s[-2]     s[-3]
##  75543.76 2033.626 1.001149 1.229188 1.101512 0.6681509
## 
##   sigma^2:  0.0082
## 
##      AIC     AICc      BIC 
## 2856.290 2857.926 2881.377

# This Holt-Winters' multi method 

fc1 <- train %>% 
  model(ETS(Arrivals)) %>% 
  forecast(h = 8)

fc1 %>% 
  autoplot(nz)

# ii) ETS model applied to a log transformed series

log <- nz %>% 
  mutate(Arrivals = log(Arrivals))

train2 <- log %>% 
  filter(year(Quarter) <= 2010)

train2 %>% 
  model(ETS(Arrivals)) %>% 
  report()

## Series: Arrivals 
## Model: ETS(A,N,A) 
##   Smoothing parameters:
##     alpha = 0.6827185 
##     gamma = 0.2231326 
## 
##   Initial states:
##      l[0]       s[0]     s[-1]     s[-2]      s[-3]
##  11.18255 0.01514111 0.2412994 0.1150147 -0.3714552
## 
##   sigma^2:  0.0079
## 
##       AIC      AICc       BIC 
##  2.073627  3.073627 21.586070

# This ETS method says there's no trend but an additive seasonal component. The 
# AICc is 3.07

fc2 <- train2 %>% 
  model(ETS(Arrivals)) %>% 
  forecast(h = 8)

fc2 %>% 
  autoplot(log)

# I feel as if damped trend would look best for this case though.

train2 %>% 
  model(ETS(Arrivals ~ error("A") + trend("Ad") + season("A"))) %>% 
  report()

## Series: Arrivals 
## Model: ETS(A,Ad,A) 
##   Smoothing parameters:
##     alpha = 0.5595716 
##     beta  = 0.09409677 
##     gamma = 0.2153613 
##     phi   = 0.8219273 
## 
##   Initial states:
##      l[0]        b[0]       s[0]     s[-1]     s[-2]     s[-3]
##  11.28025 -0.08118968 0.02259455 0.2404159 0.1114745 -0.374485
## 
##   sigma^2:  0.0079
## 
##       AIC      AICc       BIC 
##  3.770480  5.788828 31.645397

# The AICc is 5.78

fc3 <- train2 %>% 
  model(ETS(Arrivals ~ error("A") + trend("Ad") + season("A"))) %>%
  forecast(h = 8)

fc3 %>% 
  autoplot(log)

# The forecasts actually seem very identical to one another.Thus, I 
# am going to rely on the AICc value for this one. 


# iii) seasonal naive method

# The lag will be equal to 4 since it's quarterly data with yearly seasonality


fc4 <- train %>% 
  model(seasonal_naive = SNAIVE(Arrivals ~ lag(4))) %>% 
  forecast(h = 8)

fc4 %>% 
  autoplot(nz)

# iv) an STL decomposition applied to the log transformed data followed by an 
# ETS model applied to the seasonally adjusted (transformed) data.


# Using the log transformed data, I am going to employ an STL decomposition to 
# find my seasonally adjusted data 

stl_fit <- train2 %>% 
  model(STL(Arrivals ~ season() + trend())) %>% 
  components()
View(stl_fit)

adj <- stl_fit %>% 
  select(Quarter, season_adjust)
adj

## # A tsibble: 120 x 2 [1Q]
##    Quarter season_adjust
##      <qtr>         <dbl>
##  1 1981 Q1          11.1
##  2 1981 Q2          11.3
##  3 1981 Q3          11.2
##  4 1981 Q4          11.0
##  5 1982 Q1          11.0
##  6 1982 Q2          10.9
##  7 1982 Q3          11.0
##  8 1982 Q4          10.9
##  9 1983 Q1          11.0
## 10 1983 Q2          10.8
## # … with 110 more rows
## # ℹ Use `print(n = ...)` to see more rows

# Here is the plot of the seasonally adjusted data

adj %>% 
  autoplot()

## Plot variable not specified, automatically selected `.vars = season_adjust`

# Now, an ETS model applied to the seasoanlly adjusted data 

adj %>% 
  model(ETS(season_adjust)) %>% 
  report()

## Series: season_adjust 
## Model: ETS(A,A,N) 
##   Smoothing parameters:
##     alpha = 0.7859241 
##     beta  = 0.0001000017 
## 
##   Initial states:
##     l[0]       b[0]
##  11.1245 0.01174078
## 
##   sigma^2:  0.0054
## 
##       AIC      AICc       BIC 
## -45.50147 -44.97515 -31.56401

# The ETS reported the Holt's linear method to be the best (lowest AICc)
# Here is the forecast of the test set.

fc5 <- adj %>% 
  model(ETS(season_adjust)) %>% 
  forecast(h = 8)
fc5

## # A fable: 8 x 4 [1Q]
## # Key:     .model [1]
##   .model             Quarter season_adjust .mean
##   <chr>                <qtr>        <dist> <dbl>
## 1 ETS(season_adjust) 2011 Q1 N(13, 0.0054)  12.6
## 2 ETS(season_adjust) 2011 Q2 N(13, 0.0088)  12.6
## 3 ETS(season_adjust) 2011 Q3  N(13, 0.012)  12.6
## 4 ETS(season_adjust) 2011 Q4  N(13, 0.015)  12.6
## 5 ETS(season_adjust) 2012 Q1  N(13, 0.019)  12.7
## 6 ETS(season_adjust) 2012 Q2  N(13, 0.022)  12.7
## 7 ETS(season_adjust) 2012 Q3  N(13, 0.026)  12.7
## 8 ETS(season_adjust) 2012 Q4  N(13, 0.029)  12.7

# To plot it, I need to find the seasonally adjusted component of the entire
# series

stl_fit2 <- log %>% 
  model(STL(Arrivals ~ season() + trend())) %>% 
  components()

adj1 <- stl_fit2 %>% 
  select(Quarter, season_adjust)
View(adj1)

# Now the test set forecast plot

fc5 %>% 
  autoplot(adj1)

# This forecast doesn't seem to be very reasonable.


# e. 


# I am finding the RMSE values for each forecast using the accuracy() fucntion 
# to choose which method is best. 

# i)

accuracy(fc1, recent)

## Warning: The future dataset is incomplete, incomplete out-of-sample data will be treated as missing. 
## 1 observation is missing at 2012 Q4

## # A tibble: 1 × 11
##   .model      Origin .type      ME   RMSE    MAE   MPE  MAPE  MASE RMSSE    ACF1
##   <chr>       <chr>  <chr>   <dbl>  <dbl>  <dbl> <dbl> <dbl> <dbl> <dbl>   <dbl>
## 1 ETS(Arriva… NZ     Test  -10467. 18302. 12746. -3.27  4.16 0.855 0.946 -0.0857

# The RMSE for Holt-Winters' multiplicative method has an 
# RMSE of 18302.

# ii) 

accuracy(fc2, log)

## Warning: The future dataset is incomplete, incomplete out-of-sample data will be treated as missing. 
## 1 observation is missing at 2012 Q4

## # A tibble: 1 × 11
##   .model   Origin .type       ME   RMSE    MAE     MPE  MAPE  MASE RMSSE    ACF1
##   <chr>    <chr>  <chr>    <dbl>  <dbl>  <dbl>   <dbl> <dbl> <dbl> <dbl>   <dbl>
## 1 ETS(Arr… NZ     Test  -0.00383 0.0444 0.0381 -0.0288 0.303 0.345 0.310 0.00656

# The ETS model that is applied to a log transformation is very low at 0.0444. 
# I wonder if this is because of the units. After looking at the data, it is 
# definitely because of the unit change. It is the same case when comparing the 
# AICc. I tried to compare the accuracy using the innovative residuals, but it 
# was equal to the actual residuals. For now, I will choose the best model based
# on the graph. 


# iii) 

accuracy(fc4, recent)

## Warning: The future dataset is incomplete, incomplete out-of-sample data will be treated as missing. 
## 1 observation is missing at 2012 Q4

## # A tibble: 1 × 11
##   .model         Origin .type    ME   RMSE    MAE   MPE  MAPE  MASE RMSSE   ACF1
##   <chr>          <chr>  <chr> <dbl>  <dbl>  <dbl> <dbl> <dbl> <dbl> <dbl>  <dbl>
## 1 seasonal_naive NZ     Test  5692. 14962. 14203.  2.27  4.97 0.952 0.773 -0.116

# The seasonal naive model as RMSE of 14962. This is lower than Holt-Winters'
# multi model. So this one will have priority over this one. 


# iv) 

accuracy(fc5, adj1)

## Warning: The future dataset is incomplete, incomplete out-of-sample data will be treated as missing. 
## 1 observation is missing at 2012 Q4

## # A tibble: 1 × 10
##   .model            .type      ME   RMSE    MAE    MPE  MAPE  MASE RMSSE    ACF1
##   <chr>             <chr>   <dbl>  <dbl>  <dbl>  <dbl> <dbl> <dbl> <dbl>   <dbl>
## 1 ETS(season_adjus… Test  -0.0599 0.0702 0.0599 -0.476 0.476 0.541 0.491 -0.0108

# The RMSE is again very low at 0.0702 because of the log transformation. The 
# ETS model applied to a log transformation is the lowest for this case.

# Now I have to compare the two graphs from the ETS log transformed forecast and 
# the seasonal naive model 

# ETS 

fc2 %>% 
  autoplot(log)

# Snaive

fc4 %>% 
  autoplot(nz)

# Honestly, the prediction intervals for the ETS log transformed data looks very
# nice. Therefore, this will be my chosen model.

# I am now checking the residuals of the chosen model, ETS log trans.:


fit2 <- train2 %>% 
  model(ETS(Arrivals))

gg_tsresiduals(fit2)

# The mean seems to be centered around the mean and there is no autocorrelation. 
# The variance could be balanced more, but this will just cause complications 
# for the prediction intervals. 

# To make sure there is not autocorrelation, I am going to employ the ljung-box
# test. Lag = 8 for quarterly data with a yearly seasonality. 

augment(fit2) %>% 
  features(.innov, ljung_box, lag = 8, dof = 0)

## # A tibble: 1 × 4
##   Origin .model        lb_stat lb_pvalue
##   <chr>  <chr>           <dbl>     <dbl>
## 1 NZ     ETS(Arrivals)    6.18     0.627

# It has a very large p value which is desired. The residuals are indistinguishable 
# from white noise! 



# f. 



# I am going to use time series cross-validation to compare the same four methods

stch <- nz %>% 
  stretch_tsibble(.init = 36, .step = 3)

# i) an ETS model 

fc6 <- stch %>% 
  model(ETS(Arrivals)) %>% 
  forecast(h = 3)

accuracy(fc6, nz)

## Warning: The future dataset is incomplete, incomplete out-of-sample data will be treated as missing. 
## 2 observations are missing between 2012 Q4 and 2013 Q1

## # A tibble: 1 × 11
##   .model        Origin .type    ME   RMSE    MAE   MPE  MAPE  MASE RMSSE  ACF1
##   <chr>         <chr>  <chr> <dbl>  <dbl>  <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 ETS(Arrivals) NZ     Test  4306. 15459. 11940.  2.12  6.47 0.811 0.812 0.254

# The RMSE for the ETS model is 15459.

# ii) ETS applied to a log transformed series

stch1 <- log %>% 
  stretch_tsibble(.init = 36, .step = 3)

fc7 <- stch1 %>% 
  model(ETS(Arrivals)) %>% 
  forecast(h = 3)

accuracy(fc7, log)

## Warning: The future dataset is incomplete, incomplete out-of-sample data will be treated as missing. 
## 2 observations are missing between 2012 Q4 and 2013 Q1

## # A tibble: 1 × 11
##   .model        Origin .type     ME   RMSE    MAE   MPE  MAPE  MASE RMSSE  ACF1
##   <chr>         <chr>  <chr>  <dbl>  <dbl>  <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 ETS(Arrivals) NZ     Test  0.0311 0.0921 0.0674 0.255 0.560 0.630 0.659 0.301

# The forecast for this model is 0.0921

# iii) seasonal naive 

fc8 <- stch %>% 
  model(SNAIVE(Arrivals ~ lag(4))) %>% 
  forecast(h = 3)

accuracy(fc8, nz)

## Warning: The future dataset is incomplete, incomplete out-of-sample data will be treated as missing. 
## 2 observations are missing between 2012 Q4 and 2013 Q1

## # A tibble: 1 × 11
##   .model          Origin .type    ME   RMSE    MAE   MPE  MAPE  MASE RMSSE  ACF1
##   <chr>           <chr>  <chr> <dbl>  <dbl>  <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 SNAIVE(Arrival… NZ     Test  8239. 18682. 14350.  3.82  7.70 0.974 0.981 0.566

# The RMSE is 18682.

# iv) STL decomp. applied to log trans. followed by ETS applied to 
# the seasonally adj data

stl_fit3 <- stch1 %>% 
  model(STL(Arrivals ~ season() + trend())) %>% 
  components()

adj2 <- stl_fit3 %>% 
  select(!c(.model))

fc9 <- adj2 %>% 
  model(ETS(season_adjust)) %>% 
  forecast(h = 3)
View(fc9)

accuracy(fc9, adj1)

## Warning: The future dataset is incomplete, incomplete out-of-sample data will be treated as missing. 
## 2 observations are missing between 2012 Q4 and 2013 Q1

## # A tibble: 1 × 10
##   .model             .type     ME   RMSE    MAE   MPE  MAPE  MASE RMSSE  ACF1
##   <chr>              <chr>  <dbl>  <dbl>  <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 ETS(season_adjust) Test  0.0294 0.0817 0.0618 0.241 0.511 0.577 0.585 0.405

# The RMSE of this model is 0.0817. 

# The results actually did change. The original ETS model became 
# more suitable than the snaive model, and the ETS of the log trans. seasonally 
# adjusted data became better than the ETS log trans. model, so opposite from 
# the results from before.

Exercise 8

# a. 

# manipulating the data

View(aus_production)

cem <- aus_production %>% 
  select(Quarter, Cement) %>% 
  as_tsibble(index = Quarter)
cem

## # A tsibble: 218 x 2 [1Q]
##    Quarter Cement
##      <qtr>  <dbl>
##  1 1956 Q1    465
##  2 1956 Q2    532
##  3 1956 Q3    561
##  4 1956 Q4    570
##  5 1957 Q1    529
##  6 1957 Q2    604
##  7 1957 Q3    603
##  8 1957 Q4    582
##  9 1958 Q1    554
## 10 1958 Q2    620
## # … with 208 more rows
## # ℹ Use `print(n = ...)` to see more rows

autoplot(cem)

## Plot variable not specified, automatically selected `.vars = Cement`

# Now applying cross validation 

stch <- cem %>% 
  stretch_tsibble(.init = 5, .step = 1)

# and now the forecasts


# The ETS model

fc1 <- stch %>% 
  model(ETS(Cement)) %>% 
  forecast(h = 1)


# The seasonal naive model. Lag = 4

fc2 <- stch %>% 
  model(SNAIVE(Cement ~ lag(4))) %>% 
  forecast(h = 1)


# b. 

# MSE of the resulting 4-step-ahead errors


stch1 <- cem %>% 
  stretch_tsibble(.init = 5, .step = 4)

# The two forecasts 

# ETS

fc1 <- stch1 %>% 
  model(ETS(Cement)) %>% 
  forecast(h = 1)

mse1 <- accuracy(fc1, cem) %>% 
  mutate(MSE = (RMSE)^2) %>% 
  select(MSE, RMSE)
mse1

## # A tibble: 1 × 2
##     MSE  RMSE
##   <dbl> <dbl>
## 1 6473.  80.5

# The MSE is RMSE squared, which is found to be 6473 from the table above.


# snaive

fc2 <- stch1 %>% 
  model(SNAIVE(Cement ~ lag(4))) %>% 
  forecast(h = 1)

mse2 <- accuracy(fc2, cem) %>% 
  mutate(MSE = (RMSE)^2) %>% 
  select(MSE, RMSE)
mse2

## # A tibble: 1 × 2
##      MSE  RMSE
##    <dbl> <dbl>
## 1 23018.  152.

# The RMSE of the snaive model is 152. Thus, the MSE is this value
# squared. You can view the value from the table above.

HW 5

Summer Ligon

2022-11-09

Load packages and data

Questions

Exercise 1

Exercise 2

Exercise 3

Exercise 4

Exercise 5

Exercise 6

Exercise 7

Exercise 8