Questions
Exercise 1
# Pigs slaughtered in Victoria:
pigs <- aus_livestock %>%
as_tsibble(index = Month) %>%
filter(Animal == "Pigs", State == "Victoria")
pigs
## # A tsibble: 558 x 4 [1M]
## # Key: Animal, State [1]
## Month Animal State Count
## <mth> <fct> <fct> <dbl>
## 1 1972 Jul Pigs Victoria 94200
## 2 1972 Aug Pigs Victoria 105700
## 3 1972 Sep Pigs Victoria 96500
## 4 1972 Oct Pigs Victoria 117100
## 5 1972 Nov Pigs Victoria 104600
## 6 1972 Dec Pigs Victoria 100500
## 7 1973 Jan Pigs Victoria 94700
## 8 1973 Feb Pigs Victoria 93900
## 9 1973 Mar Pigs Victoria 93200
## 10 1973 Apr Pigs Victoria 78000
## # … with 548 more rows
## # ℹ Use `print(n = ...)` to see more rows
# a.
# Here is the SES model with respect to the number of pigs slaughtered in
# Victoria
fit_pigs <- pigs %>%
model(SES = ETS(Count ~ error("A") + trend("N") + season("N")))
# The SES model is fitted automatically to minimize the SSE. Thus,
# the optimal value of the weight and the level can found below:
tidy(fit_pigs)
## # A tibble: 2 × 5
## Animal State .model term estimate
## <fct> <fct> <chr> <chr> <dbl>
## 1 Pigs Victoria SES alpha 0.322
## 2 Pigs Victoria SES l[0] 100647.
# The optimal weight is 0.322 and the optimal level value is
# 100,647. An extremely large number, but I guess it makes sense
# considering the data.
# Now, generating the forecasts for the next four months. The data
# is already in year-month intervals, so h = 4.
fc_pigs <- fit_pigs %>%
forecast(h = 4)
View(fc_pigs)
# All four point forecasts have the same value which makes sense since
# it acts like the naive method (since this method is used for time series
# that has no clear seasonal patterns or trend).
# To plot the foreacst, I am going to start the data from 2010.
pigs1 <- pigs %>%
filter(year(Month) >= 2010)
fc_pigs1 <- pigs1 %>%
model(SES = ETS(Count ~ error("A") + trend("N") + season("N"))) %>%
forecast(h = 4) %>%
autoplot(pigs1) +
labs(y = "Number of Pigs Slaughtered", title = "Victorian Pigs Slaughtered forecast for the Next Four Months")
fc_pigs1

# b.
# The prediction interval at the 95% level calculated by R for the first forecast
# is found from the hilo() function
pi <- fc_pigs %>%
hilo(level = 95)
View(pi)
# which is [76,854.79, 113,518.3].
# Calculating the prediction interval by hand consisted of finding the variance
# of the residuals from the report() function
report(fit_pigs)
## Series: Count
## Model: ETS(A,N,N)
## Smoothing parameters:
## alpha = 0.3221247
##
## Initial states:
## l[0]
## 100646.6
##
## sigma^2: 87480760
##
## AIC AICc BIC
## 13737.10 13737.14 13750.07
# and plugging it into the Forecast variance formula for the (A,N,N) model from
# slide 90/95. Since for the first forecast h = 1, the value for the Variance
# of the forecast ends up being equivalent to the variance of the residuals. You
# then take the square root of the value:
std <- sqrt(87480760)
std
## [1] 9353.115
# Then, multiply this value by 1.96 (the c value for the 95 percent level) which
# equals
int <- (std)*(1.96)
int
## [1] 18332.11
# Finally, you add and subtract the value to the first forecast.
y <- 95186.56
sub <- y - int
add <- y + int
# Therefore, the interval calculated by hand is [76,854.45, 113,518.67] which
# is basically the approximate value found by R!
Exercise 2
# I am going to choose Argentina as my country to analyze. I also
# convert it into a tsibble.
arg <- global_economy %>%
filter(Country == "Argentina") %>%
select(Year, Country, Exports) %>%
as_tsibble(index = Year)
arg
## # A tsibble: 58 x 3 [1Y]
## # Key: Country [1]
## Year Country Exports
## <dbl> <fct> <dbl>
## 1 1960 Argentina 7.60
## 2 1961 Argentina 5.99
## 3 1962 Argentina 4.69
## 4 1963 Argentina 7.89
## 5 1964 Argentina 5.56
## 6 1965 Argentina 6.23
## 7 1966 Argentina 6.65
## 8 1967 Argentina 7.50
## 9 1968 Argentina 6.48
## 10 1969 Argentina 6.40
## # … with 48 more rows
## # ℹ Use `print(n = ...)` to see more rows
# a.
autoplot(arg)
## Plot variable not specified, automatically selected `.vars = Exports`

# There is no clear seasonal patterns within the time series, and
# there is an random incline within the data at the start of the year
# 2000.
# I am going to use STL decomposition to examine the components different
# components of the data.
arg %>%
model(STL(Exports ~ trend() + season(), robust = TRUE)) %>%
components() %>%
autoplot()

# It isn't posting the seasonal component, so I'm anticipating that there
# doesn't exist any seasonality. Therefore, Holt's linear trend or SES method
# is applicable.
# b.
# Using a SES model with cross-validation to forecast the next 10 years.
# The fit of the model
fit <- arg %>%
model(SES = ETS(Exports ~ error("A") + trend("N") + season("N")))
# Forecasting it for the next 10 years so h = 10
fc <- fit %>%
forecast(h = 10)
# Now plotting it
fc %>%
autoplot(arg)

# A really big prediction interval that is normally distributed. The point
# forecast also seems to have a mean of zero.
# c.
# Using the accuracy function to compute the RMSE values for the training data.
# First, I must create the training set data and my test set.
train <- arg %>%
filter(Year <= 2002)
test <- arg %>%
filter(Year > 2002)
# The training set is about 70% of the entire data set.
# The forecast of the training set:
fc1 <- train %>%
model(SES = ETS(Exports ~ error("A") + trend("N") + season("N"))) %>%
forecast(h = 15)
# And now the RMSE values for the training data:
accuracy(fc1, arg)
## # A tibble: 1 × 11
## .model Country .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <fct> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 SES Argentina Test -8.38 9.65 8.38 -57.0 57.0 4.82 3.10 0.786
# which is 9.65, a very high number for RMSE values.
# d.
# To compare, I am going to create a forecast using Holt's linear trend
# method with the model below. This method is best for use when the time
# series contains a trend. For Holt's linear trend, it is an additive
# trend method.
# The fit:
fit2 <- arg %>%
model(Holt = ETS(Exports ~ error("A") + trend("A") + season("N")))
# The forecast
fc2 <- fit2 %>%
forecast(h = 10)
# And now the plot
fc2 %>%
autoplot(arg)

# I now am creating a forecast using the training data to compute the RMSE
fc3 <- train %>%
model(Holt = ETS(Exports ~ error("A") + trend("A") + season("N"))) %>%
forecast(h = 15)
# Finally, the RMSE
accuracy(fc3, arg)
## # A tibble: 1 × 11
## .model Country .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <fct> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Holt Argentina Test -10.3 12.3 10.3 -71.4 71.5 5.95 3.95 0.795
# The RMSE Holt's training data is higher than the SES method.
# e.
# From the STL decomposition, there seems to be a slight increasing trend in the data.
# Thus, Holt's linear method seems to be the winner even though the huge random incline
# caused the RMSE to be larger than the SES method. The SES method is best for whenever there
# isn't any seasonality or trend. Therefore, Holt's linear method is the best out of
# these two forecasts.
Exercise 3
# Chinese GDP time series
china <- global_economy %>%
filter(Country == "China") %>%
select(Year, Country, GDP) %>%
as_tsibble(index = Year)
china
## # A tsibble: 58 x 3 [1Y]
## # Key: Country [1]
## Year Country GDP
## <dbl> <fct> <dbl>
## 1 1960 China 59716467625.
## 2 1961 China 50056868958.
## 3 1962 China 47209359006.
## 4 1963 China 50706799903.
## 5 1964 China 59708343489.
## 6 1965 China 70436266147.
## 7 1966 China 76720285970.
## 8 1967 China 72881631327.
## 9 1968 China 70846535056.
## 10 1969 China 79705906247.
## # … with 48 more rows
## # ℹ Use `print(n = ...)` to see more rows
# To forecast the data, I am going to visualize the data
# first
autoplot(china)
## Plot variable not specified, automatically selected `.vars = GDP`

# There is no seasonality or cyclic patterns in any sense.
# There is a nonlinear increasing trend though. Thus, I am going
# to employ methods that don't involve any seasonality:
# SES, additive damped trend, and Holt's linear trend.
# I am going to plot the auto and then add various options within the ETS
# model to compare.
fc <- china %>%
model(auto = ETS(GDP)) %>%
forecast(h = 20) %>%
autoplot(china)
fc

# The graph visually changed, its more flat compared to the original graph.
# I'm guessing it's because the graph zoomed out so much from the prediction
# interval.
# Now, I am adding various options within the ETS method to create a damped trend,
# Holt's linear trend, and an SES (since all three methods don't require seasonality)
# to see the differences amongst them.
# First, the Damped trend (additive)
china %>%
model(damp = ETS(GDP ~ error("A") + trend("Ad") + season("N"))) %>%
forecast(h = 20) %>%
autoplot(china)

# The damped trend is exactly what it sound like - an trend that dips down (increasing
# at a decreasing rate). It seems to fit the data pretty well, but I would have to know
# more information to know if the data would damp like that in the future.
# Next, Holt's linear trend method:
china %>%
model(holt = ETS(GDP ~ error("A") + trend("A") + season("N"))) %>%
forecast(h = 20) %>%
autoplot(china)

# Holt's linear method produces a linear point forecast with a trend. This seems to fit
# the data's predicted forecast pretty nicely.
# Now, I want to employ an SES method to the data.
china %>%
model(SES = ETS(GDP ~ error("A") + trend("N") + season("N"))) %>%
forecast(h = 20) %>%
autoplot(china)

# The SES method is like Holt's linear trend method, except it doesn't have an inc/dec
# trend. Thus, this forecast doesn't seem to fit well.
# Now, I am going to find the correct lambda to find the correct transformation to balance the
# variance, if there is one.
china %>%
features(GDP, features = guerrero)
## # A tibble: 1 × 2
## Country lambda_guerrero
## <fct> <dbl>
## 1 China -0.0345
# The value is pretty close to zero. Thus, log is the appropriate transformation to
# employ.
china1 <- china %>%
mutate(GDP = log(GDP))
# Let's check the graph.
autoplot(china1)
## Plot variable not specified, automatically selected `.vars = GDP`

# The variance is indeed more balanced through out the data. I am now going
# to use this data to forecast with an ETS model
china1 %>%
model(auto = ETS(GDP)) %>%
forecast(h = 20) %>%
autoplot(china1)

# The forecast seems pretty nice with a more balanced variance!
# I want to check the accuracy of each method to see which one has the
# lowest RMSE.
china %>%
stretch_tsibble(.init = 10) %>%
model(SES = ETS(GDP ~ error("A") + trend("N") + season("N")),
Holt = ETS(GDP ~ error("A") + trend("A") + season("N")),
Damp = ETS(GDP ~ error("A") + trend("Ad") + season("N"))) %>%
forecast(h = 20) %>%
accuracy(china)
## Warning: The future dataset is incomplete, incomplete out-of-sample data will be treated as missing.
## 20 observations are missing between 2018 and 2037
## # A tibble: 3 × 11
## .model Country .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <fct> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Damp China Test 1.37e12 2.74e12 1.45e12 38.1 41.3 6.69 6.55 0.869
## 2 Holt China Test 1.28e12 2.61e12 1.38e12 35.9 39.3 6.37 6.22 0.867
## 3 SES China Test 1.95e12 3.45e12 1.95e12 55.2 55.4 9.01 8.23 0.878
# It seems that Holt's linear trend method works best out of these
# three. I am also going to check the auto ETS method:
china %>%
model(ETS(GDP)) %>%
accuracy()
## # A tibble: 1 × 11
## Country .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <fct> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 China ETS(GDP) Traini… 4.13e10 2.00e11 9.66e10 2.11 7.72 0.446 0.477 0.268
# Of course the auto method has the lowest RMSE.
# What about the auto ETS model with the box-cox transformation?
china1 %>%
model(ETS(GDP)) %>%
accuracy()
## # A tibble: 1 × 11
## Country .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <fct> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 China ETS(GDP) Training 0.0116 0.0906 0.0722 0.0434 0.272 0.620 0.687 0.179
# Look how low the RMSE is! This is definitely the best forecast overall!
Exercise 4
# Gas data from aus_production
gas <- aus_production %>%
select(Quarter, Gas) %>%
as_tsibble(index = Quarter)
gas
## # A tsibble: 218 x 2 [1Q]
## Quarter Gas
## <qtr> <dbl>
## 1 1956 Q1 5
## 2 1956 Q2 6
## 3 1956 Q3 7
## 4 1956 Q4 6
## 5 1957 Q1 5
## 6 1957 Q2 7
## 7 1957 Q3 7
## 8 1957 Q4 6
## 9 1958 Q1 5
## 10 1958 Q2 7
## # … with 208 more rows
## # ℹ Use `print(n = ...)` to see more rows
# To find an ETS model that fits best, I need to visualize the
# data
autoplot(gas)
## Plot variable not specified, automatically selected `.vars = Gas`

# There is apparent seasonality and an increasing trend. The seasonal
# patterns' variations are changing proportional to the level of the
# series. Thus, the season component should be multiplicative. Since there
# is an increasing trend, that maybe has a slight damp, the trend component
# should have either additive or damped in it. I will do methods
# that deal with seasonality with different trend options to compare.
# First, Holt-Winters' multiplicative method:
# before I forecast it, I am going to check the residuals
fit1 <- gas %>%
model(ETS(Gas ~ error("A") + trend("A") + season("M")))
fit1
## # A mable: 1 x 1
## `ETS(Gas ~ error("A") + trend("A") + season("M"))`
## <model>
## 1 <ETS(A,A,M)>
gg_tsresiduals(fit1)

# The variance of the residuals is not constant at all, causing the
# calculation of the prediction interval hard to be made. There is also
# some prevalent autocorrelation. The mean is centered around 0 which is
# good. To check is the residuals are distinguishable from white noise, I
# am going to check the significance using the ljung-box test
# Since the seasonality is occuring about once a year, lag = 2m =
# 2(1) = 2.
aug <- augment(fit1)
aug %>%
features(.innov, ljung_box, lag = 2, dof = 0)
## # A tibble: 1 × 3
## .model lb_stat lb_pvalue
## <chr> <dbl> <dbl>
## 1 "ETS(Gas ~ error(\"A\") + trend(\"A\") + season(\"M\"))" 2.12 0.346
# Since p value > 0.05, the residuals are not significant at the
# 5 percent level. Thus, the residuals are not distinguishable from
# a white noise which is what we want.
gas %>%
model(ETS(Gas ~ error("A") + trend("A") + season("M"))) %>%
forecast(h = 36) %>%
autoplot(gas)

report(fit1)
## Series: Gas
## Model: ETS(A,A,M)
## Smoothing parameters:
## alpha = 0.6132245
## beta = 0.007863848
## gamma = 0.2244134
##
## Initial states:
## l[0] b[0] s[0] s[-1] s[-2] s[-3]
## 3.620487 0.6098927 0.9796171 1.147619 1.075495 0.7972686
##
## sigma^2: 18.2237
##
## AIC AICc BIC
## 1816.462 1817.328 1846.923
# These are the parameters estimated for Holt-Winters' multiplicative
# method (with an additive error term).
# This forecast looks nice!
# Now, Holt-Winters' damped method:
# Checking the residuals
fit2 <- gas %>%
model(ETS(Gas ~ error("A") + trend("Ad") + season("M")))
gg_tsresiduals(fit2)

# The residuals seems to follow the same patterns from Holt-Winters'
# multiplicative method. To see if the residuals are distinguishable from
# white noise:
aug1 <- augment(fit2)
aug1 %>%
features(.innov, ljung_box, lag = 2, dof = 0)
## # A tibble: 1 × 3
## .model lb_stat lb_pvalue
## <chr> <dbl> <dbl>
## 1 "ETS(Gas ~ error(\"A\") + trend(\"Ad\") + season(\"M\"))" 2.99 0.224
# The p value is greater than 0.05, so it's not distinguishable from
# white noise, however it's less than the p value from Holt-Winters'
# multiplicative method. Thus, the last method extracted the data
# from the residuals more than this one.
gas %>%
model(ETS(Gas ~ error("A") + trend("Ad") + season("M"))) %>%
forecast(h = 36) %>%
autoplot(gas)

# This forecast looks so nice! I think the damped trend improve the forecast
# from a visual standpoint.
# I am going to allow the ETS() function select the model by minimising the AICc
fit3 <- gas %>%
model(ETS(Gas))
report(fit3)
## Series: Gas
## Model: ETS(M,A,M)
## Smoothing parameters:
## alpha = 0.6528545
## beta = 0.1441675
## gamma = 0.09784922
##
## Initial states:
## l[0] b[0] s[0] s[-1] s[-2] s[-3]
## 5.945592 0.07062881 0.9309236 1.177883 1.074851 0.8163427
##
## sigma^2: 0.0032
##
## AIC AICc BIC
## 1680.929 1681.794 1711.389
# The model chosen was ETS(M,A,M). Gamma is smaller in this example
# than from Holt-Winters' multi with the additive error term. This means
# that for the ETS(M,A,M) model, the seasonality doesn't change us much
# overtime.
fit4 <- gas %>%
model(ETS(Gas ~ error("M") + trend("A") + season("M")))
fc4 <- fit4 %>%
forecast(h = 36)
fc4 %>%
autoplot(gas)

# This forecast definitely seems the most appropriate. I am going to compare the
# components from the additive error term to the multi.
# Additive error term:
components(fit1) %>%
autoplot()
## Warning: Removed 4 row(s) containing missing values (geom_path).

# Multi error term:
components(fit4) %>%
autoplot()
## Warning: Removed 4 row(s) containing missing values (geom_path).

# The trend didn't change by much, but the seasonal patterns do not change us much
# for the multi errors compared to the additive. The slope is also very different.
# Now, I'm checking the residuals. Note that since the model has multi errors,
# the innovation residuals are not equivalent to the regular residuals.
aug3 <- augment(fit4)
# residuals:
ggplot(aug3, aes(x = Quarter, y = .resid)) +
geom_line()

# innovative residuals:
ggplot(aug3, aes(x = Quarter, y = .innov)) +
geom_line()

# The innovative residuals have more of a constant variance even though it still
# isn't very balanced.
gg_tsresiduals(fit4)

# There seems to be a lot of autocorrelation which is not good.
# The mean is centered around zero which is good.
# Even though the ETS function chose the appropriate model, I am
# going to employ a damped trend to the ETS(M,A,M) model.
fit5 <- gas %>%
model(ETS(Gas ~ error("M") + trend("Ad") + season("M")))
fc5 <- fit5 %>%
forecast(h = 36)
fc5 %>%
autoplot(gas)

# The forecast looks the same except with a damped trend. I am going to check
# the parameters and AICc.
report(fit5)
## Series: Gas
## Model: ETS(M,Ad,M)
## Smoothing parameters:
## alpha = 0.6489044
## beta = 0.1551275
## gamma = 0.09369372
## phi = 0.98
##
## Initial states:
## l[0] b[0] s[0] s[-1] s[-2] s[-3]
## 5.858941 0.09944006 0.9281912 1.177903 1.07678 0.8171255
##
## sigma^2: 0.0033
##
## AIC AICc BIC
## 1684.028 1685.091 1717.873
# The AICc only went up by 5 units. The parameters are very similar to
# one another. Now, I am going to check the residuals.
gg_tsresiduals(fit5)

# It seems to fail to the same axioms as before: not a constant variance in the
# residuals and autocorrelation. The mean is centered around the mean. Thus, the
# damped trend did not improve the forecast in this case. Therefore, I am sticking
# with the ETS(M,A,M) model!
Exercise 5
# My set seed is 8 (my number in soccer)
set.seed(8)
myseries <- aus_retail %>%
filter(`Series ID` == sample(aus_retail$`Series ID`, 1))
myseries
## # A tsibble: 441 x 5 [1M]
## # Key: State, Industry [1]
## State Industry `Series ID` Month Turnover
## <chr> <chr> <chr> <mth> <dbl>
## 1 New South Wales Household goods retailing A3349397X 1982 Apr 211.
## 2 New South Wales Household goods retailing A3349397X 1982 May 224.
## 3 New South Wales Household goods retailing A3349397X 1982 Jun 216.
## 4 New South Wales Household goods retailing A3349397X 1982 Jul 226.
## 5 New South Wales Household goods retailing A3349397X 1982 Aug 217.
## 6 New South Wales Household goods retailing A3349397X 1982 Sep 213.
## 7 New South Wales Household goods retailing A3349397X 1982 Oct 224.
## 8 New South Wales Household goods retailing A3349397X 1982 Nov 244.
## 9 New South Wales Household goods retailing A3349397X 1982 Dec 393.
## 10 New South Wales Household goods retailing A3349397X 1983 Jan 220.
## # … with 431 more rows
## # ℹ Use `print(n = ...)` to see more rows
myseries %>% autoplot(Turnover)

View(myseries)
# a.
# Multiplicative seasonality is necessary since the proportionality of seasonal
# patterns change to different levels of time.
# b.
# Applying Holt-Winters' multiplicative method to the data.
fit <- myseries %>%
model(multi = ETS(Turnover ~ error("A") + trend("A") + season("M")))
fc <- fit %>%
forecast(h = 36)
fc %>%
autoplot(myseries)

# The forecast looks very nice. Now, experimenting with the ETS model
# by making the trend damped.
fit1 <- myseries %>%
model(damp = ETS(Turnover ~ error("A") + trend("Ad") + season("M")))
fc1 <- fit1 %>%
forecast(h = 36)
fc1 %>%
autoplot(myseries)

# This forecast has a damping trend which seems applicable for this type
# of time series.
# c.
# I am now comparing the RMSE of the one-step forecasts from the two methods
# with cross-validation.
# My data for the Holt-Winters' multi:
# myseries %>%
# model(ETS(Turnover ~ error("A") + trend("A") + season("M"))) %>%
# forecast(h = 1) %>%
# accuracy(myseries)
# For the damped Holt-Winters' multi:
# myseries %>%
# model(ETS(Turnover ~ error("A") + trend("Ad") + season("M))) %>%
# forecast(h = 1) %>%
# accuracy(myseries)
# My RMSE wouldn't load, so I am going to have to take a
# Loss here :(
# d.
# Checking the residuals of the ETS(A,Ad,M) model.
gg_tsresiduals(fit1)

# The variance of the innovative residuals seem constant with a few outliers.
# There exists some autocorrelation and the mean is centered around 0. I am
# using the ljung-box test to check the significance of my residuals. lag = 2(m) =
# 2(12) = 24 and dof = 2.
augment(fit1) %>%
features(.innov, ljung_box, lag = 24, dof = 2)
## # A tibble: 1 × 5
## State Industry .model lb_stat lb_pvalue
## <chr> <chr> <chr> <dbl> <dbl>
## 1 New South Wales Household goods retailing damp 71.7 0.000000356
# Since the p value is greater than 0.05, the residuals are not significant at
# the 5 percent level, meaning the residuals are not distinguishable from white
# noise.
aug4 <- augment(fit1)
ggplot(aug4, aes(x = Month, y = .resid)) +
geom_line()

# The data seems to look like white noise with a constant variance across with
# the exception of a few outliers.
# e.
# I am now comparing the RMSE between the two methods with training
# and test sets.
series <- myseries %>%
as_tsibble(index = Month)
series
## # A tsibble: 441 x 5 [1M]
## # Key: State, Industry [1]
## State Industry `Series ID` Month Turnover
## <chr> <chr> <chr> <mth> <dbl>
## 1 New South Wales Household goods retailing A3349397X 1982 Apr 211.
## 2 New South Wales Household goods retailing A3349397X 1982 May 224.
## 3 New South Wales Household goods retailing A3349397X 1982 Jun 216.
## 4 New South Wales Household goods retailing A3349397X 1982 Jul 226.
## 5 New South Wales Household goods retailing A3349397X 1982 Aug 217.
## 6 New South Wales Household goods retailing A3349397X 1982 Sep 213.
## 7 New South Wales Household goods retailing A3349397X 1982 Oct 224.
## 8 New South Wales Household goods retailing A3349397X 1982 Nov 244.
## 9 New South Wales Household goods retailing A3349397X 1982 Dec 393.
## 10 New South Wales Household goods retailing A3349397X 1983 Jan 220.
## # … with 431 more rows
## # ℹ Use `print(n = ...)` to see more rows
train <- series %>%
filter(year(Month) <= 2010)
test <- series %>%
filter(year(Month) > 2010)
fc_train <- train %>%
model(multi = ETS(Turnover ~ error("A") + trend("A") + season("M"))) %>%
forecast(h = 8)
accuracy(fc_train, series)
## # A tibble: 1 × 12
## .model State Indus…¹ .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 multi New So… Househ… Test -29.2 31.5 29.2 -2.81 2.81 0.715 0.617 0.00532
## # … with abbreviated variable name ¹Industry
# The RMSE for the residuals for the training set for the ETS(A,A,M) model is
# 31.3. Now, for the ETS(A,Ad,M) model.
fc_train1 <- train %>%
model(damp = ETS(Turnover ~ error("A") + trend("Ad") + season("M"))) %>%
forecast(h = 8)
accuracy(fc_train1, series)
## # A tibble: 1 × 12
## .model State Indus…¹ .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 damp New So… Househ… Test -18.1 22.8 21.2 -1.76 2.04 0.520 0.445 -0.0618
## # … with abbreviated variable name ¹Industry
# The RMSE for this model is 22.6 which is slightly lower than the first one,
# so obviously I prefer the ETS(A,Ad,M) model based on the test set.
# The seasonal naive approach is below.
fit2 <- train %>%
model(SN = SNAIVE(Turnover ~ lag(12)))
fc2 <- fit2 %>%
forecast(h = 8)
accuracy(fc2, series)
## # A tibble: 1 × 12
## .model State Indus…¹ .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 SN New Sout… Househ… Test -21.7 28.5 21.7 -2.04 2.04 0.532 0.558 0.137
## # … with abbreviated variable name ¹Industry
# The RMSE is 28.5, so the ETS(A,Ad,M) model beat the seasonal
# naive approach!
series %>%
model(damp = ETS(Turnover ~ error("A") + trend("Ad") + season("M"))) %>%
forecast(h = 36) %>%
autoplot(series)

# The forecast seems reasonable!!
Exercise 6
# The total domestic overnight tips across Australia
trips <- tourism %>%
summarise(Trips = sum(Trips)) %>%
as_tsibble(index = Quarter)
trips
## # A tsibble: 80 x 2 [1Q]
## Quarter Trips
## <qtr> <dbl>
## 1 1998 Q1 23182.
## 2 1998 Q2 20323.
## 3 1998 Q3 19827.
## 4 1998 Q4 20830.
## 5 1999 Q1 22087.
## 6 1999 Q2 21458.
## 7 1999 Q3 19914.
## 8 1999 Q4 20028.
## 9 2000 Q1 22339.
## 10 2000 Q2 19941.
## # … with 70 more rows
## # ℹ Use `print(n = ...)` to see more rows
# a.
# plotting the data
autoplot(trips)
## Plot variable not specified, automatically selected `.vars = Trips`

# The seasonality doesn't seem to be clear. There is an constant trend in the data
# and then a sudden increase in the trend. This could indicate some cyclic behaviuor.
# b.
# Now, I am decomposing the series to observe the different components
# and collect the seasonally adjusted data (y - s).
stl <- trips %>%
model(STL(Trips ~ season() + trend())) %>%
components()
View(stl)
# Now selecting the seasonally adjusted data
adj <- stl %>%
select(season_adjust)
adj
## # A tsibble: 80 x 2 [1Q]
## season_adjust Quarter
## <dbl> <qtr>
## 1 21939. 1998 Q1
## 2 20670. 1998 Q2
## 3 20566. 1998 Q3
## 4 20989. 1998 Q4
## 5 20843. 1999 Q1
## 6 21801. 1999 Q2
## 7 20652. 1999 Q3
## 8 20196. 1999 Q4
## 9 21093. 2000 Q1
## 10 20280. 2000 Q2
## # … with 70 more rows
## # ℹ Use `print(n = ...)` to see more rows
# I am plotting it just to observe.
adj %>%
autoplot()
## Plot variable not specified, automatically selected `.vars = season_adjust`

# It hasn't changed much from the original graph, indicating that there wasn't
# much seasonality within it to begin with.
# c.
# The code from the homework wasn't working, so I'm creating the ETS model using
# the seasonally adjust data.
stl_fit <- adj %>%
model(ETS(season_adjust ~ error("A") + trend("Ad") + season("N")))
stl_fit
## # A mable: 1 x 1
## `ETS(season_adjust ~ error("A") + trend("Ad") + season("N"))`
## <model>
## 1 <ETS(A,Ad,N)>
# Now forecasting the model when h = 8 (8 quarters)
stl_fc <- stl_fit %>%
forecast(h = 8)
# I want to see how the forecast looks on the graph
stl_fc %>%
autoplot(adj)

# It looks reasonable.
# d.
# Now creating a Holt linear model based on the seasonally adjusted data.
stl_fit1 <- adj %>%
model(ETS(season_adjust ~ error("A") + trend("A") + season("N")))
stl_fc1 <- stl_fit1 %>%
forecast(h = 8)
# Now plotting the forecast
stl_fc1 %>%
autoplot(adj)

# This forecast is very similar to the ETS model from before except the trend
# is not damped.
# e.
# To find a seasonal model, I am going to use the original data instead of the
# seasonally adjusted data.
trip_fit <- trips %>%
model(ETS(Trips)) %>%
report()
## Series: Trips
## Model: ETS(A,A,A)
## Smoothing parameters:
## alpha = 0.4495675
## beta = 0.04450178
## gamma = 0.0001000075
##
## Initial states:
## l[0] b[0] s[0] s[-1] s[-2] s[-3]
## 21689.64 -58.46946 -125.8548 -816.3416 -324.5553 1266.752
##
## sigma^2: 699901.4
##
## AIC AICc BIC
## 1436.829 1439.400 1458.267
# It returned an ETS(A,A,A) model (so additive within each component) which is
# called Additive Holt-Winters' method.
# f.
# ETS model
trips %>%
model(ETS(Trips)) %>%
accuracy()
## # A tibble: 1 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 ETS(Trips) Training 105. 794. 604. 0.379 2.86 0.636 0.653 -0.00151
# The RMSE seems to be very high at 794. Let's compare.
# Season_adj model
adj %>%
model(
damp = ETS(season_adjust ~ error("A") + trend("Ad") + season("N")),
Holt = ETS(season_adjust ~ error("A") + trend("A") + season("N"))
) %>%
accuracy()
## # A tibble: 2 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 damp Training 87.6 746. 566. 0.301 2.67 0.594 0.618 -0.00831
## 2 Holt Training 83.7 746. 563. 0.289 2.66 0.590 0.617 -0.00833
# The damped trend has a RMSE of 746 and Holt's linear method has an RMSE
# of 746 too. Since they both have the same RMSE value, we can now choose based
# on the MAE score. Holt's linear method has the lowest MAE sore of the two. Thus,
# this would be the prefferred model over all three methods so far.
# g.
# Now, I am comparing the forecasts of the three by computing the RMSE values
# and analyzing the three separate plots of the forecast.
# I have already plotted the forecasts of the seasonally adjusted data using
# Holt's linear method and with a damped trend, so I am now going to plot the
# ETS() generated model (ETS(A,A,A)).
trips %>%
model(ETS(Trips)) %>%
forecast(h = 8) %>%
autoplot(trips)

# Honestly, graphically wise, this model looks the best out of all three. To be
# precise in my decision making, I am going to compute the RMSE of all three
# forecast using the accuracy() function. Cross-validation is also used in the
# process.
trips %>%
stretch_tsibble(.init = 10) %>%
model(ETS(Trips)) %>%
forecast(h = 8) %>%
accuracy(trips)
## Warning: The future dataset is incomplete, incomplete out-of-sample data will be treated as missing.
## 8 observations are missing between 2018 Q1 and 2019 Q4
## # A tibble: 1 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 ETS(Trips) Test 466. 1403. 1118. 1.72 5.11 1.18 1.16 0.659
# The RMSE for this forecast is 1403. Seem pretty high but let's compare.
adj %>%
stretch_tsibble(.init = 10) %>%
model(
damp = ETS(season_adjust ~ error("A") + trend("Ad") + season("N")),
Holt = ETS(season_adjust ~ error("A") + trend("A") + season("N"))
) %>%
forecast(h = 8) %>%
accuracy(adj)
## Warning: The future dataset is incomplete, incomplete out-of-sample data will be treated as missing.
## 8 observations are missing between 2018 Q1 and 2019 Q4
## # A tibble: 2 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 damp Test 419. 1315. 1052. 1.61 4.83 1.10 1.09 0.694
## 2 Holt Test 517. 1474. 1164. 2.11 5.39 1.22 1.22 0.736
# The RMSE is 1315 for the damped trend method and 1474 for Holt's linear method.
# Thus, according to the RMSE, the damped trend method is best for the forecast
# when it's seasonally adjusted data. I could agree with this based on the graph.
# I am going to choose the damped trend for my overall forecast since it has the
# lowest RMSE for the forecast, tied for the lowest RMSE for the fit, and represents
# the data's forecast best graphically.
# h.
# I am now checking the residuals of the damped trend method with the seasonally
# adjusted data.
gg_tsresiduals(stl_fit)

# The residuals overall have a constant variance with a few outliers, there seems
# to be a little autocorrelation, and the mean is centered around zero with an
# outlier. I'm going to run the ljung-box test to see if the residuals are
# significant. Lag = 10 since there seems to be no seasonality.
augment(stl_fit) %>%
features(.innov, ljung_box, lag = 10, dof = 0)
## # A tibble: 1 × 3
## .model lb_stat lb_pv…¹
## <chr> <dbl> <dbl>
## 1 "ETS(season_adjust ~ error(\"A\") + trend(\"Ad\") + season(\"… 9.08 0.525
## # … with abbreviated variable name ¹lb_pvalue
# The p value is greater than 0.05 so the residuals are not significant at the
# 5 percent level, aka the residuals are indistinguishable from white noise!
Exercise 7
# NZ to Australia
nz <- aus_arrivals %>%
filter(Origin == "NZ") %>%
as_tsibble(index = Quarter)
View(nz)
# a.
autoplot(nz)
## Plot variable not specified, automatically selected `.vars = Arrivals`

# Yearly seasonality seems to roughly exist. There is also an increasing trend.
# b.
# Creating a training set that consists of all years except for the last two
train <- nz %>%
filter(year(Quarter) <= 2010)
View(train)
test <- nz %>%
filter(year(Quarter) > 2010)
recent <- nz %>%
filter(year(Quarter) >= 1981)
# Now I am forecasting the test set (and comparing) using the Holt-Winters'
# multiplicative method
fit <- train %>%
model(ETS(Arrivals ~ error("M") + trend("A") + season("M")))
fc <- fit %>%
forecast(h = 8)
# I am now plotting it to view it graphically
fc %>%
autoplot(nz)

# The forecast seems to fit the test set pretty accurately.
# c.
# The variance of the seasonality is not constant throught out the data, for it
# proportionally changes as time continues. Therefore, multiplicative seasonality
# is necessary for the forecast.
# d.
# I am now forecasting the two year test set along with the actual data for the
# following methods (without the PI):
# i) an ETS model (the auto method?)
train %>%
model(ETS(Arrivals)) %>%
report()
## Series: Arrivals
## Model: ETS(M,A,M)
## Smoothing parameters:
## alpha = 0.6481867
## beta = 0.000100016
## gamma = 0.2271947
##
## Initial states:
## l[0] b[0] s[0] s[-1] s[-2] s[-3]
## 75543.76 2033.626 1.001149 1.229188 1.101512 0.6681509
##
## sigma^2: 0.0082
##
## AIC AICc BIC
## 2856.290 2857.926 2881.377
# This Holt-Winters' multi method
fc1 <- train %>%
model(ETS(Arrivals)) %>%
forecast(h = 8)
fc1 %>%
autoplot(nz)

# ii) ETS model applied to a log transformed series
log <- nz %>%
mutate(Arrivals = log(Arrivals))
train2 <- log %>%
filter(year(Quarter) <= 2010)
train2 %>%
model(ETS(Arrivals)) %>%
report()
## Series: Arrivals
## Model: ETS(A,N,A)
## Smoothing parameters:
## alpha = 0.6827185
## gamma = 0.2231326
##
## Initial states:
## l[0] s[0] s[-1] s[-2] s[-3]
## 11.18255 0.01514111 0.2412994 0.1150147 -0.3714552
##
## sigma^2: 0.0079
##
## AIC AICc BIC
## 2.073627 3.073627 21.586070
# This ETS method says there's no trend but an additive seasonal component. The
# AICc is 3.07
fc2 <- train2 %>%
model(ETS(Arrivals)) %>%
forecast(h = 8)
fc2 %>%
autoplot(log)

# I feel as if damped trend would look best for this case though.
train2 %>%
model(ETS(Arrivals ~ error("A") + trend("Ad") + season("A"))) %>%
report()
## Series: Arrivals
## Model: ETS(A,Ad,A)
## Smoothing parameters:
## alpha = 0.5595716
## beta = 0.09409677
## gamma = 0.2153613
## phi = 0.8219273
##
## Initial states:
## l[0] b[0] s[0] s[-1] s[-2] s[-3]
## 11.28025 -0.08118968 0.02259455 0.2404159 0.1114745 -0.374485
##
## sigma^2: 0.0079
##
## AIC AICc BIC
## 3.770480 5.788828 31.645397
# The AICc is 5.78
fc3 <- train2 %>%
model(ETS(Arrivals ~ error("A") + trend("Ad") + season("A"))) %>%
forecast(h = 8)
fc3 %>%
autoplot(log)

# The forecasts actually seem very identical to one another.Thus, I
# am going to rely on the AICc value for this one.
# iii) seasonal naive method
# The lag will be equal to 4 since it's quarterly data with yearly seasonality
fc4 <- train %>%
model(seasonal_naive = SNAIVE(Arrivals ~ lag(4))) %>%
forecast(h = 8)
fc4 %>%
autoplot(nz)

# iv) an STL decomposition applied to the log transformed data followed by an
# ETS model applied to the seasonally adjusted (transformed) data.
# Using the log transformed data, I am going to employ an STL decomposition to
# find my seasonally adjusted data
stl_fit <- train2 %>%
model(STL(Arrivals ~ season() + trend())) %>%
components()
View(stl_fit)
adj <- stl_fit %>%
select(Quarter, season_adjust)
adj
## # A tsibble: 120 x 2 [1Q]
## Quarter season_adjust
## <qtr> <dbl>
## 1 1981 Q1 11.1
## 2 1981 Q2 11.3
## 3 1981 Q3 11.2
## 4 1981 Q4 11.0
## 5 1982 Q1 11.0
## 6 1982 Q2 10.9
## 7 1982 Q3 11.0
## 8 1982 Q4 10.9
## 9 1983 Q1 11.0
## 10 1983 Q2 10.8
## # … with 110 more rows
## # ℹ Use `print(n = ...)` to see more rows
# Here is the plot of the seasonally adjusted data
adj %>%
autoplot()
## Plot variable not specified, automatically selected `.vars = season_adjust`

# Now, an ETS model applied to the seasoanlly adjusted data
adj %>%
model(ETS(season_adjust)) %>%
report()
## Series: season_adjust
## Model: ETS(A,A,N)
## Smoothing parameters:
## alpha = 0.7859241
## beta = 0.0001000017
##
## Initial states:
## l[0] b[0]
## 11.1245 0.01174078
##
## sigma^2: 0.0054
##
## AIC AICc BIC
## -45.50147 -44.97515 -31.56401
# The ETS reported the Holt's linear method to be the best (lowest AICc)
# Here is the forecast of the test set.
fc5 <- adj %>%
model(ETS(season_adjust)) %>%
forecast(h = 8)
fc5
## # A fable: 8 x 4 [1Q]
## # Key: .model [1]
## .model Quarter season_adjust .mean
## <chr> <qtr> <dist> <dbl>
## 1 ETS(season_adjust) 2011 Q1 N(13, 0.0054) 12.6
## 2 ETS(season_adjust) 2011 Q2 N(13, 0.0088) 12.6
## 3 ETS(season_adjust) 2011 Q3 N(13, 0.012) 12.6
## 4 ETS(season_adjust) 2011 Q4 N(13, 0.015) 12.6
## 5 ETS(season_adjust) 2012 Q1 N(13, 0.019) 12.7
## 6 ETS(season_adjust) 2012 Q2 N(13, 0.022) 12.7
## 7 ETS(season_adjust) 2012 Q3 N(13, 0.026) 12.7
## 8 ETS(season_adjust) 2012 Q4 N(13, 0.029) 12.7
# To plot it, I need to find the seasonally adjusted component of the entire
# series
stl_fit2 <- log %>%
model(STL(Arrivals ~ season() + trend())) %>%
components()
adj1 <- stl_fit2 %>%
select(Quarter, season_adjust)
View(adj1)
# Now the test set forecast plot
fc5 %>%
autoplot(adj1)

# This forecast doesn't seem to be very reasonable.
# e.
# I am finding the RMSE values for each forecast using the accuracy() fucntion
# to choose which method is best.
# i)
accuracy(fc1, recent)
## Warning: The future dataset is incomplete, incomplete out-of-sample data will be treated as missing.
## 1 observation is missing at 2012 Q4
## # A tibble: 1 × 11
## .model Origin .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 ETS(Arriva… NZ Test -10467. 18302. 12746. -3.27 4.16 0.855 0.946 -0.0857
# The RMSE for Holt-Winters' multiplicative method has an
# RMSE of 18302.
# ii)
accuracy(fc2, log)
## Warning: The future dataset is incomplete, incomplete out-of-sample data will be treated as missing.
## 1 observation is missing at 2012 Q4
## # A tibble: 1 × 11
## .model Origin .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 ETS(Arr… NZ Test -0.00383 0.0444 0.0381 -0.0288 0.303 0.345 0.310 0.00656
# The ETS model that is applied to a log transformation is very low at 0.0444.
# I wonder if this is because of the units. After looking at the data, it is
# definitely because of the unit change. It is the same case when comparing the
# AICc. I tried to compare the accuracy using the innovative residuals, but it
# was equal to the actual residuals. For now, I will choose the best model based
# on the graph.
# iii)
accuracy(fc4, recent)
## Warning: The future dataset is incomplete, incomplete out-of-sample data will be treated as missing.
## 1 observation is missing at 2012 Q4
## # A tibble: 1 × 11
## .model Origin .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 seasonal_naive NZ Test 5692. 14962. 14203. 2.27 4.97 0.952 0.773 -0.116
# The seasonal naive model as RMSE of 14962. This is lower than Holt-Winters'
# multi model. So this one will have priority over this one.
# iv)
accuracy(fc5, adj1)
## Warning: The future dataset is incomplete, incomplete out-of-sample data will be treated as missing.
## 1 observation is missing at 2012 Q4
## # A tibble: 1 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 ETS(season_adjus… Test -0.0599 0.0702 0.0599 -0.476 0.476 0.541 0.491 -0.0108
# The RMSE is again very low at 0.0702 because of the log transformation. The
# ETS model applied to a log transformation is the lowest for this case.
# Now I have to compare the two graphs from the ETS log transformed forecast and
# the seasonal naive model
# ETS
fc2 %>%
autoplot(log)

# Snaive
fc4 %>%
autoplot(nz)

# Honestly, the prediction intervals for the ETS log transformed data looks very
# nice. Therefore, this will be my chosen model.
# I am now checking the residuals of the chosen model, ETS log trans.:
fit2 <- train2 %>%
model(ETS(Arrivals))
gg_tsresiduals(fit2)

# The mean seems to be centered around the mean and there is no autocorrelation.
# The variance could be balanced more, but this will just cause complications
# for the prediction intervals.
# To make sure there is not autocorrelation, I am going to employ the ljung-box
# test. Lag = 8 for quarterly data with a yearly seasonality.
augment(fit2) %>%
features(.innov, ljung_box, lag = 8, dof = 0)
## # A tibble: 1 × 4
## Origin .model lb_stat lb_pvalue
## <chr> <chr> <dbl> <dbl>
## 1 NZ ETS(Arrivals) 6.18 0.627
# It has a very large p value which is desired. The residuals are indistinguishable
# from white noise!
# f.
# I am going to use time series cross-validation to compare the same four methods
stch <- nz %>%
stretch_tsibble(.init = 36, .step = 3)
# i) an ETS model
fc6 <- stch %>%
model(ETS(Arrivals)) %>%
forecast(h = 3)
accuracy(fc6, nz)
## Warning: The future dataset is incomplete, incomplete out-of-sample data will be treated as missing.
## 2 observations are missing between 2012 Q4 and 2013 Q1
## # A tibble: 1 × 11
## .model Origin .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 ETS(Arrivals) NZ Test 4306. 15459. 11940. 2.12 6.47 0.811 0.812 0.254
# The RMSE for the ETS model is 15459.
# ii) ETS applied to a log transformed series
stch1 <- log %>%
stretch_tsibble(.init = 36, .step = 3)
fc7 <- stch1 %>%
model(ETS(Arrivals)) %>%
forecast(h = 3)
accuracy(fc7, log)
## Warning: The future dataset is incomplete, incomplete out-of-sample data will be treated as missing.
## 2 observations are missing between 2012 Q4 and 2013 Q1
## # A tibble: 1 × 11
## .model Origin .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 ETS(Arrivals) NZ Test 0.0311 0.0921 0.0674 0.255 0.560 0.630 0.659 0.301
# The forecast for this model is 0.0921
# iii) seasonal naive
fc8 <- stch %>%
model(SNAIVE(Arrivals ~ lag(4))) %>%
forecast(h = 3)
accuracy(fc8, nz)
## Warning: The future dataset is incomplete, incomplete out-of-sample data will be treated as missing.
## 2 observations are missing between 2012 Q4 and 2013 Q1
## # A tibble: 1 × 11
## .model Origin .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 SNAIVE(Arrival… NZ Test 8239. 18682. 14350. 3.82 7.70 0.974 0.981 0.566
# The RMSE is 18682.
# iv) STL decomp. applied to log trans. followed by ETS applied to
# the seasonally adj data
stl_fit3 <- stch1 %>%
model(STL(Arrivals ~ season() + trend())) %>%
components()
adj2 <- stl_fit3 %>%
select(!c(.model))
fc9 <- adj2 %>%
model(ETS(season_adjust)) %>%
forecast(h = 3)
View(fc9)
accuracy(fc9, adj1)
## Warning: The future dataset is incomplete, incomplete out-of-sample data will be treated as missing.
## 2 observations are missing between 2012 Q4 and 2013 Q1
## # A tibble: 1 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 ETS(season_adjust) Test 0.0294 0.0817 0.0618 0.241 0.511 0.577 0.585 0.405
# The RMSE of this model is 0.0817.
# The results actually did change. The original ETS model became
# more suitable than the snaive model, and the ETS of the log trans. seasonally
# adjusted data became better than the ETS log trans. model, so opposite from
# the results from before.
Exercise 8
# a.
# manipulating the data
View(aus_production)
cem <- aus_production %>%
select(Quarter, Cement) %>%
as_tsibble(index = Quarter)
cem
## # A tsibble: 218 x 2 [1Q]
## Quarter Cement
## <qtr> <dbl>
## 1 1956 Q1 465
## 2 1956 Q2 532
## 3 1956 Q3 561
## 4 1956 Q4 570
## 5 1957 Q1 529
## 6 1957 Q2 604
## 7 1957 Q3 603
## 8 1957 Q4 582
## 9 1958 Q1 554
## 10 1958 Q2 620
## # … with 208 more rows
## # ℹ Use `print(n = ...)` to see more rows
autoplot(cem)
## Plot variable not specified, automatically selected `.vars = Cement`

# Now applying cross validation
stch <- cem %>%
stretch_tsibble(.init = 5, .step = 1)
# and now the forecasts
# The ETS model
fc1 <- stch %>%
model(ETS(Cement)) %>%
forecast(h = 1)
# The seasonal naive model. Lag = 4
fc2 <- stch %>%
model(SNAIVE(Cement ~ lag(4))) %>%
forecast(h = 1)
# b.
# MSE of the resulting 4-step-ahead errors
stch1 <- cem %>%
stretch_tsibble(.init = 5, .step = 4)
# The two forecasts
# ETS
fc1 <- stch1 %>%
model(ETS(Cement)) %>%
forecast(h = 1)
mse1 <- accuracy(fc1, cem) %>%
mutate(MSE = (RMSE)^2) %>%
select(MSE, RMSE)
mse1
## # A tibble: 1 × 2
## MSE RMSE
## <dbl> <dbl>
## 1 6473. 80.5
# The MSE is RMSE squared, which is found to be 6473 from the table above.
# snaive
fc2 <- stch1 %>%
model(SNAIVE(Cement ~ lag(4))) %>%
forecast(h = 1)
mse2 <- accuracy(fc2, cem) %>%
mutate(MSE = (RMSE)^2) %>%
select(MSE, RMSE)
mse2
## # A tibble: 1 × 2
## MSE RMSE
## <dbl> <dbl>
## 1 23018. 152.
# The RMSE of the snaive model is 152. Thus, the MSE is this value
# squared. You can view the value from the table above.