5.1 Produce forecasts for the following series using whichever of NAIVE(y), SNAIVE(y) or RW(y ~ drift()) is more appropriate in each case:

Visualization of Australian Population (global_economy)

global_economy %>%
  filter(Country == "Australia") %>%
  autoplot(Population) +
  labs(
    title = "Australian Population",
    y = "Population",
    x = "Year"
  )

Comment: Australian population shows a strong long-term upward trend with relatively smooth growth and no seasonality. The most appropriate is the random walk drift method for the upward trend.

Australian population forecast:

aus_pop <- global_economy %>%
  filter(Country == "Australia")

pop_fit <- aus_pop %>%
  model(RW(Population ~ drift()))

pop_fc <- pop_fit %>%
  forecast(h = "10 years")

autoplot(aus_pop, Population) +
  autolayer(pop_fc, series = "Forecast") +
  labs(title = "Australia Population Forecast",
       y = "Population")
## Warning in ggdist::geom_lineribbon(without(intvl_mapping, "colour_ramp"), :
## Ignoring unknown parameters: `series`
## Warning in geom_line(mapping = without(mapping, "shape"), data =
## unpack_data(object[single_row[["FALSE"]], : Ignoring unknown parameters:
## `series`

Bricks (aus_production)

Visualization of Australian Clay Brick Production

aus_production %>%
  autoplot(Bricks) +
  labs(
    title = "Australian Clay Brick Production",
    y = "Bricks",
    x = "Quarter"
  )
## Warning: Removed 20 rows containing missing values or values outside the scale range
## (`geom_line()`).

# Comment: Brick production is quarterly and strongly seasonal (construction cycles). The seasonal naïve method repeats the value from the same quarter of the previous year.

Brick production forecast:

bricks <- aus_production %>%
  filter(!is.na(Bricks))

bricks_fit <- bricks %>%
  model(SNAIVE(Bricks))

bricks_fc <- bricks_fit %>%
  forecast(h = "5 years")  # or whatever horizon you choose

autoplot(bricks, Bricks) +
  autolayer(bricks_fc, series = "Forecast") +
  labs(title = "Bricks Production Forecast (SNAIVE)")
## Warning in ggdist::geom_lineribbon(without(intvl_mapping, "colour_ramp"), :
## Ignoring unknown parameters: `series`
## Warning in geom_line(mapping = without(mapping, "shape"), data =
## unpack_data(object[single_row[["FALSE"]], : Ignoring unknown parameters:
## `series`

NSW Lambs (aus_livestock)

Plot of NSW Lamb Slaughter Numbers

aus_livestock %>%
  filter(State == "New South Wales",
         Animal == "Lambs") %>%
  autoplot(Count) +
  labs(
    title = "NSW Lamb Slaughter Numbers",
    y = "Number of Lambs",
    x = "Month"
  )

Comment: Lamb slaughtering numbers are highly seasonal (agricultural cycles). The seasonal naïve approach captures repeating yearly seasonal patterns effectively.

NSW Lamb forecast:

nsw_lambs <- aus_livestock %>%
  filter(State == "New South Wales", Animal == "Lambs")

lambs_fit <- nsw_lambs %>%
  model(SNAIVE(Count))

lambs_fc <- lambs_fit %>%
  forecast(h = "5 years")

autoplot(nsw_lambs, Count) +
  autolayer(lambs_fc, series = "Forecast") +
  labs(title = "NSW Lambs Count Forecast (SNAIVE)")
## Warning in ggdist::geom_lineribbon(without(intvl_mapping, "colour_ramp"), :
## Ignoring unknown parameters: `series`
## Warning in geom_line(mapping = without(mapping, "shape"), data =
## unpack_data(object[single_row[["FALSE"]], : Ignoring unknown parameters:
## `series`

Household wealth (hh_budget).

Visualization of Australian Household Wealth

hh_budget %>%
  autoplot(Wealth) +
  labs(
    title = "Australian Household Wealth",
    y = "Wealth",
    x = "Year"
  )

Household wealth forecast:

wealth <- hh_budget

wealth_fit <- wealth %>%
  model(RW(Wealth ~ drift()))

wealth_fc <- wealth_fit %>%
  forecast(h = "5 years")

autoplot(wealth, Wealth) +
  autolayer(wealth_fc, series = "Forecast") +
  labs(title = "Household Wealth Forecast (Drift)")
## Warning in ggdist::geom_lineribbon(without(intvl_mapping, "colour_ramp"), :
## Ignoring unknown parameters: `series`
## Warning in geom_line(mapping = without(mapping, "shape"), data =
## unpack_data(object[single_row[["FALSE"]], : Ignoring unknown parameters:
## `series`

Australian takeaway food turnover (aus_retail).

takeaway <- aus_retail %>%
  filter(Industry == "Takeaway food services") %>%
  summarise(Turnover = sum(Turnover))

takeaway %>%
  autoplot(Turnover) +
  labs(
    title = "Australian Takeaway Food Turnover (Total)",
    y = "Turnover (AUD)",
    x = "Month"
  )

Comment: Retail turnover data are monthly and strongly seasonal (holiday spending, end-of-year peaks). The seasonal naïve method captures recurring monthly patterns.

Australian Takeway Food Turnover Forecast:

takeaway <- aus_retail %>%
  filter(Industry == "Takeaway food services") %>%
  summarise(Turnover = sum(Turnover))

takeaway_fit <- takeaway %>%
  model(SNAIVE(Turnover ~ drift()))

takeaway_fc <- takeaway_fit %>%
  forecast(h = "5 years")

autoplot(takeaway, Turnover) +
  autolayer(takeaway_fc, series = "Forecast") +
  labs(title = "Australian Takeaway Food Turnover Forecast")
## Warning in ggdist::geom_lineribbon(without(intvl_mapping, "colour_ramp"), :
## Ignoring unknown parameters: `series`
## Warning in geom_line(mapping = without(mapping, "shape"), data =
## unpack_data(object[single_row[["FALSE"]], : Ignoring unknown parameters:
## `series`

5.2: Use the Facebook stock price (data set gafa_stock) to do the following:

a. Produce a time plot of the series.

fb <- gafa_stock %>% filter(Symbol == "FB") %>% 
  mutate(day = row_number()) %>%
  update_tsibble(index = day, regular = TRUE)

fb %>% autoplot(Close) + labs(title = "Facebook (Meta Inc.) Closing Stock Prices", y = "USD")

b. Produce forecasts using the drift method and plot them.

# fit the model 
fb_fit <- fb %>% model(RW(Close ~ drift()))

# Generate forecasts for the next 1 year 
fb_fc <- fb_fit %>% forecast(h = 200)

# Plot forecast
fb_fc %>% 
  autoplot(fb, color = "red", level = NULL) +
  labs(
    y = "USD",
    title = "Facebook Closing Price Forecast: Drift Model"
  )

c. Show that the forecasts are identical to extending the line drawn between the first and last observations.

# Plot forecast
fb_fc %>% 
  autoplot(fb, color = "red", linewidth = 1, level = NULL) +
  labs(
    y = "USD",
    title = "Facebook Closing Price Forecast: Drift Model"
  ) + 
  #geom_abline(slope = 0.0608, intercept = 54.71, color = "blue", linetype = 2) + 
  geom_segment(aes(x = 1, y = 54.71, xend = 1258, yend = 131.09), color = "blue", linewidth = 1, linetype = 2)
## Warning in geom_segment(aes(x = 1, y = 54.71, xend = 1258, yend = 131.09), : All aesthetics have length 1, but the data has 1258 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing
##   a single row.

d. Try using some of the other benchmark functions to forecast the same data set. Which do you think is best? Why?

# fit the model 
fb_fit <- fb %>% model(NAIVE(Close))

# Generate forecasts for the next 1 year 
fb_fc <- fb_fit %>% forecast(h = 200)

# Plot forecast
fb_fc %>% 
  autoplot(fb, color = "red", level = NULL) +
  labs(
    y = "USD",
    title = "Facebook Closing Price Forecast: Naive Model"
  )

Comment: Compared with the drift model, the Naïve model is more suitable for forecasting stock prices. It assumes that tomorrow’s price will be the same as today’s price. Because stock prices are often unpredictable and follow a random walk pattern, this simple method is a reliable way to forecast future values.

5.3 Apply a seasonal naïve method to the quarterly Australian beer production data from 1992. Check if the residuals look like white noise, and plot the forecasts. The following code will help.

Plot the Australian beer production with forecasts by applying seasonal naive method.

# Extract data of interest
recent_production <- aus_production |>
  filter(year(Quarter) >= 1992)
# Define and estimate a model
fit <- recent_production |> model(SNAIVE(Beer))
# Look at the residuals
fit |> gg_tsresiduals()
## Warning: `gg_tsresiduals()` was deprecated in feasts 0.4.2.
## ℹ Please use `ggtime::gg_tsresiduals()` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: Removed 4 rows containing missing values or values outside the scale range
## (`geom_line()`).
## Warning: Removed 4 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 4 rows containing non-finite outside the scale range
## (`stat_bin()`).

# Look a some forecasts
fit |> forecast() |> autoplot(recent_production)

Conclusion: The Seasonal Naïve model is appropriate for quarterly beer production. Forecasts are reasonable and consistent with historical seasonal patterns. Residuals are approximately white noise, so the model doesn’t systematically miss any structure.

5.4: Repeat the previous exercise using the Australian Exports series from global_economy and the Bricks series from aus_production. Use whichever of NAIVE() or SNAIVE() is more appropriate in each case.

Plot of Australian Exports:

aus_exports <- global_economy %>%
  filter(Country == "Australia") %>%
  select(Year, Exports)
  
aus_exports |> autoplot(Exports) +
  labs(
    title = "Australian Exports",
    y = "Exports (AUD)",
    x = "Year"
  )

Comment: I have choosen the Naïve model because the data is annual, so a seasonal naïve model is not appropriate.

# Define and estimate a model
aus_exports_fit <- aus_exports |> model(NAIVE(Exports))

# Look at the residuals
aus_exports_fit |> gg_tsresiduals()
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_line()`).
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 1 row containing non-finite outside the scale range
## (`stat_bin()`).

# Look a some forecasts
aus_exports_fit |> forecast(h = 3) |> autoplot(aus_exports) + labs(title = "Australian Export Forecasts: Naive Mdoel", y = "% GDP")

Visualize time series of the Bricks series from aus_production

aus_production %>%
  autoplot(Bricks) +
  labs(
    title = "Australian Clay Brick Production",
    y = "Bricks",
    x = "Quarter"
  )
## Warning: Removed 20 rows containing missing values or values outside the scale range
## (`geom_line()`).

# Define and estimate a model
bricks_fit <- aus_production %>% model(NAIVE(Bricks))

# Look at the residuals
bricks_fit %>% gg_tsresiduals()
## Warning: Removed 21 rows containing missing values or values outside the scale range
## (`geom_line()`).
## Warning: Removed 21 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 21 rows containing non-finite outside the scale range
## (`stat_bin()`).

# Look a some forecasts
bricks_fit %>% forecast(h = 12) |> autoplot(aus_production) + labs(title = "Australian Clay Brick Production: Seasonal Naive Mdoel", y = "Millions")
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning: Removed 12 rows containing missing values or values outside the scale range
## (`geom_line()`).
## Warning: Removed 20 rows containing missing values or values outside the scale range
## (`geom_line()`).

5.7: For your retail time series (from Exercise 7 in Section 2.10):

set.seed(123)
myseries <- aus_retail %>%
  filter(`Series ID` == sample(aus_retail$`Series ID`,1))

myseries %>% autoplot(Turnover) + labs(title = "Victoria Household and Goods Retail Turnover")

a. Create a training dataset consisting of observations before 2011 using

myseries_train <- myseries |>
  filter(year(Month) < 2011)

b. Check that your data have been split appropriately by producing the following plot.

autoplot(myseries, Turnover) +
  autolayer(myseries_train, Turnover, colour = "red")

c. Fit a seasonal naïve model using SNAIVE() applied to your training data (myseries_train).

fit <- myseries_train |>
  model(SNAIVE(Turnover))

d. Check the residuals.

fit |> gg_tsresiduals()
## Warning: Removed 12 rows containing missing values or values outside the scale range
## (`geom_line()`).
## Warning: Removed 12 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 12 rows containing non-finite outside the scale range
## (`stat_bin()`).

Do the residuals appear to be uncorrelated and normally distributed?

Comment: The residuals do appear to be correlated based off the fact that many of the lags are significantly correlated. The distribution of the residuals seem to be right skewed around a mean greater then 0 There is also a clear pattern in the residual plot where the variability becomes wider toward the end of the plot so homoscedasticity can not be assumed.

e. Produce forecasts for the test data

fc <- fit |>
  forecast(new_data = anti_join(myseries, myseries_train))
## Joining with `by = join_by(State, Industry, `Series ID`, Month, Turnover)`
fc |> autoplot(myseries)

#f. Compare the accuracy of your forecasts against the actual values.

fit |> accuracy()
## # A tibble: 1 × 12
##   State    Industry .model .type    ME  RMSE   MAE   MPE  MAPE  MASE RMSSE  ACF1
##   <chr>    <chr>    <chr>  <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Victoria Househo… SNAIV… Trai…  25.1  45.6  35.4  5.05  7.29     1     1 0.695
fc |> accuracy(myseries)
## # A tibble: 1 × 12
##   .model    State Industry .type    ME  RMSE   MAE   MPE  MAPE  MASE RMSSE  ACF1
##   <chr>     <chr> <chr>    <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 SNAIVE(T… Vict… Househo… Test   172.  213.  174.  15.1  15.2  4.90  4.68 0.947

g. How sensitive are the accuracy measures to the amount of training data used?

Comment: Increasing the amount of the training data should generally improve the accuracy of the predictions by allowing a model to learn more complex patterns and seasonal variations within the data. This is especially true when dealing with longer horizon periods. In the code below I added 5 more years to the training data and re fit the model, forecasted and calculated the accuracy. Visually inspecting the differences in accuracy measures from the previous forcast, all metrics improved.