Forecasting

5.1 Prior to making forecasts, I will create a basic time series for each data set to observe temporal patterns then I will apply the appropriate forecast.

Time Series created

Australian Population (global_economy)
Bricks (aus_production)
NSW Lambs (aus_livestock)
Household wealth (hh_budget).
Australian takeaway food turnover (aus_retail).

# 1. Australian Population
global_economy %>% 
  filter(Country == "Australia") %>%
  autoplot(Population) + 
  labs(title = "Australian Population Over Time", 
       y = "Population", 
       x = "Year")

# 2. Bricks
aus_production %>%
  autoplot(Bricks) + 
  labs(title = "Australian Brick Production Over Time", 
       y = "Bricks", 
       x = "Quarter")

## Warning: Removed 20 rows containing missing values or values outside the scale range
## (`geom_line()`).

# Fixed code
aus_livestock %>%
  filter(Animal == "Lambs") %>%  # Add this filter line
  autoplot(Count) +
  labs(title = "Lamb Population Over Time", 
       y = "Number of Lambs",  # Fix the y-label
       x = "Time")

# 4. Household Wealth
hh_budget %>%
  autoplot(Wealth) + 
  labs(title = "Household Wealth Over Time", 
       y = "Wealth", 
       x = "Year")

# 5. Australian Takeaway Food Turnover
aus_retail %>%
  filter(Industry == "Takeaway food services") %>%
  autoplot(Turnover) + 
  labs(title = "Australian Takeaway Food Turnover Over Time", 
       y = "Turnover", 
       x = "Month")

##### Forecast for Australian Population (global_economy) using RW y ~ drift)

# Filter for Australian population and create forecast
australia_pop_forecast <- global_economy %>%
  filter(Country == "Australia") %>%
  model(drift = RW(Population ~ drift())) %>%
  forecast(h = 10)

# Plot the forecast
australia_pop_forecast %>%
  autoplot(global_economy %>% filter(Country == "Australia")) +
  labs(title = "Australian Population Forecast using Random Walk with Drift",
       y = "Population",
       x = "Year")

##### Forecast for Australian Population (global_economy) using RW y ~ drift)

# Create the forecast first
bricks_clean <- aus_production %>% 
  filter(!is.na(Bricks))

bricks_forecast <- bricks_clean %>%
  model(snaive = SNAIVE(Bricks)) %>%
  forecast(h = 12)

# Use a basic plot with theme colors
bricks_forecast %>%
  autoplot(aus_production) +
  labs(title = "Australian Brick Production Forecast using Seasonal Naive",
       y = "Bricks",
       x = "Quarter") +
  theme_bw()

## Warning: Removed 20 rows containing missing values or values outside the scale range
## (`geom_line()`).

bricks_forecast

## # A fable: 12 x 4 [1Q]
## # Key:     .model [1]
##    .model Quarter
##    <chr>    <qtr>
##  1 snaive 2005 Q3
##  2 snaive 2005 Q4
##  3 snaive 2006 Q1
##  4 snaive 2006 Q2
##  5 snaive 2006 Q3
##  6 snaive 2006 Q4
##  7 snaive 2007 Q1
##  8 snaive 2007 Q2
##  9 snaive 2007 Q3
## 10 snaive 2007 Q4
## 11 snaive 2008 Q1
## 12 snaive 2008 Q2
## # ℹ 2 more variables: Bricks <dist>, .mean <dbl>

SNAIVE on Lamb data with a focus on New South Wales

# Focus on NSW only
nsw_lambs_forecast <- aus_livestock %>%
  filter(Animal == "Lambs", State == "New South Wales", !is.na(Count)) %>%
  model(snaive = SNAIVE(Count)) %>%
  forecast(h = 24)

nsw_lambs_forecast %>%
  autoplot(aus_livestock %>% filter(Animal == "Lambs", State == "New South Wales")) +
  labs(title = "NSW Lamb Population Forecast - SNAIVE",
       y = "Number of Lambs",
       x = "Time")

##### Australian Household Wealth Forecast using Random Walk with Drift

# Filter for the three states with upward trends
takeaway_trending_states <- aus_retail %>%
  filter(Industry == "Takeaway food services",
         State %in% c("New South Wales", "Victoria", "Queensland"))

# Create RW with drift forecast
takeaway_drift_forecast <- takeaway_trending_states %>%
  model(drift = RW(Turnover ~ drift())) %>%
  forecast(h = 24)

# Plot the forecast
takeaway_drift_forecast %>%
  autoplot(takeaway_trending_states) +
  labs(title = "Takeaway Food Turnover Forecast - NSW, VIC, QLD (RW with Drift)",
       y = "Turnover (AUD Million)",
       x = "Month")

##### Australian Household Wealth Forecast using Random Walk with Drift

# Create RW with drift forecast for household wealth
wealth_forecast <- hh_budget %>%
  model(drift = RW(Wealth ~ drift())) %>%
  forecast(h = 10)

wealth_forecast %>%
  autoplot(hh_budget) +
  scale_fill_manual(values = c("80%" = "lightblue", "95%" = "lightcoral")) +
  labs(title = "Australian Household Wealth Forecast - RW with Drift",
       y = "Wealth (AUD)",
       x = "Year")

## Warning: No shared levels found between `names(values)` of the manual scale and the
## data's fill values.

5.2a Use the Facebook stock price (data set gafa_stock) to do the following:

Produce a time plot of the series.
Produce forecasts using the drift method and plot them.
Show that the forecasts are identical to extending the line drawn between the first and last observations.
Try using some of the other benchmark functions to forecast the same data set.
Which do you think is best? Why?

library(fpp3)

# Load the data and filter for Facebook
facebook <- gafa_stock %>%
  filter(Symbol == "FB") %>%
  update_tsibble(regular = TRUE)  # Add this line to fix irregular time series

# 1. Produce a time plot of the series
facebook %>%
  autoplot(Close) +
  labs(title = "Facebook Stock Price Over Time",
       y = "Closing Price (USD)",
       x = "Date")

5.22b Forecasts using drift method

facebook_regular <- facebook %>%
  tsibble::fill_gaps() %>%  # Fill missing dates
  tidyr::fill(Close, .direction = "down")  # Forward fill prices

# Alternative approach - make it regular
facebook_regular <- facebook %>%
  update_tsibble(regular = TRUE)

# Now create the drift forecast
facebook_drift <- facebook_regular %>%
  model(drift = RW(Close ~ drift())) %>%
  forecast(h = 30)

## Warning: 1 error encountered for drift
## [1] .data contains implicit gaps in time. You should check your data and convert implicit gaps into explicit missing values using `tsibble::fill_gaps()` if required.

facebook_drift %>%
  autoplot(facebook_regular) +
  labs(title = "Facebook Stock Price Forecast - Drift Method",
       y = "Closing Price (USD)",
       x = "Date")

## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf

## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf

## Warning: Removed 30 rows containing missing values or values outside the scale range
## (`geom_line()`).

5.2c Graph showing forecasts are identical to line between first and last observations

first_date <- min(facebook$Date)
last_date <- max(facebook$Date)
first_price <- facebook$Close[facebook$Date == first_date]
last_price <- facebook$Close[facebook$Date == last_date]

# Calculate the slope of the line
total_days <- as.numeric(last_date - first_date)
slope <- (last_price - first_price) / total_days

# Plot with comparison line
facebook_drift %>%
  autoplot(facebook) +
  geom_smooth(data = facebook, aes(x = Date, y = Close), 
              method = "lm", se = FALSE, color = "green", linetype = "dashed") +
  labs(title = "Facebook Forecast vs Linear Trend Line",
       subtitle = "Green dashed line shows trend between first and last observations",
       y = "Closing Price (USD)",
       x = "Date")

## `geom_smooth()` using formula = 'y ~ x'

## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf

## Warning: Removed 30 rows containing missing values or values outside the scale range
## (`geom_line()`).

5.2d Comparing with other benchmark methods

# 4. Compare with other benchmark methods
facebook_comparison <- facebook %>%
  model(
    naive = NAIVE(Close),
    drift = RW(Close ~ drift()),
    mean = MEAN(Close)
  ) %>%
  forecast(h = 30)

## Warning: 1 error encountered for naive
## [1] .data contains implicit gaps in time. You should check your data and convert implicit gaps into explicit missing values using `tsibble::fill_gaps()` if required.

## Warning: 1 error encountered for drift
## [1] .data contains implicit gaps in time. You should check your data and convert implicit gaps into explicit missing values using `tsibble::fill_gaps()` if required.

facebook_comparison %>%
  autoplot(facebook) +
  labs(title = "Facebook Stock Price - Comparison of Forecast Methods",
       y = "Closing Price (USD)",
       x = "Date")

## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf

## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf

## Warning: Removed 60 rows containing missing values or values outside the scale range
## (`geom_line()`).

5.3 Apply a seasonal naïve method to the quarterly Australian beer production data from 1992. Check if the residuals look like white noise, and plot the forecasts.

recent_production <- aus_production |>
  filter(year(Quarter) >= 1992)
fit <- recent_production |> model(SNAIVE(Beer))
fit |> gg_tsresiduals()

## Warning: `gg_tsresiduals()` was deprecated in feasts 0.4.2.
## ℹ Please use `ggtime::gg_tsresiduals()` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

## Warning: Removed 4 rows containing missing values or values outside the scale range
## (`geom_line()`).

## Warning: Removed 4 rows containing missing values or values outside the scale range
## (`geom_point()`).

## Warning: Removed 4 rows containing non-finite outside the scale range
## (`stat_bin()`).

## Warning: Removed 4 rows containing missing values or values outside the scale range
## (`geom_rug()`).

fit |> forecast() |> autoplot(recent_production)

fit |> augment() |> 
  features(.innov, ljung_box, lag = 12)

## # A tibble: 1 × 3
##   .model       lb_stat lb_pvalue
##   <chr>          <dbl>     <dbl>
## 1 SNAIVE(Beer)    38.7  0.000116

forecasts <- fit |> forecast(h = 8)

forecasts

## # A fable: 8 x 4 [1Q]
## # Key:     .model [1]
##   .model       Quarter
##   <chr>          <qtr>
## 1 SNAIVE(Beer) 2010 Q3
## 2 SNAIVE(Beer) 2010 Q4
## 3 SNAIVE(Beer) 2011 Q1
## 4 SNAIVE(Beer) 2011 Q2
## 5 SNAIVE(Beer) 2011 Q3
## 6 SNAIVE(Beer) 2011 Q4
## 7 SNAIVE(Beer) 2012 Q1
## 8 SNAIVE(Beer) 2012 Q2
## # ℹ 2 more variables: Beer <dist>, .mean <dbl>

Based on the ACF graph, I conclude that the residuals do not resemble white noise. Several significant spikes extend beyond the blue dashed confidence lines, indicating strong autocorrelation at those lags. The innovation residuals plot reveals considerable variation and clustering of positive and negative values rather than random fluctuation around zero. This systematic pattern suggests the SNAIVE model has not fully captured the underlying structure in the beer production data, as true white noise residuals should exhibit random, unpredictable behavior with no discernible patterns.

*5.4a A seasonal naïve method on the quarterly Australian exports from global economy.**

# Extract Australian exports data and remove NA values
aus_exports <- global_economy |>
  filter(Country == "Australia") |>
  filter(!is.na(Exports))

# Use appropriate methods for annual data
fit_exports <- aus_exports |> 
  model(
    naive = NAIVE(Exports),
    drift = RW(Exports ~ drift())
  )

# Check residuals for naive method
fit_exports |> select(naive) |> gg_tsresiduals()

## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_line()`).

## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).

## Warning: Removed 1 row containing non-finite outside the scale range
## (`stat_bin()`).

## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_rug()`).

# Check residuals for drift method  
fit_exports |> select(drift) |> gg_tsresiduals()

## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_line()`).

## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).

## Warning: Removed 1 row containing non-finite outside the scale range
## (`stat_bin()`).

## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_rug()`).

# Plot forecasts
fit_exports |> 
  forecast(h = 8) |> 
  autoplot(aus_exports) +
  labs(title = "Australian Exports Forecast - NAIVE and RW with Drift",
       y = "Exports",
       x = "Year")

5.4a Summary

These residual diagnostics indicate that the residuals do NOT represent white noise. The time series plot reveals concerning patterns, including apparent increasing variance over time (heteroscedasticity) and some clustering of residuals, particularly visible in the later years where there are extreme spikes.

The ACF plot shows several significant correlations at various lags that extend beyond the blue confidence bands, indicating autocorrelation in the residuals. While the histogram displays a roughly normal distribution which is positive, the presence of autocorrelation and changing variance patterns in the other two panels clearly violate the white noise assumptions.

This suggests that the Random Walk with Drift model has not adequately captured all the systematic patterns in the Australian household wealth data, and the residuals retain predictable structure that could potentially be modeled with a more sophisticated forecasting approach such as the ARIMA or ETS models.

5.4b A seasonal naïve method on the Bricks series from aus_production.

# Remove NA values from bricks data
aus_production_clean <- aus_production |>
  filter(!is.na(Bricks))

# Define and estimate SNAIVE model for bricks
fit_bricks <- aus_production_clean |> 
  model(SNAIVE(Bricks))

# Check residuals
fit_bricks |> gg_tsresiduals()

## Warning: Removed 4 rows containing missing values or values outside the scale range
## (`geom_line()`).

## Warning: Removed 4 rows containing missing values or values outside the scale range
## (`geom_point()`).

## Warning: Removed 4 rows containing non-finite outside the scale range
## (`stat_bin()`).

## Warning: Removed 4 rows containing missing values or values outside the scale range
## (`geom_rug()`).

# Plot forecasts
fit_bricks |> 
  forecast(h = 12) |> 
  autoplot(aus_production_clean) +
  labs(title = "Australian Brick Production Forecast - SNAIVE",
       y = "Bricks",
       x = "Quarter")

*5.4b Summary** The residual analysis reveals clear violations of white noise assumptions across multiple diagnostic measures. The time series plot shows clear clustering of positive and negative residuals rather than random scatter, while the ACF plot displays multiple significant spikes well beyond the blue confidence lines, indicating strong autocorrelation. Although the histogram appears reasonably normal, which is positive, the other two panels show clear violations of white noise assumptions

5.7

library(fpp3)

# Your retail series
set.seed(12345678)
myseries <- aus_retail |>
  filter(`Series ID` == sample(aus_retail$`Series ID`,1))

# Step 1: Create training dataset
myseries_train <- myseries |>
  filter(year(Month) < 2011)

# Step 2: Check data split
autoplot(myseries, Turnover) +
  autolayer(myseries_train, Turnover, colour = "red") +
  labs(title = "Training vs Full Dataset",
       subtitle = "Red = Training data (before 2011)")

# Step 3: Fit seasonal naive model
fit <- myseries_train |>
  model(SNAIVE(Turnover))

# Step 4: Check residuals
fit |> gg_tsresiduals()

## Warning: Removed 12 rows containing missing values or values outside the scale range
## (`geom_line()`).

## Warning: Removed 12 rows containing missing values or values outside the scale range
## (`geom_point()`).

## Warning: Removed 12 rows containing non-finite outside the scale range
## (`stat_bin()`).

## Warning: Removed 12 rows containing missing values or values outside the scale range
## (`geom_rug()`).

# Step 5: Produce forecasts for test data
fc <- fit |>
  forecast(new_data = anti_join(myseries, myseries_train))

## Joining with `by = join_by(State, Industry, `Series ID`, Month, Turnover)`

fc |> autoplot(myseries) +
  labs(title = "SNAIVE Forecasts vs Actual Data")

# Step 6: Compare accuracy
cat("Training accuracy:\n")

## Training accuracy:

fit |> accuracy()

## # A tibble: 1 × 12
##   State    Industry .model .type    ME  RMSE   MAE   MPE  MAPE  MASE RMSSE  ACF1
##   <chr>    <chr>    <chr>  <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Norther… Clothin… SNAIV… Trai… 0.439  1.21 0.915  5.23  12.4     1     1 0.768

cat("\nTest accuracy:\n")

## 
## Test accuracy:

fc |> accuracy(myseries)

## # A tibble: 1 × 12
##   .model    State Industry .type    ME  RMSE   MAE   MPE  MAPE  MASE RMSSE  ACF1
##   <chr>     <chr> <chr>    <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 SNAIVE(T… Nort… Clothin… Test  0.836  1.55  1.24  5.94  9.06  1.36  1.28 0.601

5.7 The residuals don’t appear to fluctuate randomly around zero, and the region between January 1995 and January 1997 shows large negative and positive spikes. The variance pattern changes over time, but within each pattern there is stable variation. These residuals do not show white noise. The large negative residuals around 1995-1996 particularly suggest there may have been a structural change or unusual event affecting this retail series.

Forecasting

Candace Grant

2025-09-19

Time Series created

SNAIVE on Lamb data with a focus on New South Wales