library(fpp3)

## Warning: package 'fpp3' was built under R version 4.5.2

## Registered S3 method overwritten by 'tsibble':
##   method               from 
##   as_tibble.grouped_df dplyr

## ── Attaching packages ──────────────────────────────────────────── fpp3 1.0.2 ──

## ✔ tibble      3.3.0     ✔ tsibble     1.1.6
## ✔ dplyr       1.1.4     ✔ tsibbledata 0.4.1
## ✔ tidyr       1.3.1     ✔ feasts      0.4.2
## ✔ lubridate   1.9.4     ✔ fable       0.5.0
## ✔ ggplot2     4.0.0

## Warning: package 'tsibble' was built under R version 4.5.2

## Warning: package 'tsibbledata' was built under R version 4.5.2

## Warning: package 'feasts' was built under R version 4.5.2

## Warning: package 'fabletools' was built under R version 4.5.2

## Warning: package 'fable' was built under R version 4.5.2

## ── Conflicts ───────────────────────────────────────────────── fpp3_conflicts ──
## ✖ lubridate::date()    masks base::date()
## ✖ dplyr::filter()      masks stats::filter()
## ✖ tsibble::intersect() masks base::intersect()
## ✖ tsibble::interval()  masks lubridate::interval()
## ✖ dplyr::lag()         masks stats::lag()
## ✖ tsibble::setdiff()   masks base::setdiff()
## ✖ tsibble::union()     masks base::union()

library(tidyverse)

## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats 1.0.0     ✔ readr   2.1.5
## ✔ purrr   1.0.4     ✔ stringr 1.5.1

## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter()     masks stats::filter()
## ✖ tsibble::interval() masks lubridate::interval()
## ✖ dplyr::lag()        masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

library(purrr)

5.1

Produce forecasts for the following series using whichever of NAIVE(y), SNAIVE(y) or RW(y~ drift()) is more appropriate in each case:

Australian Population (global_economy)

I used a random walk with drift for Australian population because it shows steady long run growth and does not have seasonality in the yearly data.

# filter to Australia and keep the population series
aus_pop <- global_economy %>%
  filter(Country == "Australia") %>%
  select(Year, Population)

# fit a random walk with drift to capture long-run growth
fit_pop <- aus_pop %>%
  model(rw_drift = RW(Population ~ drift()))

# forecast 
fc_pop <- fit_pop %>%
  forecast(h = "10 years")

# plot the historical data and forecast
fc_pop %>%
  autoplot(aus_pop) +
  labs(
    title = "Australia population forecast using RW with drift",
    y = "Population"
  )

Bricks (aus_production)

I used a seasonal naive model for bricks because the quarterly series has a repeating seasonal pattern.

# keep the Bricks series
bricks <- aus_production %>%
  select(Quarter, Bricks) %>%
  filter(!is.na(Bricks))

# fit seasonal naive model
fit_bricks <- bricks %>%
  model(snaive = SNAIVE(Bricks))

# forecast next 8 quarters
fc_bricks <- fit_bricks %>%
  forecast(h = 8)

# plot the results
fc_bricks %>%
  autoplot(bricks) +
  labs(
    title = "Bricks forecast using seasonal naive", 
    y = "Bricks"
  )

NSW Lambs (aus_livestock)

I used a seasonal naive model for NSW lambs because the monthly counts show strong seasonality across years.

# filter to NSW lambs
nsw_lambs <- aus_livestock %>%
  filter(State == "New South Wales", Animal == "Lambs") %>%
  select(Month, Count)

# fit seasonal naive model
fit_lambs <- nsw_lambs %>%
  model(snaive = SNAIVE(Count))

# forecast 24 months
fc_lambs <- fit_lambs %>%
  forecast(h = 24)

# plot the results
fc_lambs %>%
  autoplot(nsw_lambs) +
  labs(
    title = "NSW lambs forecast using seasonal naive",
    y = "Count"
  )

Household wealth (`hh_budget)

I used a random walk with drift for household wealth because it follows a trend over time and does not show a clear seasonal cycle.

# keep the wealth series 
hh_wealth <- hh_budget %>%
  filter(Country == "Australia") %>%
  select(Year, Wealth)

# fit random walk with drift
fit_wealth <- hh_wealth %>%
  model(rw_drift = RW(Wealth ~ drift()))

# forecast 10 years
fc_wealth <- fit_wealth %>%
  forecast(h = "10 years")

# plot the results
fc_wealth %>%
  autoplot(hh_wealth) +
  labs(
    title = "Household wealth forecast using RW with drift",
    y = "Wealth"
  )

Australian takeaway food turnover (aus_retail)

I used a seasonal naive model for takeaway food turnover because retail turnover is strongly seasonal at the monthly level.

# filter to takeaway food series
takeaway <- aus_retail %>%
  filter(Industry == "Takeaway food services") %>%
  index_by(Month) %>% 
  summarise(Turnover = sum(Turnover, na.rm = TRUE))
  

# fit seasonal naive model
fit_takeaway <- takeaway %>%
  model(snaive = SNAIVE(Turnover))

# forecast next 24 months
fc_takeaway <- fit_takeaway %>%
  forecast(h = 24)

# plot
fc_takeaway %>%
  autoplot(takeaway) +
  labs(
    title = "Takeaway food turnover forecast using seasonal naive",
    y = "Turnover"
  )

5.2

Use the Facebook stock price (data set gafa_stock) to do the following:

Produce a time plot of the series

# filter the Facebook stock series and keep the adjusted close price
fb <- gafa_stock %>%
  filter(Symbol == "FB") %>%
  select(Date, Close)

# plot the time series
fb %>%
  autoplot(Close) +
  labs(
    title = "Facebook closing price",
    y = "Close Price"
  )

Produce forecasts using the drift method and plot them

# Make the series regular daily
fb_reg <- fb %>%
  arrange(Date) %>%
  as_tibble() %>%
  tidyr::complete(Date = seq(min(Date), max(Date), by = "day")) %>%
  tidyr::fill(Close, .direction = "down") %>%
  filter(!is.na(Close)) %>%
  as_tsibble(index = Date) %>%
  update_tsibble(regular = TRUE)

# fit a random walk with drift model
fit_drift <- fb_reg %>%
  model(drift = RW(Close ~ drift()))

# forecast the next 30 days
fc_drift <- fit_drift %>%
  forecast(h = 30)

# plot forecast with the historical data
fc_drift %>%
  autoplot(fb_reg) +
  labs(
    title = "Facebook forecast using drift method", 
    y = "Close price")

show that the forecasts are identical to extending the line drawn between the first and last observations

h <- 30  # forecast horizon

# Step 1: Build regular daily series (same as part b)
fb <- gafa_stock %>%
  filter(Symbol == "FB") %>%
  select(Date, Close) %>%
  arrange(Date) %>%
  as_tibble()

fb_reg <- fb %>%
  tidyr::complete(Date = seq(min(Date), max(Date), by = "day")) %>%
  tidyr::fill(Close, .direction = "down") %>%
  filter(!is.na(Close)) %>%
  as_tsibble(index = Date) %>%
  update_tsibble(regular = TRUE)

# Step 2: Fit drift + forecast
fit_drift <- fb_reg %>%
  model(drift = RW(Close ~ drift()))

fc_drift <- fit_drift %>%
  forecast(h = h)

# Step 3: One-row summary of first/last (force tibble to avoid grouped output)
fb_summary <- fb_reg %>%
  as_tibble() %>%
  summarise(
    first_date  = first(Date),
    last_date   = last(Date),
    first_close = first(Close),
    last_close  = last(Close),
    n = n()
  )

fb_summary

# Step 4: Slope of the line between first and last observation
slope <- (fb_summary$last_close - fb_summary$first_close) / (fb_summary$n - 1)

# Step 5: Build an extended line (first date through last date + h)
line_df <- tibble(
  Date = seq(fb_summary$first_date, fb_summary$last_date + h, by = "day")
) %>%
  mutate(line = fb_summary$first_close + slope * (as.numeric(Date - fb_summary$first_date)))

# Step 6: Plot series + extended first-to-last line
fb_reg %>%
  autoplot(Close) +
  geom_line(data = line_df, aes(x = Date, y = line), linetype = "dashed", linewidth = 0.9) +
  labs(
    title = "First-to-last line extended (drift forecast idea)",
    y = "Close price"
  )

Try using some of the other benchmark functions to forecast the same data set. Which do you think is best? Why?

# fit a set of benchmark models
fit_bench <- fb_reg %>%
  model(
    mean = MEAN(Close),
    naive = NAIVE(Close),
    drift = RW(Close ~ drift())
  )

# forecast 30 days ahead for each model
fc_bench <- fit_bench %>%
  forecast(h = 30)

# plot forecasts together
fc_bench %>%
  autoplot(fb) +
  facet_wrap(~.model, ncol = 1) +
  labs(
    title = "Benchmark forecasts for Facebook close price", y = "Close price"
  )

## Using `Date` as index variable.

# decide train/test sizes
n_total <- nrow(fb_reg)
n_test  <- 60
n_train <- n_total - n_test

# split
fb_train <- fb_reg %>% slice_head(n = n_train)
fb_test  <- fb_reg %>% slice_tail(n = n_test)

# fit models on training data
fit_train <- fb_train %>%
  model(
    mean  = MEAN(Close),
    naive = NAIVE(Close),
    drift = RW(Close ~ drift())
  )

# forecast length of test set
fc_train <- fit_train %>%
  forecast(h = n_test)

# accuracy on test set
fc_train %>%
  accuracy(fb_test)

I compared mean, naive, and drift benchmarks using the last 60 observations as a test set. Based on RMSE and MAE, the naive method performs best for this sample, which makes sense because stock prices often behave like a random walk where the most recent value is a strong baseline.

5.3

Apply a seasonal naive method to the quarterly Australian beer production data from 1992. Check if the residuals look like white noise, and plot the forecasts. The following code will help.

# extract the quarterly beer production data from 1992 onward
recent_production <- aus_production %>%
  filter(year(Quarter) >= 1992) %>%
  select(Quarter, Beer) %>%
  filter(!is.na(Beer))

# fit a seasonal naive model
fit <- recent_production %>%
  model(snaive = SNAIVE(Beer))

# check residuals for white-noise behavior
fit %>%
  augment() %>%
  filter(!is.na(.innov)) %>%
  gg_tsdisplay(.innov, plot_type = "partial")

## Warning: `gg_tsdisplay()` was deprecated in feasts 0.4.2.
## ℹ Please use `ggtime::gg_tsdisplay()` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

# plot the forecasts
fit %>%
  forecast(h = 8) %>%
  autoplot(recent_production) +
  labs(
    title = "Quarterly Australian beer production forecast (SNAIVE), from 1992",
    y = "Beer production"
  )

I used a seasonal naive model on quarterly beer production from 1992 because the pattern repeats each year by quarter. The forecast basically reuses the same quarter from the previous year, so it captures the main seasonal shape. When I checked the residuals, they were mostly centered around zero, but the ACF still shows a noticeable spike at some lags. That suggests the model is a reasonable baseline, but the residuals are not fully white noise, so a more flexible model could likely improve the fit.

5.4

Repeat the previous exercise using the Australian Exports series from global_economy and the Bricks series from aus_production. Use whichever of NAIVE() or SNAIVE() is more appropriate in each case.

# filter australia and keep Exports
aus_exports <- global_economy %>%
  filter(Country == "Australia") %>%
  select(Year, Exports) %>%
  filter(!is.na(Exports))

# fit NAIVE model 
fit_exports <- aus_exports %>%
  model(NAIVE(Exports))

# residual diagnostics
fit_exports %>%
  augment() %>%
  filter(!is.na(.innov)) %>%
  gg_tsdisplay(.innov, plot_type = "partial")

# forecast next 10 years and plot
fit_exports %>%
  forecast(h = 10) %>%
  autoplot(aus_exports) +
  labs(
    title = "Australian exports forecast (NAIVE)", 
    y = "Exports"
  )

I used NAIVE because exports are annual and there is no seasonal cycle to repeat. The residual diagnostics look fairly random with no strong pattern, so NAIVE is a reasonable baseline for this series.

# keep Bricks series and remove missing values
bricks <- aus_production %>%
  select(Quarter, Bricks) %>%
  filter(!is.na(Bricks))

# fit SNAIVE model (quarterly seasonality)
fit_bricks <- bricks %>%
  model(SNAIVE(Bricks))

# residual diagnostics
fit_bricks %>%
  augment() %>%
  filter(!is.na(.innov)) %>%
  gg_tsdisplay(.innov, plot_type = "partial")

# forecast next 8 quarters and plot
fit_bricks %>%
  forecast(h = 8) %>%
  autoplot(bricks) +
  labs(
    title = "Bricks forecast (SNAIVE)", 
    y = "Bricks")

5.7

For your retail time series (from Exercise 7 in Section 2.10):

Create a training dataset consisting of observations before 2011

myseries <- aus_retail %>%
  filter(`Series ID` == sample(aus_retail$`Series ID`, 1)) %>%
  select(Month, Turnover) %>%
  filter(!is.na(Turnover))
  

myseries %>%
  glimpse()

## Rows: 369
## Columns: 2
## $ Month    <mth> 1988 Apr, 1988 May, 1988 Jun, 1988 Jul, 1988 Aug, 1988 Sep, 1…
## $ Turnover <dbl> 25.6, 26.7, 27.7, 28.2, 28.2, 29.4, 28.3, 27.4, 30.9, 26.7, 2…

# training data w/ observations before 2011
myseries_train <- myseries %>%
  filter(year(Month) < 2011)

# quick range check
myseries %>%
  as_tibble() %>%
  summarise(min_month = min(Month), max_month = max(Month), n = n())

myseries_train %>%
  as_tibble() %>%
  summarise(min_month = min(Month), max_month = max(Month), n = n())

Check that your data have been split appropriately by producing the following plot.

# visual check that the split is correct
autoplot(myseries, Turnover) +
  autolayer(myseries_train, Turnover, colour = "red") +
  labs(
    title = "Retail Turnover with training portion highlighted",
    y = "Turnover")

Fit a seasonal naive model using SNAIVE() applied to your training data (myseries_train)

# fit SNAIVE on training data
fit <- myseries_train %>%
  model(snaive = SNAIVE(Turnover))

fit

Check the residuals

feasts::gg_tsresiduals(fit)

## Warning: `gg_tsresiduals()` was deprecated in feasts 0.4.2.
## ℹ Please use `ggtime::gg_tsresiduals()` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

## Warning: Removed 12 rows containing missing values or values outside the scale range
## (`geom_line()`).

## Warning: Removed 12 rows containing missing values or values outside the scale range
## (`geom_point()`).

## Warning: Removed 12 rows containing non-finite outside the scale range
## (`stat_bin()`).

## Warning: Removed 12 rows containing missing values or values outside the scale range
## (`geom_rug()`).

The residuals are not really white noise. The ACF has several bars outside the bounds, which suggests there is still correlation left after using seasonal naive. The histogram looks roughly centered, but there are some larger residuals, especially near the end, so it is not perfectly normal.

Produce forecasts for the test data

# define test data 
myseries_test <- anti_join(myseries, myseries_train, by = "Month")

# forecast over the test period only
fc <- fit %>%
  forecast(new_data = myseries_test)

# plot forecasts overlaid on the full series
fc %>%
  autoplot(myseries) +
  labs(
    title = "SNAIVE forecast on test period", 
    y = "Turnover")

Compare the accuracy of your forecasts against the actual values

# accuracy on training data
fit %>% accuracy()

# accuracy on test data (compare forecasts to actual)
fc %>% accuracy(myseries)

How sensitive are the accuracy measures to the amount of training data used?

# compare accuracy under different train/test cutoffs
cutoffs <- c(2007, 2009, 2011, 2013)

acc_by_cutoff <- map_dfr(cutoffs, function(cut) {
  train <- myseries %>% filter(year(Month) < cut)
  test  <- anti_join(myseries, train, by = "Month")

  fit_cut <- train %>% model(snaive = SNAIVE(Turnover))
  fc_cut  <- fit_cut %>% forecast(new_data = test)

  fc_cut %>%
    accuracy(myseries) %>%
    mutate(train_end_year = cut - 1) %>%
    select(train_end_year, RMSE, MAE, MAPE)
})

acc_by_cutoff

The accuracy measures change a lot when I change the training cutoff. In my results, RMSE, MAE, and MAPE get larger as I move the cutoff later, which suggests the series level and seasonal size changed over time and the seasonal naive model does not adapt well when the holdout period behaves differently from the earlier history.

Data 624 HW# 3

Jayden Jiang

2026-02-28

5.1

5.2

5.3

5.4

5.7