Homework 3

5.1

library(fpp3)

## Registered S3 method overwritten by 'tsibble':
##   method               from 
##   as_tibble.grouped_df dplyr

## ── Attaching packages ──────────────────────────────────────────── fpp3 1.0.1 ──

## ✔ tibble      3.2.1     ✔ tsibble     1.1.6
## ✔ dplyr       1.1.4     ✔ tsibbledata 0.4.1
## ✔ tidyr       1.3.1     ✔ feasts      0.4.1
## ✔ lubridate   1.9.3     ✔ fable       0.4.1
## ✔ ggplot2     3.5.1

## ── Conflicts ───────────────────────────────────────────────── fpp3_conflicts ──
## ✖ lubridate::date()    masks base::date()
## ✖ dplyr::filter()      masks stats::filter()
## ✖ tsibble::intersect() masks base::intersect()
## ✖ tsibble::interval()  masks lubridate::interval()
## ✖ dplyr::lag()         masks stats::lag()
## ✖ tsibble::setdiff()   masks base::setdiff()
## ✖ tsibble::union()     masks base::union()

library(patchwork)

data("global_economy")
data("aus_production")
data("aus_livestock")
data("hh_budget")
data("aus_retail")

# 1. 
aus_population <- global_economy %>%
  filter(Country == "Australia") %>%
  select(Year, Population)

pop_fit <- aus_population %>%
  model(RW(Population ~ drift()))

pop_forecast <- pop_fit %>%
  forecast(h = 10)

# 2. Bricks (aus_production) 
bricks_fit <- aus_production %>%
  model(SNAIVE(Bricks))

bricks_forecast <- bricks_fit %>%
  forecast(h = "2 years")

# 3. NSW Lambs (aus_livestock) 
nsw_lambs <- aus_livestock %>%
  filter(State == "New South Wales", Animal == "Lambs") %>%
  select(Month, Count)

lambs_fit <- nsw_lambs %>%
  model(SNAIVE(Count))

lambs_forecast <- lambs_fit %>%
  forecast(h = "2 years")

# 4. Household wealth (hh_budget) 
wealth_fit <- hh_budget %>%
  model(RW(Wealth ~ drift()))

wealth_forecast <- wealth_fit %>%
  forecast(h = 10)

# 5. Australian takeaway food turnover (aus_retail) 
takeaway_food <- aus_retail %>%
  filter(Industry == "Takeaway food services") %>%
  select(Month, Turnover)

takeaway_fit <- takeaway_food %>%
  model(SNAIVE(Turnover))

takeaway_forecast <- takeaway_fit %>%
  forecast(h = "2 years")

# Plot
pop_forecast_plot <- autoplot(pop_forecast) + ggtitle("Australian Population Forecast")
bricks_forecast_plot <- autoplot(bricks_forecast) + ggtitle("Bricks Forecast")
lambs_forecast_plot <- autoplot(lambs_forecast) + ggtitle("NSW Lambs Forecast")
wealth_forecast_plot <- autoplot(wealth_forecast) + ggtitle("Household Wealth Forecast")
takeaway_forecast_plot <- autoplot(takeaway_forecast) + ggtitle("Takeaway Food Turnover Forecast")

pop_forecast_plot / bricks_forecast_plot / lambs_forecast_plot / wealth_forecast_plot / takeaway_forecast_plot

## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf

## Warning: Removed 8 rows containing missing values or values outside the scale range
## (`geom_line()`).

## 5.2

library(dplyr)
library(fpp3)

data("gafa_stock")

fb_stock <- gafa_stock %>%
  filter(Symbol == "FB") %>%
  dplyr::select(Date, Close) %>%
  as_tsibble(index = Date)

all_days <- seq.Date(min(fb_stock$Date), max(fb_stock$Date), by = "day") %>%
  as_tibble() %>%
  rename(Date = value)

# Left-join to add missing days, forward-fill weekend prices
fb_stock <- all_days %>%
  left_join(fb_stock, by = "Date") %>%
  as_tsibble(index = Date) %>%
  fill(Close, .direction = "down") %>%
  drop_na() %>%                       
  update_tsibble(regular = TRUE)      

print(is_regular(fb_stock))

## [1] TRUE

############################################
### Drift Forecast (6 months)
############################################

# Fit drift model
fit_drift <- fb_stock %>%
  model(RW(Close ~ drift()))

# Forecast 6 months (≈180 days including weekends)
fc_drift <- fit_drift %>%
  forecast(h = "6 months")

# Plot the drift forecast
autoplot(fb_stock, Close) +
  autolayer(fc_drift, colour = "blue", level = NULL) +
  ggtitle("Drift Forecast (6 Months) for Facebook Stock") +
  labs(x = "Date", y = "Close Price (USD)")

############################################
### Show Drift = Line Between First & Last
############################################

# Calculate slope from first to last observation
start_price <- first(fb_stock$Close)
end_price   <- last(fb_stock$Close)
start_date  <- first(fb_stock$Date)
end_date    <- last(fb_stock$Date)

slope <- (end_price - start_price) / as.numeric(end_date - start_date)

# Manual drift line
fb_stock_line <- fb_stock %>%
  mutate(ManualDrift = start_price + slope * as.numeric(Date - start_date))

# Actual vs. manual drift line
autoplot(fb_stock_line, Close) +
  geom_line(aes(y = ManualDrift), colour = "red", linetype = "dashed") +
  ggtitle("Drift Forecast as Straight Line between First and Last Observations") +
  labs(x = "Date", y = "Close Price (USD)")

############################################
### Other Benchmark Methods
############################################

# Naïve
fit_naive <- fb_stock %>%
  model(NAIVE(Close))

fc_naive <- fit_naive %>%
  forecast(h = "6 months")

# Seasonal Naïve (SNAIVE)
fit_snaive <- fb_stock %>%
  model(SNAIVE(Close))

fc_snaive <- fit_snaive %>%
  forecast(h = "6 months")

autoplot(fb_stock, Close, colour = "black") +
  autolayer(fc_naive,  level = NULL, colour = "red") +
  autolayer(fc_drift, level = NULL, colour = "blue") +
  autolayer(fc_snaive, level = NULL, colour = "green") +
  scale_color_identity(
    name = "Method",
    breaks = c("black","red","blue","green"),
    labels = c("Actual Data","Naïve Forecast","Drift Forecast","Seasonal Naïve"),
    guide = "legend"
  ) +
  ggtitle("Comparison of Benchmark Forecasts for Facebook Stock (6 Months)") +
  labs(x = "Date", y = "Close Price (USD)")

## 5.3

library(fpp3)

# Extract data of interest (from 1992 onwards)
recent_production <- aus_production |>
  filter(year(Quarter) >= 1992)

# Define and estimate a Seasonal Naïve model
fit <- recent_production |> model(SNAIVE(Beer))

# Plot residual diagnostics to check for white noise
fit |> gg_tsresiduals()

## Warning: Removed 4 rows containing missing values or values outside the scale range
## (`geom_line()`).

## Warning: Removed 4 rows containing missing values or values outside the scale range
## (`geom_point()`).

## Warning: Removed 4 rows containing non-finite outside the scale range
## (`stat_bin()`).

# Generate and plot forecasts
fit |> forecast(h = "2 years") |> autoplot(recent_production) +
  ggtitle("Seasonal Naïve Forecast for Australian Beer Production") +
  ylab("Beer Production (ML)") +
  xlab("Year")

Seasonal naïve model appears to have adequately captured the main seasonal pattern.

5.4

library(fpp3)

aus_exports <- global_economy |> 
  filter(Country == "Australia") |> 
  select(Year, Exports)

export_fit <- aus_exports |> model(NAIVE(Exports))

export_fit |> gg_tsresiduals()

## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_line()`).

## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).

## Warning: Removed 1 row containing non-finite outside the scale range
## (`stat_bin()`).

export_fit |> forecast(h = 10) |> autoplot(aus_exports) +
  ggtitle("Naive Forecast for Australian Exports") +
  ylab("Exports (% of GDP)") +
  xlab("Year")

### Bricks Production (aus_production) 
bricks_data <- aus_production |> select(Quarter, Bricks)

# SNAIVE() since brick production is seasonal
bricks_fit <- bricks_data |> model(SNAIVE(Bricks))

bricks_fit |> gg_tsresiduals()

## Warning: Removed 24 rows containing missing values or values outside the scale range
## (`geom_line()`).

## Warning: Removed 24 rows containing missing values or values outside the scale range
## (`geom_point()`).

## Warning: Removed 24 rows containing non-finite outside the scale range
## (`stat_bin()`).

bricks_fit |> forecast(h = "2 years") |> autoplot(bricks_data) +
  ggtitle("Seasonal Naive Forecast for Bricks Production") +
  ylab("Bricks Production (Million)") +
  xlab("Year")

## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf

## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf

## Warning: Removed 8 rows containing missing values or values outside the scale range
## (`geom_line()`).

## Warning: Removed 20 rows containing missing values or values outside the scale range
## (`geom_line()`).

## 5.7

#a
set.seed(12345678)
myseries <- aus_retail |>
    filter(`Series ID` == sample(aus_retail$`Series ID`,1)) |> 
  mutate(Month = yearmonth(Month)) |>  
  as_tsibble(index = Month) |> 
  drop_na() 

myseries_train <- myseries |> 
  filter(year(Month) < 2011)
#b
autoplot(myseries, Turnover) +
  autolayer(myseries_train, Turnover, colour = "red")

#c
fit <- myseries_train |>
  model(SNAIVE(Turnover))
#d
fit |> gg_tsresiduals()

## Warning: Removed 12 rows containing missing values or values outside the scale range
## (`geom_line()`).

## Warning: Removed 12 rows containing missing values or values outside the scale range
## (`geom_point()`).

## Warning: Removed 12 rows containing non-finite outside the scale range
## (`stat_bin()`).

#e
fc <- fit |>
  forecast(new_data = anti_join(myseries, myseries_train))

## Joining with `by = join_by(State, Industry, `Series ID`, Month, Turnover)`

fc |> autoplot(myseries)

#f
fit |> accuracy()

## # A tibble: 1 × 12
##   State    Industry .model .type    ME  RMSE   MAE   MPE  MAPE  MASE RMSSE  ACF1
##   <chr>    <chr>    <chr>  <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Norther… Clothin… SNAIV… Trai… 0.439  1.21 0.915  5.23  12.4     1     1 0.768

fc |> accuracy(myseries)

## # A tibble: 1 × 12
##   .model    State Industry .type    ME  RMSE   MAE   MPE  MAPE  MASE RMSSE  ACF1
##   <chr>     <chr> <chr>    <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 SNAIVE(T… Nort… Clothin… Test  0.836  1.55  1.24  5.94  9.06  1.36  1.28 0.601

How sensitive are the accuracy measures to the amount of training data used? Test error is higher than training error, could be overfitting to training data. Although more training data is good for a stable model and help in improving accuracy, it can also be overfitted so there is a fine balance that needs to be met.

Homework 3

Amish Rasheed

2025-02-23

5.1

5.4