#Exercise 5.1

#5.1(a) Australian Population (global_economy)
aus_pop <- global_economy |>
  filter(Country == "Australia") |>
  select(Year, Population)

fit_pop <- aus_pop |>
  model(RW(Population ~ drift()))

fc_pop <- fit_pop |>
  forecast(h = 10)

autoplot(fc_pop, aus_pop) +
  labs(title = "Australian population: RW with drift", y = "Population")

#5.1(b) Bricks (aus_production)
bricks <- aus_production |>
  select(Quarter, Bricks)

fit_bricks <- bricks |>
  model(SNAIVE(Bricks))

fc_bricks <- fit_bricks |>
  forecast(h = "2 years")

autoplot(fc_bricks, bricks) +
  labs(title = "Bricks: Seasonal naive", y = "Bricks")
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning: Removed 8 rows containing missing values or values outside the scale range
## (`geom_line()`).
## Warning: Removed 20 rows containing missing values or values outside the scale range
## (`geom_line()`).

#5.1(c) NSW Lambs (aus_livestock)
nsw_lambs <- aus_livestock |>
  filter(State == "New South Wales",
         Animal == "Lambs")

fit_lambs <- nsw_lambs |>
  model(SNAIVE(Count))

fc_lambs <- fit_lambs |>
  forecast(h = "2 years")

autoplot(fc_lambs, nsw_lambs) +
  labs(title = "NSW Lamb Slaughter: Seasonal Naive Forecast",
       y = "Number of Lambs")

#5.1(d) Household wealth (hh_budget)
wealth <- hh_budget |>
  select(Year, Wealth)

fit_wealth <- wealth |>
  model(RW(Wealth ~ drift()))

fc_wealth <- fit_wealth |>
  forecast(h = 8)

autoplot(fc_wealth, wealth) +
  labs(title = "Household wealth: RW with drift", y = "Wealth")

#5.1(e) Australian takeaway food turnover (aus_retail)
takeaway <- aus_retail |>
  filter(Industry == "Takeaway food services") |>
  select(Month, Turnover)

fit_takeaway <- takeaway |>
  model(SNAIVE(Turnover))

fc_takeaway <- fit_takeaway |>
  forecast(h = "2 years")

autoplot(fc_takeaway, takeaway) +
  labs(title = "Takeaway food turnover: Seasonal naive", y = "Turnover")

##5.2

# ---- Data prep: make FB a regular daily tsibble ----
fb <- gafa_stock |>
  filter(Symbol == "FB") |>
  select(Date, Close) |>
  arrange(Date) |>
  distinct(Date, .keep_all = TRUE) |>
  as_tsibble(index = Date) |>
  fill_gaps() |>
  fill(Close, .direction = "down")

# 1) Time plot
autoplot(fb, Close) +
  labs(title = "Facebook (FB) daily closing price", y = "Close")

# 2) Drift forecasts + plot
fit_drift <- fb |>
  model(drift = RW(Close ~ drift()))

fc_drift <- fit_drift |>
  forecast(h = 60)

autoplot(fc_drift, fb) +
  labs(title = "FB forecasts: Random walk with drift (60 days)", y = "Close")

# 3) Show drift forecasts = extending line from first to last observation
fb_tbl <- fb |> as_tibble()

y1 <- fb_tbl$Close[1]
yT <- fb_tbl$Close[nrow(fb_tbl)]
Tn <- nrow(fb_tbl)

drift_val <- (yT - y1) / (Tn - 1)
h <- 60

manual_fc <- yT + drift_val * (1:h)

fable_fc <- fc_drift |> as_tibble() |> pull(.mean)

all.equal(fable_fc, manual_fc, tolerance = 1e-8)
## [1] TRUE
# If TRUE, you've shown they are identical.

# 4) Other benchmark forecasts + compare
fit_bench <- fb |>
  model(
    mean  = MEAN(Close),
    naive = NAIVE(Close),
    drift = RW(Close ~ drift())
  )

fc_bench <- fit_bench |>
  forecast(h = 60)

autoplot(fc_bench, fb) +
  labs(title = "FB benchmark forecasts comparison", y = "Close")

##Between the benchmarks, NAIVE is usually the safest for stock prices because prices are often close to a random walk with no predictable long-term direction. DRIFT can over-extrapolate the historical average increase and may be too optimistic if the growth rate changes.

#5.3


# Extract data from 1992 onward
recent_production <- aus_production |>
  filter(year(Quarter) >= 1992)

# Fit seasonal naïve model
fit <- recent_production |>
  model(SNAIVE(Beer))

# Residual diagnostics
fit |> gg_tsresiduals()
## Warning: `gg_tsresiduals()` was deprecated in feasts 0.4.2.
## ℹ Please use `ggtime::gg_tsresiduals()` instead.
## This warning is displayed once per session.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: Removed 4 rows containing missing values or values outside the scale range
## (`geom_line()`).
## Warning: Removed 4 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 4 rows containing non-finite outside the scale range
## (`stat_bin()`).
## Warning: Removed 4 rows containing missing values or values outside the scale range
## (`geom_rug()`).

# Forecast and plot
fit |>
  forecast(h = "2 years") |>
  autoplot(recent_production) +
  labs(title = "Australian Beer Production: Seasonal Naïve Forecasts",
       y = "Beer production")

# Check residual statistics
fit |> accuracy()
## # A tibble: 1 × 10
##   .model       .type       ME  RMSE   MAE    MPE  MAPE  MASE RMSSE   ACF1
##   <chr>        <chr>    <dbl> <dbl> <dbl>  <dbl> <dbl> <dbl> <dbl>  <dbl>
## 1 SNAIVE(Beer) Training -1.57  16.1  13.6 -0.426  3.16     1     1 -0.237

Conclusion:

The seasonal naïve model captures the strong quarterly seasonal pattern

in Australian beer production reasonably well.

From the residual diagnostics:

- The residuals fluctuate around zero.

- There is no strong remaining seasonal pattern.

- However, some autocorrelation may still be present.

Therefore, the residuals are approximately white noise, but not perfectly.

The SNAIVE model provides a reasonable benchmark forecast, though

a more advanced model (such as ETS or ARIMA) may improve accuracy.

#5.4


# Extract Australian exports
exports_aus <- global_economy |>
  filter(Country == "Australia") |>
  select(Year, Exports)

# Fit naive model
fit_exports <- exports_aus |>
  model(NAIVE(Exports))

# Residual diagnostics
fit_exports |> gg_tsresiduals()
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_line()`).
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 1 row containing non-finite outside the scale range
## (`stat_bin()`).
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_rug()`).

# Forecasts
fit_exports |>
  forecast(h = 10) |>
  autoplot(exports_aus) +
  labs(title = "Australian Exports: Naive Forecasts",
       y = "Exports")

# Accuracy table
fit_exports |> accuracy()
## # A tibble: 1 × 10
##   .model         .type       ME  RMSE   MAE   MPE  MAPE  MASE RMSSE   ACF1
##   <chr>          <chr>    <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>  <dbl>
## 1 NAIVE(Exports) Training 0.145  1.24 0.985 0.611  5.83     1     1 -0.306
# Conclusion:
# Australian exports are annual data with no seasonal pattern.
# Therefore, the naive method is appropriate.
# The residuals fluctuate around zero with no strong pattern,
# though some autocorrelation may remain.
# The naive model provides a reasonable benchmark forecast.



# Extract bricks data
bricks <- aus_production |>
  select(Quarter, Bricks)

# Fit seasonal naive model
fit_bricks <- bricks |>
  model(SNAIVE(Bricks))

# Residual diagnostics
fit_bricks |> gg_tsresiduals()
## Warning: Removed 24 rows containing missing values or values outside the scale range
## (`geom_line()`).
## Warning: Removed 24 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 24 rows containing non-finite outside the scale range
## (`stat_bin()`).
## Warning: Removed 24 rows containing missing values or values outside the scale range
## (`geom_rug()`).

# Forecasts
fit_bricks |>
  forecast(h = "2 years") |>
  autoplot(bricks) +
  labs(title = "Australian Bricks: Seasonal Naive Forecasts",
       y = "Bricks")
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning: Removed 8 rows containing missing values or values outside the scale range
## (`geom_line()`).
## Warning: Removed 20 rows containing missing values or values outside the scale range
## (`geom_line()`).

# Accuracy
fit_bricks |> accuracy()
## # A tibble: 1 × 10
##   .model         .type       ME  RMSE   MAE   MPE  MAPE  MASE RMSSE  ACF1
##   <chr>          <chr>    <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 SNAIVE(Bricks) Training  4.21  48.3  35.5 0.742  8.84     1     1 0.796
# Conclusion:
# The bricks series exhibits strong quarterly seasonality.
# The seasonal naive model captures this pattern effectively
# by repeating the value from the same quarter last year.
# Residuals appear approximately white noise,
# although some minor autocorrelation may remain.
# SNAIVE is an appropriate benchmark for this series.
#5.7

# --------------------------------------------------
# 1. CREATE MY RETAIL SERIES
# (You can change State/Industry if needed)
# --------------------------------------------------

myseries <- aus_retail |>
  filter(State == "Victoria",
         Industry == "Cafes, restaurants and takeaway food services") |>
  select(Month, Turnover)

# --------------------------------------------------
# 2. TRAIN / TEST SPLIT
# --------------------------------------------------

myseries_train <- myseries |>
  filter(year(Month) < 2011)

# Check split visually
autoplot(myseries, Turnover) +
  autolayer(myseries_train, Turnover, colour = "red") +
  labs(title = "Retail Series (Training in Red)",
       y = "Turnover")

# --------------------------------------------------
# 3. FIT SEASONAL NAIVE MODEL
# --------------------------------------------------

fit <- myseries_train |>
  model(SNAIVE(Turnover))

# --------------------------------------------------
# 4. RESIDUAL DIAGNOSTICS
# --------------------------------------------------

fit |> gg_tsresiduals()
## Warning: Removed 12 rows containing missing values or values outside the scale range
## (`geom_line()`).
## Warning: Removed 12 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 12 rows containing non-finite outside the scale range
## (`stat_bin()`).
## Warning: Removed 12 rows containing missing values or values outside the scale range
## (`geom_rug()`).

# --------------------------------------------------
# 5. FORECAST TEST DATA
# --------------------------------------------------

fc <- fit |>
  forecast(new_data = anti_join(myseries, myseries_train))
## Joining with `by = join_by(Month, Turnover)`
fc |> autoplot(myseries) +
  labs(title = "Seasonal Naive Forecast vs Actual",
       y = "Turnover")

# --------------------------------------------------
# 6. ACCURACY COMPARISON
# --------------------------------------------------

# Training accuracy
fit |> accuracy()
## # A tibble: 1 × 10
##   .model           .type       ME  RMSE   MAE   MPE  MAPE  MASE RMSSE  ACF1
##   <chr>            <chr>    <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 SNAIVE(Turnover) Training  22.1  37.6  27.6  6.81  8.76     1     1 0.829
# Test accuracy
fc |> accuracy(myseries)
## # A tibble: 1 × 10
##   .model           .type    ME  RMSE   MAE   MPE  MAPE  MASE RMSSE  ACF1
##   <chr>            <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 SNAIVE(Turnover) Test   69.0  116.  89.9  7.68  10.8  3.25  3.09 0.921
# --------------------------------------------------
# 7. SENSITIVITY ANALYSIS
# --------------------------------------------------

cutoffs <- c(2008, 2009, 2010, 2011, 2012)

sens <- map_dfr(cutoffs, function(cut) {

  tr <- myseries |> filter(year(Month) < cut)
  te <- anti_join(myseries, tr)

  fit_temp <- tr |> model(SNAIVE(Turnover))
  fc_temp  <- fit_temp |> forecast(new_data = te)

  acc <- fc_temp |> accuracy(myseries) |> as_tibble()
  acc$cutoff_year <- cut
  acc
})
## Joining with `by = join_by(Month, Turnover)`
## Joining with `by = join_by(Month, Turnover)`
## Joining with `by = join_by(Month, Turnover)`
## Joining with `by = join_by(Month, Turnover)`
## Joining with `by = join_by(Month, Turnover)`
sens |> select(cutoff_year, RMSE, MAE, MAPE)
## # A tibble: 5 × 4
##   cutoff_year  RMSE   MAE  MAPE
##         <dbl> <dbl> <dbl> <dbl>
## 1        2008  237. 211.   27.2
## 2        2009  216. 193.   24.6
## 3        2010  160. 131.   16.0
## 4        2011  116.  89.9  10.8
## 5        2012  138. 110.   12.9

#Residuals fluctuate around zero.

#ACF mostly inside bounds → approximately uncorrelated.

#Histogram roughly symmetric → approximately normal.

#Test accuracy worse than training accuracy (expected).

#Accuracy improves when more seasonal history is included.

#SNAIVE works well because retail turnover has strong seasonality.