#Exercise 5.1
#5.1(a) Australian Population (global_economy)
aus_pop <- global_economy |>
filter(Country == "Australia") |>
select(Year, Population)
fit_pop <- aus_pop |>
model(RW(Population ~ drift()))
fc_pop <- fit_pop |>
forecast(h = 10)
autoplot(fc_pop, aus_pop) +
labs(title = "Australian population: RW with drift", y = "Population")
#5.1(b) Bricks (aus_production)
bricks <- aus_production |>
select(Quarter, Bricks)
fit_bricks <- bricks |>
model(SNAIVE(Bricks))
fc_bricks <- fit_bricks |>
forecast(h = "2 years")
autoplot(fc_bricks, bricks) +
labs(title = "Bricks: Seasonal naive", y = "Bricks")
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning: Removed 8 rows containing missing values or values outside the scale range
## (`geom_line()`).
## Warning: Removed 20 rows containing missing values or values outside the scale range
## (`geom_line()`).
#5.1(c) NSW Lambs (aus_livestock)
nsw_lambs <- aus_livestock |>
filter(State == "New South Wales",
Animal == "Lambs")
fit_lambs <- nsw_lambs |>
model(SNAIVE(Count))
fc_lambs <- fit_lambs |>
forecast(h = "2 years")
autoplot(fc_lambs, nsw_lambs) +
labs(title = "NSW Lamb Slaughter: Seasonal Naive Forecast",
y = "Number of Lambs")
#5.1(d) Household wealth (hh_budget)
wealth <- hh_budget |>
select(Year, Wealth)
fit_wealth <- wealth |>
model(RW(Wealth ~ drift()))
fc_wealth <- fit_wealth |>
forecast(h = 8)
autoplot(fc_wealth, wealth) +
labs(title = "Household wealth: RW with drift", y = "Wealth")
#5.1(e) Australian takeaway food turnover (aus_retail)
takeaway <- aus_retail |>
filter(Industry == "Takeaway food services") |>
select(Month, Turnover)
fit_takeaway <- takeaway |>
model(SNAIVE(Turnover))
fc_takeaway <- fit_takeaway |>
forecast(h = "2 years")
autoplot(fc_takeaway, takeaway) +
labs(title = "Takeaway food turnover: Seasonal naive", y = "Turnover")
##5.2
# ---- Data prep: make FB a regular daily tsibble ----
fb <- gafa_stock |>
filter(Symbol == "FB") |>
select(Date, Close) |>
arrange(Date) |>
distinct(Date, .keep_all = TRUE) |>
as_tsibble(index = Date) |>
fill_gaps() |>
fill(Close, .direction = "down")
# 1) Time plot
autoplot(fb, Close) +
labs(title = "Facebook (FB) daily closing price", y = "Close")
# 2) Drift forecasts + plot
fit_drift <- fb |>
model(drift = RW(Close ~ drift()))
fc_drift <- fit_drift |>
forecast(h = 60)
autoplot(fc_drift, fb) +
labs(title = "FB forecasts: Random walk with drift (60 days)", y = "Close")
# 3) Show drift forecasts = extending line from first to last observation
fb_tbl <- fb |> as_tibble()
y1 <- fb_tbl$Close[1]
yT <- fb_tbl$Close[nrow(fb_tbl)]
Tn <- nrow(fb_tbl)
drift_val <- (yT - y1) / (Tn - 1)
h <- 60
manual_fc <- yT + drift_val * (1:h)
fable_fc <- fc_drift |> as_tibble() |> pull(.mean)
all.equal(fable_fc, manual_fc, tolerance = 1e-8)
## [1] TRUE
# If TRUE, you've shown they are identical.
# 4) Other benchmark forecasts + compare
fit_bench <- fb |>
model(
mean = MEAN(Close),
naive = NAIVE(Close),
drift = RW(Close ~ drift())
)
fc_bench <- fit_bench |>
forecast(h = 60)
autoplot(fc_bench, fb) +
labs(title = "FB benchmark forecasts comparison", y = "Close")
##Between the benchmarks, NAIVE is usually the safest for stock prices
because prices are often close to a random walk with no predictable
long-term direction. DRIFT can over-extrapolate the historical average
increase and may be too optimistic if the growth rate changes.
#5.3
# Extract data from 1992 onward
recent_production <- aus_production |>
filter(year(Quarter) >= 1992)
# Fit seasonal naïve model
fit <- recent_production |>
model(SNAIVE(Beer))
# Residual diagnostics
fit |> gg_tsresiduals()
## Warning: `gg_tsresiduals()` was deprecated in feasts 0.4.2.
## ℹ Please use `ggtime::gg_tsresiduals()` instead.
## This warning is displayed once per session.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: Removed 4 rows containing missing values or values outside the scale range
## (`geom_line()`).
## Warning: Removed 4 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 4 rows containing non-finite outside the scale range
## (`stat_bin()`).
## Warning: Removed 4 rows containing missing values or values outside the scale range
## (`geom_rug()`).
# Forecast and plot
fit |>
forecast(h = "2 years") |>
autoplot(recent_production) +
labs(title = "Australian Beer Production: Seasonal Naïve Forecasts",
y = "Beer production")
# Check residual statistics
fit |> accuracy()
## # A tibble: 1 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 SNAIVE(Beer) Training -1.57 16.1 13.6 -0.426 3.16 1 1 -0.237
#5.4
# Extract Australian exports
exports_aus <- global_economy |>
filter(Country == "Australia") |>
select(Year, Exports)
# Fit naive model
fit_exports <- exports_aus |>
model(NAIVE(Exports))
# Residual diagnostics
fit_exports |> gg_tsresiduals()
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_line()`).
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 1 row containing non-finite outside the scale range
## (`stat_bin()`).
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_rug()`).
# Forecasts
fit_exports |>
forecast(h = 10) |>
autoplot(exports_aus) +
labs(title = "Australian Exports: Naive Forecasts",
y = "Exports")
# Accuracy table
fit_exports |> accuracy()
## # A tibble: 1 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 NAIVE(Exports) Training 0.145 1.24 0.985 0.611 5.83 1 1 -0.306
# Conclusion:
# Australian exports are annual data with no seasonal pattern.
# Therefore, the naive method is appropriate.
# The residuals fluctuate around zero with no strong pattern,
# though some autocorrelation may remain.
# The naive model provides a reasonable benchmark forecast.
# Extract bricks data
bricks <- aus_production |>
select(Quarter, Bricks)
# Fit seasonal naive model
fit_bricks <- bricks |>
model(SNAIVE(Bricks))
# Residual diagnostics
fit_bricks |> gg_tsresiduals()
## Warning: Removed 24 rows containing missing values or values outside the scale range
## (`geom_line()`).
## Warning: Removed 24 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 24 rows containing non-finite outside the scale range
## (`stat_bin()`).
## Warning: Removed 24 rows containing missing values or values outside the scale range
## (`geom_rug()`).
# Forecasts
fit_bricks |>
forecast(h = "2 years") |>
autoplot(bricks) +
labs(title = "Australian Bricks: Seasonal Naive Forecasts",
y = "Bricks")
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning: Removed 8 rows containing missing values or values outside the scale range
## (`geom_line()`).
## Warning: Removed 20 rows containing missing values or values outside the scale range
## (`geom_line()`).
# Accuracy
fit_bricks |> accuracy()
## # A tibble: 1 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 SNAIVE(Bricks) Training 4.21 48.3 35.5 0.742 8.84 1 1 0.796
# Conclusion:
# The bricks series exhibits strong quarterly seasonality.
# The seasonal naive model captures this pattern effectively
# by repeating the value from the same quarter last year.
# Residuals appear approximately white noise,
# although some minor autocorrelation may remain.
# SNAIVE is an appropriate benchmark for this series.
#5.7
# --------------------------------------------------
# 1. CREATE MY RETAIL SERIES
# (You can change State/Industry if needed)
# --------------------------------------------------
myseries <- aus_retail |>
filter(State == "Victoria",
Industry == "Cafes, restaurants and takeaway food services") |>
select(Month, Turnover)
# --------------------------------------------------
# 2. TRAIN / TEST SPLIT
# --------------------------------------------------
myseries_train <- myseries |>
filter(year(Month) < 2011)
# Check split visually
autoplot(myseries, Turnover) +
autolayer(myseries_train, Turnover, colour = "red") +
labs(title = "Retail Series (Training in Red)",
y = "Turnover")
# --------------------------------------------------
# 3. FIT SEASONAL NAIVE MODEL
# --------------------------------------------------
fit <- myseries_train |>
model(SNAIVE(Turnover))
# --------------------------------------------------
# 4. RESIDUAL DIAGNOSTICS
# --------------------------------------------------
fit |> gg_tsresiduals()
## Warning: Removed 12 rows containing missing values or values outside the scale range
## (`geom_line()`).
## Warning: Removed 12 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 12 rows containing non-finite outside the scale range
## (`stat_bin()`).
## Warning: Removed 12 rows containing missing values or values outside the scale range
## (`geom_rug()`).
# --------------------------------------------------
# 5. FORECAST TEST DATA
# --------------------------------------------------
fc <- fit |>
forecast(new_data = anti_join(myseries, myseries_train))
## Joining with `by = join_by(Month, Turnover)`
fc |> autoplot(myseries) +
labs(title = "Seasonal Naive Forecast vs Actual",
y = "Turnover")
# --------------------------------------------------
# 6. ACCURACY COMPARISON
# --------------------------------------------------
# Training accuracy
fit |> accuracy()
## # A tibble: 1 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 SNAIVE(Turnover) Training 22.1 37.6 27.6 6.81 8.76 1 1 0.829
# Test accuracy
fc |> accuracy(myseries)
## # A tibble: 1 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 SNAIVE(Turnover) Test 69.0 116. 89.9 7.68 10.8 3.25 3.09 0.921
# --------------------------------------------------
# 7. SENSITIVITY ANALYSIS
# --------------------------------------------------
cutoffs <- c(2008, 2009, 2010, 2011, 2012)
sens <- map_dfr(cutoffs, function(cut) {
tr <- myseries |> filter(year(Month) < cut)
te <- anti_join(myseries, tr)
fit_temp <- tr |> model(SNAIVE(Turnover))
fc_temp <- fit_temp |> forecast(new_data = te)
acc <- fc_temp |> accuracy(myseries) |> as_tibble()
acc$cutoff_year <- cut
acc
})
## Joining with `by = join_by(Month, Turnover)`
## Joining with `by = join_by(Month, Turnover)`
## Joining with `by = join_by(Month, Turnover)`
## Joining with `by = join_by(Month, Turnover)`
## Joining with `by = join_by(Month, Turnover)`
sens |> select(cutoff_year, RMSE, MAE, MAPE)
## # A tibble: 5 × 4
## cutoff_year RMSE MAE MAPE
## <dbl> <dbl> <dbl> <dbl>
## 1 2008 237. 211. 27.2
## 2 2009 216. 193. 24.6
## 3 2010 160. 131. 16.0
## 4 2011 116. 89.9 10.8
## 5 2012 138. 110. 12.9
#Residuals fluctuate around zero.
#ACF mostly inside bounds → approximately uncorrelated.
#Histogram roughly symmetric → approximately normal.
#Test accuracy worse than training accuracy (expected).
#Accuracy improves when more seasonal history is included.
#SNAIVE works well because retail turnover has strong seasonality.