# Australian annual population from global_economy, forecast with the drift method
aus_pop <- global_economy |>
  filter(Country == "Australia")
aus_pop |> autoplot(Population) +
  labs(title = "Australian Population Over Time")
pop_model <- aus_pop |> model(RW(Population ~ drift()))
pop_forecast <- pop_model |> forecast(h = 5)
pop_forecast |> autoplot(aus_pop) +
  labs(title = "Population Forecast - Drift Method")
# Bricks has missing values at the end of the series, so drop them before modelling
bricks <- aus_production |>
  filter(!is.na(Bricks))
bricks |> autoplot(Bricks) +
  labs(title = "Australian Brick Production")
brick_model <- bricks |> model(SNAIVE(Bricks))
brick_forecast <- brick_model |> forecast(h = 8)
brick_forecast |> autoplot(bricks) +
  labs(title = "Brick Production Forecast")
nsw_lambs <- aus_livestock |>
  filter(State == "New South Wales", Animal == "Lambs")
nsw_lambs |> autoplot(Count) +
  labs(title = "NSW Lamb Count")
lamb_model <- nsw_lambs |> model(SNAIVE(Count))
lamb_forecast <- lamb_model |> forecast(h = 8)
lamb_forecast |> autoplot(nsw_lambs) +
  labs(title = "NSW Lamb Forecast")
hh_budget |> autoplot(Wealth) +
  labs(title = "Household Wealth")
wealth_model <- hh_budget |> model(RW(Wealth ~ drift()))
wealth_forecast <- wealth_model |> forecast(h = 4)
wealth_forecast |> autoplot(hh_budget) +
  labs(title = "Wealth Forecast Using Drift")
takeaway_data <- aus_retail |>
  filter(Industry == "Takeaway food services") |>
  summarise(Turnover = sum(Turnover))
takeaway_data |> autoplot(Turnover) +
  labs(title = "Australian Takeaway Food Turnover")
takeaway_model <- takeaway_data |> model(SNAIVE(Turnover))
takeaway_forecast <- takeaway_model |> forecast(h = 12)
takeaway_forecast |> autoplot(takeaway_data) +
  labs(title = "Takeaway Turnover Forecast")
# Stock prices exist only on trading days, so re-index by trading day to make
# the series regular before modelling
fb_data <- gafa_stock |>
  filter(Symbol == "FB") |>
  mutate(trading_day = row_number()) |>
  update_tsibble(index = trading_day, regular = TRUE)
fb_data |> autoplot(Close) +
  labs(title = "Facebook Daily Closing Price", y = "Price ($)")
fb_drift_model <- fb_data |> model(RW(Close ~ drift()))
fb_drift_fc <- fb_drift_model |> forecast(h = 42)
fb_drift_fc |> autoplot(fb_data) +
  labs(title = "Facebook Price - Drift Forecast")
# The drift coefficient should match the slope of the straight line joining the
# first and last observations: (y_T - y_1) / (T - 1)
drift_coef <- tidy(fb_drift_model)$estimate
first_close <- fb_data$Close[1]
last_close <- fb_data$Close[nrow(fb_data)]
days_total <- nrow(fb_data)
line_slope <- (last_close - first_close) / (days_total - 1)
print(paste("Drift coefficient:", round(drift_coef, 4)))
## [1] "Drift coefficient: 0.0608"
print(paste("Manual line slope:", round(line_slope, 4)))
## [1] "Manual line slope: 0.0608"
fb_models <- fb_data |>
  model(
    Mean = MEAN(Close),
    Naive = NAIVE(Close),
    Drift = RW(Close ~ drift())
  )
fb_forecasts <- fb_models |> forecast(h = 42)
fb_forecasts |> autoplot(fb_data, level = NULL) +
  labs(title = "Facebook Price - Method Comparison")
The naive method (blue) provides a reasonable forecast by simply maintaining the last observed price. The mean method (green) forecasts a return to the historical average price, which seems unlikely given the recent upward trend. The drift method (red) assumes continued linear growth at the historical rate.
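As a quick check on that reading, the levels behind the mean and naive forecasts can be pulled straight from fb_data (the drift slope was already verified above); this sketch just summarises the closing prices:
# Levels behind the MEAN and NAIVE forecasts; as_tibble() drops the tsibble
# index so summarise() collapses the whole series to one row
fb_data |>
  as_tibble() |>
  summarise(
    mean_close = mean(Close),  # flat level projected by MEAN()
    last_close = last(Close)   # level carried forward by NAIVE()
  )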
recent_beer <- aus_production |>
  filter(year(Quarter) >= 1992)
beer_model <- recent_beer |> model(SNAIVE(Beer))
beer_model |> gg_tsresiduals()
beer_forecasts <- beer_model |> forecast()
beer_forecasts |> autoplot(recent_beer) +
  labs(title = "Beer Production Forecasts")
beer_model |> augment() |> features(.innov, ljung_box, lag = 8)  # lag = 2m for quarterly data
## # A tibble: 1 × 3
## .model lb_stat lb_pvalue
## <chr> <dbl> <dbl>
## 1 SNAIVE(Beer) 32.3 0.0000834
The Ljung-Box test gives a p-value of 0.0000834, which indicates significant remaining autocorrelation in the residuals. Several spikes in the ACF plot extend beyond the confidence bands, confirming that the seasonal naive method isn’t capturing all the underlying patterns in the beer data.
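To inspect those spikes on their own rather than within the gg_tsresiduals() panels, the residual ACF can also be plotted directly:
# Residual ACF alone (same information as the middle panel of gg_tsresiduals)
beer_model |> augment() |> ACF(.innov) |> autoplot()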
aus_exports <- global_economy |>
  filter(Country == "Australia")
exports_model <- aus_exports |> model(NAIVE(Exports))
exports_model |> gg_tsresiduals()
exports_model |> forecast(h = 5) |> autoplot(aus_exports) +
  labs(title = "Australian Exports Forecast")
The naive method appears to work well for the exports data: the residuals show no obvious patterns, and the autocorrelations mostly stay within the confidence bounds. For annual data like this, the simple approach seems adequate.
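A Ljung-Box test can back up that visual impression; lag = 10 is the usual choice for non-seasonal data (output not shown):
exports_model |> augment() |> features(.innov, ljung_box, lag = 10)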
bricks_model <- bricks |> model(SNAIVE(Bricks))
bricks_model |> gg_tsresiduals()
bricks_model |> forecast(h = 8) |> autoplot(bricks) +
  labs(title = "Brick Production Forecast Analysis")
The bricks data shows a similar pattern to the beer production data: the residuals are autocorrelated, which suggests the seasonal naive approach isn’t fully capturing the structure of the series.
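The same formal check used for the beer series applies here (lag = 2m = 8 for quarterly data; output not shown):
bricks_model |> augment() |> features(.innov, ljung_box, lag = 8)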
vic_livestock <- aus_livestock |>
  filter(State == "Victoria")
vic_fit <- vic_livestock |> model(SNAIVE(Count))
vic_fc <- vic_fit |> forecast(h = 24)
vic_fc |> autoplot(vic_livestock) +
  labs(title = "Victorian Livestock Forecasts - All Series")
set.seed(12345678)
my_retail <- aus_retail |>
  filter(`Series ID` == sample(aus_retail$`Series ID`, 1))
my_retail |> autoplot(Turnover)
my_retail |> gg_season(Turnover)
retail_train <- my_retail |>
  filter(year(Month) < 2011)
autoplot(my_retail, Turnover) +
  autolayer(retail_train, Turnover, colour = "red") +
  labs(title = "Retail Data Split - Train vs Full")
retail_model <- retail_train |> model(SNAIVE(Turnover))
retail_model |> gg_tsresiduals()
The ACF plot shows multiple significant spikes, indicating the residuals are far from white noise. The histogram suggests they are roughly normally distributed, but the autocorrelation pattern tells us the seasonal naive method is missing important information in the data.
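A Ljung-Box test makes the white-noise verdict formal; for monthly data the conventional lag is 2m = 24 (output not shown):
retail_model |> augment() |> features(.innov, ljung_box, lag = 24)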
retail_fc <- retail_model |>
  forecast(new_data = anti_join(my_retail, retail_train))
retail_fc |> autoplot(my_retail) +
  labs(title = "Retail Forecasts vs Actual")
print("Training accuracy:")
## [1] "Training accuracy:"
retail_model |> accuracy()
## # A tibble: 1 × 12
## State Industry .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Norther… Clothin… SNAIV… Trai… 0.439 1.21 0.915 5.23 12.4 1 1 0.768
print("Test accuracy:")
## [1] "Test accuracy:"
retail_fc |> accuracy(my_retail)
## # A tibble: 1 × 12
## .model State Industry .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 SNAIVE(T… Nort… Clothin… Test 0.836 1.55 1.24 5.94 9.06 1.36 1.28 0.601
# Test different training periods
train_2008 <- my_retail |> filter(year(Month) < 2008)
train_2009 <- my_retail |> filter(year(Month) < 2009)
train_2010 <- my_retail |> filter(year(Month) < 2010)
fit_2008 <- train_2008 |> model(SNAIVE(Turnover))
fit_2009 <- train_2009 |> model(SNAIVE(Turnover))
fit_2010 <- train_2010 |> model(SNAIVE(Turnover))
test_data <- my_retail |> filter(year(Month) >= 2011)
fc_2008 <- fit_2008 |> forecast(new_data = test_data)
fc_2009 <- fit_2009 |> forecast(new_data = test_data)
fc_2010 <- fit_2010 |> forecast(new_data = test_data)  # completes the set fitted above
fc_2008 |> accuracy(my_retail)
## # A tibble: 1 × 12
## .model State Industry .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 SNAIVE(T… Nort… Clothin… Test 1.93 2.31 1.96 13.9 14.2 2.14 1.90 0.513
fc_2009 |> accuracy(my_retail)
## # A tibble: 1 × 12
## .model State Industry .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 SNAIVE(T… Nort… Clothin… Test 1.10 1.68 1.28 7.40 9.04 1.40 1.39 0.539
The accuracy comparison shows the expected pattern: every measure is worse on the test set than on the training set, with MASE rising from 1.00 to 1.36 and RMSE from 1.21 to 1.55. The training-window comparison is more revealing. Ending training in 2007 gives a test MASE of 2.14, ending in 2008 gives 1.40, and using the full training set to 2010 gives 1.36. Because the seasonal naive method simply repeats the last observed year and this series is trending upward, forecasts from older training windows are increasingly biased, which is why ME falls from 1.93 to 1.10 to 0.836 as the window extends. The accuracy measures are clearly sensitive to how much training data the method is given.
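The repeated fit/forecast/accuracy steps above generalise naturally to a loop over cut-off years. Below is one way to sketch it, assuming my_retail and test_data as defined earlier; purrr is loaded explicitly because fpp3 does not attach it:
# Sketch: test accuracy of SNAIVE for several training cut-offs in one pass
library(purrr)
map_dfr(2008:2011, function(y) {
  my_retail |>
    filter(year(Month) < y) |>               # train up to the end of year y - 1
    model(SNAIVE(Turnover)) |>
    forecast(new_data = test_data) |>
    accuracy(my_retail) |>
    mutate(train_end = y - 1, .before = 1)   # label each row with its cut-off
}) |>
  select(train_end, .type, RMSE, MAE, MASE)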