Exercises 5.1, 5.2, 5.3, 5.4, and 5.7 in the Hyndman book.
- Australian Population (global_economy)
aus_pop <- global_economy |>
filter(Code == "AUS")
fit_pop <- aus_pop |>
model(RW(Population ~ drift()))
fc_pop <- fit_pop |>
forecast(h = 10)
autoplot(fc_pop, aus_pop) +
labs(title = "Australian population – RW with drift",
x = "Year", y = "Population")
- Bricks (aus_production)
bricks <- aus_production |>
filter(!is.na(Bricks)) |> # drop the trailing missing values before fitting SNAIVE
select(Quarter, Bricks)
fit_bricks <- bricks |>
model(SNAIVE(Bricks))
fc_bricks <- fit_bricks |>
forecast(h = "3 years")
autoplot(fc_bricks, bricks) +
labs(title = "Bricks – SNAIVE",
x = "Quarter", y = "Millions of bricks")
- NSW Lambs (aus_livestock)
nsw_lambs <- aus_livestock |>
filter(State == "New South Wales",
Animal == "Lambs")
fit_lambs <- nsw_lambs |>
model(SNAIVE(Count))
fc_lambs <- fit_lambs |>
forecast(h = "2 years")
autoplot(fc_lambs, nsw_lambs) +
labs(title = "NSW Lambs – SNAIVE",
x = "Month", y = "Lambs")wealth <- hh_budget |>
filter(Country == "Australia") # choose the country
# Year is already the index; Wealth is the series
fit_wealth <- wealth |>
model(RW(Wealth ~ drift()))
fc_wealth <- fit_wealth |>
forecast(h = "5 years")
autoplot(fc_wealth, wealth) +
labs(title = "Household wealth (Australia) – RW with drift",
x = "Year", y = "Wealth (% of net disposable income)")
- Australian takeaway food turnover (aus_retail).
aus_retail |> distinct(State) # the states available in aus_retail
## # A tibble: 8 × 1
## State
## <chr>
## 1 Australian Capital Territory
## 2 New South Wales
## 3 Northern Territory
## 4 Queensland
## 5 South Australia
## 6 Tasmania
## 7 Victoria
## 8 Western Australia
takeaway <- aus_retail |>
filter(State == "New South Wales",
Industry == "Takeaway food services")
takeaway
## # A tsibble: 441 x 5 [1M]
## # Key: State, Industry [1]
## State Industry `Series ID` Month Turnover
## <chr> <chr> <chr> <mth> <dbl>
## 1 New South Wales Takeaway food services A3349792X 1982 Apr 85.4
## 2 New South Wales Takeaway food services A3349792X 1982 May 84.8
## 3 New South Wales Takeaway food services A3349792X 1982 Jun 80.7
## 4 New South Wales Takeaway food services A3349792X 1982 Jul 82.4
## 5 New South Wales Takeaway food services A3349792X 1982 Aug 80.7
## 6 New South Wales Takeaway food services A3349792X 1982 Sep 82.1
## 7 New South Wales Takeaway food services A3349792X 1982 Oct 87.3
## 8 New South Wales Takeaway food services A3349792X 1982 Nov 87.4
## 9 New South Wales Takeaway food services A3349792X 1982 Dec 97.2
## 10 New South Wales Takeaway food services A3349792X 1983 Jan 93
## # ℹ 431 more rows
fit_takeaway <- takeaway |>
model(SNAIVE(Turnover))
fc_takeaway <- fit_takeaway |>
forecast(h = "2 years")
fc_takeaway |>
autoplot(takeaway) +
labs(title = "NSW takeaway food – SNAIVE",
x = "Month", y = "Turnover")fb <- gafa_stock |>
filter(Symbol == "FB")
fb |>
ggplot(aes(x = Date, y = Close)) +
geom_line() +
labs(title = "Facebook daily closing price",
x = "Date", y = "Close")
- B. Produce forecasts using the drift method and plot them.
fb_ts <- gafa_stock |>
filter(Symbol == "FB") |>
arrange(Date) |>
transmute(
trading_day = row_number(), # 1, 2, 3, ... regular spacing
Close
)
fb_ts <- as_tsibble(fb_ts, index = trading_day, regular = TRUE)
fit_fb_drift <- fb_ts |>
model(RW(Close ~ drift()))
fc_fb_drift <- fit_fb_drift |>
forecast(h = 60)
fc_fb_drift |>
autoplot(fb_ts) +
labs(
title = "FB stock – drift forecasts (60 trading days ahead)",
x = "Trading day index",
y = "Closing Price (USD)"
)
- C. Show that the forecasts are identical to extending the line drawn
between the first and last observations.
augment(fit_fb_drift) |>
filter(.model == "RW(Close ~ drift())") |>
summarize(first = first(Close),
last = last(Close))
## # A tsibble: 1,258 x 3 [1]
## trading_day first last
## <int> <dbl> <dbl>
## 1 1 54.7 54.7
## 2 2 54.6 54.6
## 3 3 57.2 57.2
## 4 4 57.9 57.9
## 5 5 58.2 58.2
## 6 6 57.2 57.2
## 7 7 57.9 57.9
## 8 8 55.9 55.9
## 9 9 57.7 57.7
## 10 10 57.6 57.6
## # ℹ 1,248 more rows
The drift forecasts lie on the straight line connecting the first and last observations, so extending that line gives the same forecast values.
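Because summarise() on a tsibble keeps its index, the table above reports a value per trading day rather than a single first/last pair. A more direct check is to rebuild the line through the first and last observations and compare it with the forecast means; a minimal sketch, assuming the fb_ts and fc_fb_drift objects created above:
# The drift forecast h steps ahead is y_T + h * (y_T - y_1) / (T - 1),
# i.e. the line through the first and last observations extended h steps.
first_close <- first(fb_ts$Close)
last_close  <- last(fb_ts$Close)
slope       <- (last_close - first_close) / (nrow(fb_ts) - 1)

fc_fb_drift |>
  as_tibble() |>
  mutate(line_extension = last_close + slope * (trading_day - nrow(fb_ts))) |>
  select(trading_day, .mean, line_extension)  # the two columns should agree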
- D. Try using some of the other benchmark functions to forecast the same data set. Which do you think is best? Why?
fb_ts <- gafa_stock |>
filter(Symbol == "FB") |>
arrange(Date) |>
transmute(
trading_day = row_number(),
Close
) |>
as_tsibble(index = trading_day, regular = TRUE)
fits <- fb_ts |>
model(
mean = MEAN(Close),
naive = NAIVE(Close),
drift = RW(Close ~ drift())
)
fc <- fits |>
forecast(h = 60)
fc |>
autoplot(fb_ts) +
labs(
title = "FB stock – benchmark forecasts",
x = "Trading day index",
y = "Closing Price (USD)"
)
Among these benchmarks, the random walk–type methods are the most
appropriate. The mean method ignores the strong trend in the series, and
the seasonal naïve method does not apply because daily stock prices have
no seasonal period. Drift captures the overall upward movement of the FB
series and produces the most plausible forecasts over this sample, so it
is the best of these benchmarks for this data set.
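The visual comparison can be backed up with a simple holdout check: refit the three benchmarks without the last 60 trading days and compare their accuracy on the held-out period. A minimal sketch, assuming the fb_ts tsibble built above (the 60-day holdout is an arbitrary choice):
# Hold out the last 60 trading days and compare benchmark accuracy on them
fb_train <- fb_ts |>
  filter(trading_day <= max(trading_day) - 60)

fb_train |>
  model(
    mean  = MEAN(Close),
    naive = NAIVE(Close),
    drift = RW(Close ~ drift())
  ) |>
  forecast(h = 60) |>
  accuracy(fb_ts) |>   # the full series supplies the held-out actuals
  select(.model, RMSE, MAE, MAPE, MASE) |>
  arrange(RMSE)
The method with the lowest test RMSE and MASE is the strongest benchmark for this split; rankings can differ across holdout windows, so it is worth trying more than one split.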
beer <- aus_production |>
filter(year(Quarter) >= 1992) |>
select(Quarter, Beer)
fit_beer <- beer |>
model(SNAIVE(Beer))
# Check residuals
fit_beer |>
gg_tsresiduals()
The residuals are roughly centered around zero with no obvious remaining
seasonal pattern; most autocorrelations fall within the bounds, so they
are approximately white noise (though there may be small
deviations).
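A Ljung-Box test makes this visual check more formal. A minimal sketch using the fit_beer model above (lag = 8, two seasonal periods for quarterly data, is the usual rule of thumb):
# Portmanteau test on the SNAIVE innovation residuals
augment(fit_beer) |>
  features(.innov, ljung_box, lag = 8)
A p-value well above 0.05 would support treating the residuals as white noise.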
# Extract data of interest
recent_production <- aus_production |>
filter(year(Quarter) >= 1992)
# Define and estimate a model
fit <- recent_production |> model(SNAIVE(Beer))
# Look at the residuals
fit |> gg_tsresiduals()
- Australian Exports (global_economy) – NAIVE()
aus_exports <- global_economy |>
filter(Code == "AUS") |>
select(Year, Exports)
fit_exports <- aus_exports |>
model(NAIVE(Exports))
fc_exports <- fit_exports |>
forecast(h = 10)
autoplot(fc_exports, aus_exports) +
labs(title = "Australian Exports – NAIVE forecasts",
x = "Year", y = "Exports")bricks <- aus_production |>
select(Quarter, Bricks)
fit_bricks <- bricks |>
model(SNAIVE(Bricks))
fc_bricks <- fit_bricks |>
forecast(h = "3 years")
autoplot(fc_bricks, bricks) +
labs(title = "Bricks production – SNAIVE forecasts",
x = "Quarter", y = "Millions of bricks")myseries <- aus_retail |>
filter(State == "New South Wales",
Industry == "Department stores")
myseries_train <- myseries |>
filter(year(Month) < 2011)
autoplot(myseries, Turnover) +
autolayer(myseries_train, Turnover, colour = "red") +
labs(title = "Retail series with training portion highlighted",
x = "Month", y = "Turnover")Do the residuals appear to be uncorrelated and normally distributed?
myseries_train <- myseries |> filter(year(Month) < 2011)
myseries_test <- myseries |> filter(year(Month) >= 2011)
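One way to answer this is to look at the residual diagnostics together with a Ljung-Box test for the training fit; a minimal sketch, assuming the fit object defined just above (lag = 24, two seasonal periods of monthly data, is a common choice):
# Time plot, ACF, and histogram of the innovation residuals
fit |> gg_tsresiduals()

# Formal check for remaining autocorrelation
augment(fit) |>
  features(.innov, ljung_box, lag = 24)
A small p-value would indicate remaining autocorrelation, i.e. the residuals are not white noise.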
fc <- fit |>
forecast(h = nrow(myseries_test))
autoplot(fc, myseries) +
labs(title = "SNAIVE forecasts vs actuals",
x = "Month", y = "Turnover")## # A tibble: 1 × 12
## State Industry .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 New S… Departm… SNAIV… Trai… 11.2 24.7 19.1 3.24 5.58 1 1 -0.0527
fc |> accuracy(myseries)
## # A tibble: 1 × 12
## .model State Industry .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 SNAIVE(T… New … Departm… Test -2.81 25.5 19.9 -1.02 4.05 1.04 1.03 0.201
The test-set errors (RMSE 25.5, MASE 1.04) are only slightly larger than the training errors (RMSE 24.7, MASE 1.00), so the seasonal naïve forecasts hold up reasonably well on the held-out period.
- G. How sensitive are the accuracy measures to the amount of training data used?
The accuracy measures are not highly sensitive to the exact amount of training data, provided the training set contains at least one full seasonal cycle (and preferably several) and the seasonal pattern is stable. Very short training sets give noticeably worse accuracy, while adding much older data changes the accuracy only slightly unless the series has undergone structural change.
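One way to quantify this is to refit the model on training sets that start in different years and compare the resulting test accuracy. A minimal sketch, assuming myseries and myseries_test from above (the cutoff years are arbitrary choices):
library(purrr)  # for map_dfr(); not attached by fpp3

# Refit SNAIVE on progressively shorter training sets and compare test accuracy
cutoffs <- c(1985, 1995, 2000, 2005)  # arbitrary training start years

map_dfr(cutoffs, function(start_year) {
  myseries |>
    filter(year(Month) >= start_year, year(Month) < 2011) |>
    model(SNAIVE(Turnover)) |>
    forecast(h = nrow(myseries_test)) |>
    accuracy(myseries) |>
    mutate(train_start = start_year, .before = 1)
}) |>
  select(train_start, RMSE, MAE, MAPE, MASE)
If the seasonal pattern is stable, the rows should be broadly similar, which is what the point above suggests.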