Exercises: 5.1, 5.2, 5.3, 5.4 and 5.7
library(fpp3)
## Warning: package 'fpp3' was built under R version 4.4.3
## Registered S3 methods overwritten by 'ggtime':
## method from
## autolayer.fbl_ts fabletools
## autolayer.tbl_ts fabletools
## autoplot.dcmp_ts fabletools
## autoplot.fbl_ts fabletools
## autoplot.tbl_ts fabletools
## fortify.fbl_ts fabletools
## ── Attaching packages ──────────────────────────────────────────── fpp3 1.0.3 ──
## ✔ tibble 3.2.1 ✔ tsibble 1.2.0
## ✔ dplyr 1.1.4 ✔ tsibbledata 0.4.1
## ✔ tidyr 1.3.1 ✔ ggtime 0.2.0
## ✔ lubridate 1.9.4 ✔ feasts 0.5.0
## ✔ ggplot2 4.0.0 ✔ fable 0.5.0
## Warning: package 'tsibble' was built under R version 4.4.3
## Warning: package 'ggtime' was built under R version 4.4.3
## Warning: package 'feasts' was built under R version 4.4.3
## Warning: package 'fabletools' was built under R version 4.4.3
## Warning: package 'fable' was built under R version 4.4.3
## ── Conflicts ───────────────────────────────────────────────── fpp3_conflicts ──
## ✖ lubridate::date() masks base::date()
## ✖ dplyr::filter() masks stats::filter()
## ✖ tsibble::intersect() masks base::intersect()
## ✖ tsibble::interval() masks lubridate::interval()
## ✖ dplyr::lag() masks stats::lag()
## ✖ tsibble::setdiff() masks base::setdiff()
## ✖ tsibble::union() masks base::union()
aus_population <- global_economy |>
filter(Country == "Australia") |>
select(Year, Population)
aus_population |>
autoplot(Population) +
theme_light() +
labs(
title = "Australian Population",
y = "Population",
x = "Year"
)
Since this is a population data then I think I should use the RW model since there is a steady upward trend.
fit <- aus_population |>
model(RW(Population ~ drift()))
aus_forecast <- fit |> forecast(h = 10)
aus_forecast |>
autoplot(aus_population) +
theme_light() +
labs(
title = "Australian Population Forecast",
x = "Year",
y = "Population"
)
bricks <- aus_production |>
filter(!is.na(Bricks)) |>
select(Quarter, Bricks)
bricks |>
autoplot(Bricks) +
theme_light() +
labs(
title = "Australian Brick Production",
x = "Quarter",
y = "Bricks(millions)"
)
Since this is seasonal the model I will use for this is SNAIVE.
fit <- bricks |>
model(SNAIVE(Bricks ~ lag("year")))
brick_forecast <- fit |> forecast(h = "2 years")
brick_forecast |>
autoplot(bricks) +
theme_light() +
labs(
title = "Australian Brick Production Forecast",
x = "Quarter",
y = "Bricks(millions)"
)
nws_lambs <- aus_livestock |>
filter(State == "New South Wales", Animal == "Lambs") |>
select(Month, Count)
nws_lambs |>
autoplot(Count) +
theme_light() +
labs(
title = "NSW Lambs",
x = "Month",
y = "Count(thousands)"
)
Since there is a seasonal pattern I will use the SNAIVE model.
fit <- nws_lambs |>
model(SNAIVE(Count ~ lag("year")))
lamb_forecast <- fit |> forecast(h = "2 years")
lamb_forecast |>
autoplot(nws_lambs) +
labs(
title = "NSW Lamb Forecast",
x = "Month",
y = "Count(thousands)"
)
hh_wealth <- hh_budget |>
select(Country, Year, Wealth)
hh_wealth |>
autoplot(Wealth) +
labs(
title = "Household Wealth",
x = "Year",
y = "Wealth"
)
There isn’t a distinct seasonal trend so I will use the RW model.
fit <- hh_wealth |>
model(RW(Wealth ~ drift()))
wealth_forecast <- fit |> forecast(h = 10)
wealth_forecast |>
autoplot(hh_wealth) +
theme_light() +
labs(
title = "Household Wealth Forecast",
x = "Year",
y = "Wealth"
)
takeaway <- aus_retail |>
filter(Industry == "Takeaway food services") |>
summarise(Turnover = sum(Turnover))
takeaway |>
autoplot(Turnover) +
labs(
title = "Australian Takeaway Food Turnover",
x = "Month",
y = "Turnover($millions)"
)
There is a clear seasonal pattern so I will use the SNAIVE model here.
fit <- takeaway |>
model(SNAIVE(Turnover ~ lag("year")))
takeaway_forecast <- fit |> forecast(h = "2 years")
takeaway_forecast |>
autoplot(takeaway) +
theme_light() +
labs(
title = "Australian Takeaway Food Turnover",
x = "Month",
y = "Turnover($millions)"
)
facebook <- gafa_stock |>
filter(Symbol == "FB")
facebook |>
autoplot(Close) +
theme_light() +
labs(
title = "Facebook Daily Closing Stock Price",
x = "Date",
y = "CLosing Price (USD)"
)
facebook_fc <- gafa_stock |>
filter(Symbol == "FB") |>
mutate(day = row_number()) |>
update_tsibble(index = day, regular = TRUE)
fit <- facebook_fc |>
model(RW(Close ~ drift()))
fb_forecast <- fit |> forecast(h = 90)
fb_forecast |>
autoplot(facebook_fc) +
theme_light() +
labs(
title = "Facebook Stock Price Forecast",
x = "Trading Day",
y = "Closing Price (USD)"
)
facebook <- gafa_stock |>
filter(Symbol == "FB") |>
mutate(day = row_number()) |>
update_tsibble(index = day, regular = TRUE)
fit <- facebook |>
model(
Mean = MEAN(Close),
Naive = NAIVE(Close),
Drift = RW(Close ~ drift())
)
facebook_fc <- fit |> forecast(h = 90)
facebook_fc |>
autoplot(facebook, level = NULL) +
labs(
title = "Facebook Stock Price - All Forecasts",
x = "Trading Day",
y = "Closing Price(USD)") +
guides(color = guide_legend(title = "Method"))
# Extract data of interest
recent_production <- aus_production |>
filter(year(Quarter) >= 1992)
# Define and estimate a model
fit <- recent_production |> model(SNAIVE(Beer))
# Look at the residuals
fit |> gg_tsresiduals()
## Warning: Removed 4 rows containing missing values or values outside the scale range
## (`geom_line()`).
## Warning: Removed 4 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 4 rows containing non-finite outside the scale range
## (`stat_bin()`).
## Warning: Removed 4 rows containing missing values or values outside the scale range
## (`geom_rug()`).
# Look a some forecasts
fit |> forecast() |> autoplot(recent_production)
## What do you conclude?
I see a significant spike on the ACF graph around lag 4, which I am assuming the model is missing the pattern there. The histogram is skewed to the left so the model is making more negative errors. Ths means that the model is probably overestimating the production more often than not.
aus_exports <- global_economy |>
filter(Country == "Australia") |>
select(Year, Exports)
fit_exports <- aus_exports |>
model(NAIVE(Exports))
fit_exports |> gg_tsresiduals()
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_line()`).
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 1 row containing non-finite outside the scale range
## (`stat_bin()`).
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_rug()`).
fit_exports |> forecast() |> autoplot(aus_exports)
bricks <- aus_production |>
filter(!is.na(Bricks)) |>
select(Quarter, Bricks)
fit_bricks <- bricks |>
model(SNAIVE(Bricks ~ lag("year")))
fit_bricks |> gg_tsresiduals()
## Warning: Removed 4 rows containing missing values or values outside the scale range
## (`geom_line()`).
## Warning: Removed 4 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 4 rows containing non-finite outside the scale range
## (`stat_bin()`).
## Warning: Removed 4 rows containing missing values or values outside the scale range
## (`geom_rug()`).
fit_bricks |> forecast() |> autoplot(bricks)
myseries <- aus_retail |>
filter(
State == "New South Wales",
Industry == "Clothing retailing"
)
head(myseries)
aus_retail |> distinct(State)
aus_retail |> distinct(Industry)
myseries_train <- myseries |>
filter(year(Month) < 2011)
autoplot(myseries, Turnover) +
autolayer(myseries_train, Turnover, colour = "red")
fit <- myseries_train |>
model(SNAIVE(Turnover))
fit |> gg_tsresiduals()
## Warning: Removed 12 rows containing missing values or values outside the scale range
## (`geom_line()`).
## Warning: Removed 12 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 12 rows containing non-finite outside the scale range
## (`stat_bin()`).
## Warning: Removed 12 rows containing missing values or values outside the scale range
## (`geom_rug()`).
Do the residuals appear to be uncorrelated and normally distributed?
They appear to be uncorrelated. There are too many significant spikes in the ACF plot. The increasing variability inthe time plot shows that the model is not truly representing the recent years. THhe historgram ont he other hadn appears to be normally ditributed and we can assume some normality because of that. OVerall there is significant autocorrelation so the residuals fail the white noise test.
fc <- fit |>
forecast(new_data = anti_join(myseries, myseries_train))
## Joining with `by = join_by(State, Industry, `Series ID`, Month, Turnover)`
fc |> autoplot(myseries)
fit |> accuracy()
fc |> accuracy(myseries)
I think that if we use less training data then the errors would increase. But then also increasing the size will decrease the size of the test set and that would make it less accuate. The MPE is only 21%, and that suggests that the model is not capturing all of the patterns and a better more sophisticated model is needed.