#Data 624 HW-3
library(fpp3)
## Warning: package 'fpp3' was built under R version 4.3.3
## Registered S3 method overwritten by 'tsibble':
## method from
## as_tibble.grouped_df dplyr
## ── Attaching packages ──────────────────────────────────────────── fpp3 1.0.0 ──
## ✔ tibble 3.2.1 ✔ tsibble 1.1.5
## ✔ dplyr 1.1.4 ✔ tsibbledata 0.4.1
## ✔ tidyr 1.3.1 ✔ feasts 0.3.2
## ✔ lubridate 1.9.3 ✔ fable 0.3.4
## ✔ ggplot2 3.5.1 ✔ fabletools 0.4.2
## Warning: package 'tsibble' was built under R version 4.3.3
## ── Conflicts ───────────────────────────────────────────────── fpp3_conflicts ──
## ✖ lubridate::date() masks base::date()
## ✖ dplyr::filter() masks stats::filter()
## ✖ tsibble::intersect() masks base::intersect()
## ✖ tsibble::interval() masks lubridate::interval()
## ✖ dplyr::lag() masks stats::lag()
## ✖ tsibble::setdiff() masks base::setdiff()
## ✖ tsibble::union() masks base::union()
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats 1.0.0 ✔ readr 2.1.5
## ✔ purrr 1.0.2 ✔ stringr 1.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ tsibble::interval() masks lubridate::interval()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(dplyr)
library(fable)
library(ggplot2)
library(tsibble)
Australian Population (global_economy)
The population of australia is shown to be increasing in the autoplot. This lead me to believe it would be a best fit to use the RW(y ~ drift()).
aus_pop <- global_economy %>%
filter(Country == "Australia") %>%
select(Year, Population)
aus_pop %>% autoplot(Population)
fit_pop <- aus_pop %>% model(RW(Population ~ drift()))
fit_pop %>% forecast(h = 10) %>% autoplot(aus_pop)
Bricks (aus_production)
bricks <- aus_production %>%
select(Quarter, Bricks)
autoplot(bricks) + labs(title = "Bricks Production Over Time", y = "Bricks Production", x = "Quarters")
## Plot variable not specified, automatically selected `.vars = Bricks`
## Warning: Removed 20 rows containing missing values or values outside the scale range
## (`geom_line()`).
# Fit a seasonal naive model
fit_bricks <- bricks %>% model(SNAIVE(Bricks))
bricks_forecast <- fit_bricks %>% forecast(h = "2 years")
# Plot the forecast
bricks_forecast %>% autoplot(bricks) +
labs(title = "Bricks Production Forecast", y = "Bricks Production", x = "Quarter")
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning: Removed 8 rows containing missing values or values outside the scale range
## (`geom_line()`).
## Warning: Removed 20 rows containing missing values or values outside the scale range
## (`geom_line()`).
NSW Lambs (aus_livestock)
nsw_lambs <- aus_livestock %>%
filter(State == "New South Wales", Animal == "Lambs") %>%
select(Month, Count)
nsw_lambs %>% autoplot(Count) +
labs(title = "NSW Lamb Production", y = "Number of Lambs", x = "Month")
# Fit a seasonal naive model
fit_lambs <- nsw_lambs %>% model(SNAIVE(Count))
#Plot the forecast
fit_lambs %>% forecast(h = "2 years") %>% autoplot(nsw_lambs) +
labs(title = "Forecast of NSW Lamb Production", y = "Number of Lambs")
Household wealth (hh_budget).
hh_wealth <- hh_budget %>% select(Year, Wealth)
hh_wealth %>% autoplot(Wealth)
fit_wealth <- hh_wealth %>% model(RW(Wealth ~ drift()))
fit_wealth %>% forecast(h = 10) %>% autoplot(hh_wealth)
Australian takeaway food turnover (aus_retail).
```{r takeaway <- aus_retail %>% filter(Industry == “Takeaway Food Services”)
takeaway %>% autoplot(Turnover) + labs(title = “Takeaway Food Services Turnover”, y = “Turnover”)
fit_takeaway <- takeaway %>% model(SNAIVE(Turnover)) forecast_results <- fit_takeaway %>% forecast(h = “2 years”)
forecast_results %>% autoplot(takeaway) + labs(title = “Forecast of Takeaway Food Services Turnover”, y = “Turnover”)
2. Use the Facebook stock price (data set gafa_stock) to do the following:
Produce a time plot of the series.
```r
gafa_facebook <- gafa_stock %>%
filter(Symbol == "FB") %>%
mutate(day = row_number()) %>%
update_tsibble(index = day, regular = TRUE)
gafa_facebook %>% autoplot(Close) +
labs(title= "Daily Close Price of Facebook", y = "Close Price(USD)")
Produce forecasts using the drift method and plot them.
fit_fb <- gafa_facebook %>%
model(RW(Close ~ drift()))
fit_fb %>% forecast(h = 50) %>% autoplot(gafa_facebook) +
labs(title= "Daily Close Price of Facebook", y = "Close Price(USD)")
Show that the forecasts are identical to extending the line drawn between the first and last observations.
first_last_line <- tibble(
x = c(min(gafa_facebook$day), max(gafa_facebook$day)),
y = c(first(gafa_facebook$Close), last(gafa_facebook$Close))
)
# Fit the model again with RW drift and overlay the line
fit_fb %>% forecast(h = 10) %>%
autoplot(gafa_facebook) +
geom_line(data = first_last_line, aes(x = x, y = y), color = "red", linetype = "dashed") +
labs(title= "Daily Close Price Forecast with First and Last Line", y = "Close Price (USD)")
Try using some of the other benchmark functions to forecast the same data set. Which do you think is best? Why?
fit_naive <- gafa_facebook %>% model(NAIVE(Close))
fit_snaive <- gafa_facebook %>% model(SNAIVE(Close))
## Warning: 1 error encountered for SNAIVE(Close)
## [1] Non-seasonal model specification provided, use RW() or provide a different lag specification.
# Forecast and plot
fit_naive %>% forecast(h = 10) %>% autoplot(gafa_facebook) +
labs(title= "Naive Forecast of Facebook Close Price", y = "Close Price (USD)")
fit_snaive %>% forecast(h = 10) %>% autoplot(gafa_facebook) +
labs(title= "Seasonal Naive Forecast of Facebook Close Price", y = "Close Price (USD)")
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning: Removed 10 rows containing missing values or values outside the scale range
## (`geom_line()`).
3.Apply a seasonal naïve method to the quarterly Australian beer production data from 1992. Check if the residuals look like white noise, and plot the forecasts. The following code will help.
# Extract data of interest
recent_production <- aus_production |>
filter(year(Quarter) >= 1992)
# Define and estimate a model
fit <- recent_production |> model(SNAIVE(Beer))
# Look at the residuals
fit |> gg_tsresiduals()
## Warning: Removed 4 rows containing missing values or values outside the scale range
## (`geom_line()`).
## Warning: Removed 4 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 4 rows containing non-finite outside the scale range
## (`stat_bin()`).
# Look a some forecasts
fit |> forecast() |> autoplot(recent_production)
What do you conclude?
# Australian Exports
aus_exports <- global_economy %>%
filter(Country == "Australia") %>%
select(Year, Exports)
fit_exports <- aus_exports %>% model(NAIVE(Exports))
fit_exports %>% forecast(h = "2 years") %>% autoplot(aus_exports) +
labs(title = "Forecast of Australian Exports", y = "Exports", x = "Year")
# Bricks
fit_bricks2 <- aus_production %>%
filter(!is.na(Bricks)) %>%
model(SNAIVE(Bricks ~ lag("year")))
fit_bricks2 %>% forecast() %>% autoplot(aus_production) +
labs(title = "Bricks Production Forecast", y = "Bricks Production", x = "Quarter")
## Warning: Removed 20 rows containing missing values or values outside the scale range
## (`geom_line()`).
Create a training dataset consisting of observations before 2011 using
Check that your data have been split appropriately by producing the following plot.
Fit a seasonal naïve model using SNAIVE() applied to your training data (myseries_train).
Check the residuals.
Do the residuals appear to be uncorrelated and normally distributed?
Produce forecasts for the test data
Compare the accuracy of your forecasts against the actual values.
# For your retail time series (from Exercise 7 in Section 2.10):
myseries <- aus_retail %>%
filter(`Series ID` == sample(aus_retail$`Series ID`, 1))
myseries_train <- myseries %>%
filter(year(Month) < 2011)
# Check the split
autoplot(myseries, Turnover) +
autolayer(myseries_train, Turnover, colour = "red") +
labs(title = "Training and Full Series", y = "Turnover")
# Fit a seasonal naïve model
fit_retail <- myseries_train %>% model(SNAIVE(Turnover))
# Check residuals
fit_retail |> gg_tsresiduals()
## Warning: Removed 12 rows containing missing values or values outside the scale range
## (`geom_line()`).
## Warning: Removed 12 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 12 rows containing non-finite outside the scale range
## (`stat_bin()`).
# Produce forecasts for the test data
fc <- fit_retail |> forecast(new_data = anti_join(myseries, myseries_train))
## Joining with `by = join_by(State, Industry, `Series ID`, Month, Turnover)`
fc |> autoplot(myseries) +
labs(title = "Forecast vs Actual for Retail", y = "Turnover")
# Compare accuracy of forecasts against actual values
fit_retail |> accuracy() %>% select(MAE, RMSE, MAPE, MASE, RMSSE)
## # A tibble: 1 × 5
## MAE RMSE MAPE MASE RMSSE
## <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 6.61 8.30 8.54 1 1
fc |> accuracy(myseries) %>% select(MAE, RMSE, MAPE, MASE, RMSSE)
## # A tibble: 1 × 5
## MAE RMSE MAPE MASE RMSSE
## <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 29.9 38.7 15.6 4.52 4.66