Do exercises 5.1, 5.2, 5.3, 5.4 and 5.7 in the Hyndman book. Please submit your Rpubs link as well as your .pdf file showing your run code.
# Load libraries
library(fpp3)
library(fable)
library(tsibble)
library(dplyr)
library(ggplot2)
library(feasts)
library(gridExtra)
library(imputeTS)
Produce forecasts for the following series using whichever of NAIVE(y), SNAIVE(y) or RW(y ~ drift()) is more appropriate in each case:
Australian Population (global_economy) - RW with Drift - Population data tends to have strong trend but has no seasonal component.
Bricks (aus_production - SNAIVE - Brick production contains a strong seasonal pattern.
NSW Lambs (aus_livestock) SNAIVE - Livestock volume contains a strong seasonal pattern.
Household wealth (hh_budget) - RW with Drift - Financial data (as household wealth) contains strong trend pattern but has no seasonal component.
Australian takeaway food turnover (aus_retail) - SNAIVE - Retail sales contain strong seasonal component.
# Australian Population data (global_economy)
aus_population <- global_economy %>% filter(Country == "Australia")
# Fit the RW with drift model
fit_population <- aus_population %>%
model(RW = RW(Population ~ drift()))
# Create forecast
forecast_population <- fit_population %>% forecast(h = "10 years")
# Plot forecast
autoplot(forecast_population, aus_population) +
labs(title = "Forecast: Australian Population (RW with Drift)", y = "Population")
# Bricks data (aus_production)
bricks <- aus_production %>%
as_tsibble(index = Quarter)
# Drop rows with missing values in the Bricks column
bricks <- bricks %>%
filter(!is.na(Bricks))
# Fit the SNAIVE model
fit_bricks <- bricks %>%
model(SNAIVE = SNAIVE(Bricks))
# Create forecast
forecast_bricks <- fit_bricks %>% forecast(h = "2 years")
# Plot the forecast
autoplot(forecast_bricks, bricks) +
labs(title = "Forecast: Brick Production (SNAIVE)", y = "Bricks Production")
# NSW Lambs data (aus_livestock)
lambs <- aus_livestock %>% filter(Animal == "Lambs", State == "New South Wales")
# Fit the SNAIVE model
fit_lambs <- lambs %>%
model(SNAIVE = SNAIVE(Count))
# Create forecast
forecast_lambs <- fit_lambs %>% forecast(h = "1 year")
# Plot forecast
autoplot(forecast_lambs, lambs) +
labs(title = "Forecast: NSW Lamb Production (SNAIVE)", y = "Lambs (count)")
# Household Wealth data (hh_budget)
household_wealth <- hh_budget %>% filter(Country == "Australia")
# Fit the RW with drift model
fit_household_wealth <- household_wealth %>%
model(RW = RW(Wealth ~ drift()))
# Create forecast
forecast_household_wealth <- fit_household_wealth %>% forecast(h = "5 years")
# Plot forecast
autoplot(forecast_household_wealth, household_wealth) +
labs(title = "Forecast: Australia Household Wealth (RW with Drift)", y = "Wealth (Millions)")
# Australian Takeaway Food Turnover data (aus_retail)
aus_takeaway <- aus_retail %>%
filter(Industry == "Takeaway food services", State == "Western Australia")
# Fit the SNAIVE model
fit_takeaway <- aus_takeaway %>%
model(SNAIVE = SNAIVE(Turnover))
# Create the forecast
forecast_takeaway <- fit_takeaway %>% forecast(h = "1 year")
# Plot forecast
autoplot(forecast_takeaway, aus_takeaway) +
labs(title = "Forecast: Takeaway Food Turnover in Western Australia (SNAIVE)",
y = "Turnover (Millions)")
Use the Facebook stock price (data set gafa_stock) to do the following:
It is difficult to tell which of the models is best. It is obvious that the seasonal naive is not appropriate as the data has no seasonal component. However, the forecasts for RW with Drift, Naive and the linear forecasts are very similar and nearly identical for this data. In this particular case, if we were looking for a basic benchmark, we would use the naive model to align with the efficient market hypothesis.
# Filter gafa_stock dataset for Facebook
fb_stock <- gafa_stock %>%
filter(Symbol == "FB") %>%
update_tsibble(regular = TRUE) %>%
fill_gaps(Close = NA)
# Time plot of Facebook stock price
ptime <- autoplot(fb_stock, Close) +
labs(title = "Facebook Stock Price",
x = "Date",
y = "Stock Price (USD)")
# Fit a drift model to the Facebook stock price data
fb_drift_model <- fb_stock %>%
model(Drift = RW(Close ~ drift()))
# Generate forecasts
fb_drift_forecast <- fb_drift_model %>%
forecast(h = "12 months")
# Plot data with the drift forecast
p1 <- autoplot(fb_drift_forecast, fb_stock) +
labs(title = "Facebook Stock Price with Drift Forecast",
x = "Date",
y = "Stock Price (USD)") +
guides(colour = guide_legend(title = "Forecast"))
# Filter the gafa_stock dataset to use TradingDay index
fb_stock_tsibble <- gafa_stock %>%
filter(Symbol == "FB") %>%
mutate(TradingDay = row_number()) %>%
as_tsibble(index = TradingDay, regular = TRUE) %>%
drop_na(Close)
# Fit a drift model
fb_drift_model <- fb_stock_tsibble %>%
model(Drift = RW(Close ~ drift()))
# Generate forecasts
fb_drift_forecast <- fb_drift_model %>%
forecast(h = 365)
# Plot data data with the drift forecast
p2 <- autoplot(fb_drift_forecast, fb_stock_tsibble) +
labs(title = "Facebook Stock Price with Drift Forecast by Trading Day",
x = "Trading Day",
y = "Stock Price (USD)") +
guides(colour = guide_legend(title = "Forecast"))
# Display plots
grid.arrange(ptime,p1, p2, ncol = 1)
# Filter the gafa_stock dataset and create TradingDay variable
fb_stock_tsibble <- gafa_stock %>%
filter(Symbol == "FB") %>%
mutate(TradingDay = row_number()) %>%
as_tsibble(index = TradingDay, regular = TRUE) %>%
drop_na(Close)
# Extract the first and last trading days and close prices
first_day <- first(fb_stock$Date)
last_day <- last(fb_stock$Date)
first_close <- first(fb_stock$Close)
last_close <- last(fb_stock$Close)
# Calculate the slope and intercept of the line
slope <- (last_close - first_close) / as.numeric(difftime(last_day, first_day, units = "days"))
intercept <- first_close - slope * as.numeric(first_day)
# Define forecast horizon
forecast_horizon <- 365
# Define the forecast horizon
future_days <- seq(last_day + 1, by = 1, length.out = forecast_horizon)
# Create a data frame for the future dates and forecasted values
forecast_df <- tibble(
Date = future_days,
Close = intercept + slope * as.numeric(future_days))
# Create a data frame for the line between the first and last points
line_df <- tibble(
Date = c(first_day, last_day),
Close = c(first_close, last_close))
# Plot the original data along with the line and forecast
p3 <- autoplot(fb_stock, Close) +
geom_line(data = line_df, aes(x = Date, y = Close), color = "blue", size = .5) +
geom_line(data = forecast_df, aes(x = Date, y = Close), color = "red", linetype = "dashed", size = .5) +
labs(title = "Facebook Stock Price with Linear Forecast",
x = "Trading Day",
y = "Stock Price (USD)") +
guides(colour = guide_legend(title = "Forecast"))
p3
# Filter the gafa_stock dataset and create TradingDay variable
fb_stock_tsibble <- gafa_stock %>%
filter(Symbol == "FB") %>%
mutate(TradingDay = row_number()) %>%
as_tsibble(index = TradingDay, regular = TRUE) %>%
drop_na(Close)
# Fit a naive model
fb_naive_model <- fb_stock_tsibble %>%
model(NAIVE(Close))
# Generate naive forecasts
fb_naive_forecast <- fb_naive_model %>%
forecast(h = 365)
# Plot the original data and naive forecast
p4 <- fb_naive_forecast %>%
autoplot(fb_stock_tsibble) +
labs(title = "Facebook Stock Price with Naive Forecast",
x = "Trading Day",
y = "Stock Price (USD)") +
guides(colour = guide_legend(title = "Forecast"))
#Convert to a regular time series and fill missing dates
fb_stock <- fb_stock %>%
update_tsibble(regular = TRUE) %>%
fill_gaps(Close = NA)
# Fit a seasonal naive model
fb_snaive_model <- fb_stock %>%
model(SNAIVE(Close))
# Generate seasonal naive forecasts
fb_snaive_forecast <- fb_snaive_model %>%
forecast(h = 365)
# Plot data and seasonal naive forecast
p5 <- fb_snaive_forecast %>%
autoplot(fb_stock) +
labs(title = "Facebook Stock Price with Seasonal Naive Forecast",
x = "Date",
y = "Stock Price (USD)") +
guides(colour = guide_legend(title = "Forecast"))
# Display plots
grid.arrange(p4, p5, ncol = 1)
Apply a seasonal naïve method to the quarterly Australian beer production data from 1992. Check if the residuals look like white noise, and plot the forecasts. What do you conclude?
Visually, the residuals show that some season pattern information remains unaccounted for in the model. The innovation residuals have a mean around zero but point to a seasonal pattern within the plot. The ACF plot has no significantly correlated spikes except for lag 4; the residuals looked nearly normally distributed. The forecast itself visually fits the seasonal pattern.
However, the Ljung-Box test allows us to test the residuals and indicates the model may be performing more poorly than the visual indicate. In this case, because the LB pvalue is (less than) < 0.05, we can reject the hypothesis that the residuals are white noise and can conclude the residuals contain information that is not accounted for in the model. The high LB statistic also points to this conclusion the series is not white noise.
# Extract data of interest
recent_production <- aus_production |>
filter(year(Quarter) >= 1992)
# Define and estimate a model
fit <- recent_production |> model(SNAIVE(Beer))
# Look at the residuals
fit |> gg_tsresiduals()
# Ljung-Box test to check for autocorrelation
augment(fit) |>
features(.innov, ljung_box, lag = 8) |>
select(lb_stat, lb_pvalue)
## # A tibble: 1 × 2
## lb_stat lb_pvalue
## <dbl> <dbl>
## 1 32.3 0.0000834
Repeat the previous exercise using the Australian Exports series from global_economy and the Bricks series from aus_production. Use whichever of NAIVE() or SNAIVE() is more appropriate in each case.
Australian Exports: We use the NAIVE model since it has no seasonal component (as yearly data). The residuals hover around a mean of zero, the ACF shows no significantly correlated spikes except for lag 1. The residuals are normally distributed. In addition, the LB pvalue is (greater than) > .05 and we can accept the the hypothesis that the residuals are white noise. The moderately low LB stat also points to this conclusion.
Bricks Production: We use the SNAIVE model to account for the seasonal components of the data. However, the residuals do not appear to hover around mean zero and seem to contain a seasonal pattern. The ACF plot shows a handful of significantly correlated lags; the residuals are not normally distributed and skew left. The LB pvalue is (less than) <.05 so we reject the null hypothesis that the residuals are white and conclude there is additional seasonality that is not accounted for in the model. The high LB stat also points to this conclusion.
# Filter Exports data
recent_exports <- global_economy |>
filter(Country == "Australia")|>
select(Exports) |>
drop_na()
# Define and estimate a model for Australian Exports using NAIVE
fit_exports <- recent_exports |> model(NAIVE(Exports))
# Exports Residuals
fit_exports |> gg_tsresiduals() +
ggtitle("Residuals of Australian Exports Forecast") +
labs(x = "Year", y = "Residuals")
# Exports Forecast
fit_exports |> forecast() |> autoplot(recent_exports) +
ggtitle("Forecast for Australian Exports") +
labs(x = "Year", y = "Exports (in millions)")
# Ljung-Box test on residuals for Exports
augment(fit_exports) |>
features(.innov, ljung_box, lag = 10) |>
select(lb_stat, lb_pvalue)
## # A tibble: 1 × 2
## lb_stat lb_pvalue
## <dbl> <dbl>
## 1 16.4 0.0896
# Filter Bricks Data
recent_bricks <- aus_production |>
filter(year(Quarter) >= 1992) |>
select(Bricks) |>
drop_na()
# Bricks using SNAIVE
fit_bricks <- recent_bricks |> model(SNAIVE(Bricks))
# Bricks Residuals
fit_bricks |> gg_tsresiduals() +
ggtitle("Residuals of Bricks Forecast") +
labs(x = "Quarter", y = "Residuals")
# Bricks Forecast
fit_bricks |> forecast() |> autoplot(recent_bricks) +
ggtitle("Forecast for Bricks Production") +
labs(x = "Quarter", y = "Bricks (in millions)")
# Ljung-Box test on residuals for Bricks
augment(fit_bricks) |>
features(.innov, ljung_box, lag = 8) |>
select(lb_stat, lb_pvalue)
## # A tibble: 1 × 2
## lb_stat lb_pvalue
## <dbl> <dbl>
## 1 62.2 1.74e-10
For your retail time series (from Exercise 7 in Section 2.10):
For this example, I compared the SNAIVE and RW with Drift accuracy.
# My Series from 2.10
set.seed(123456789)
myseries <- aus_retail |>
filter(`Series ID` == sample(aus_retail$`Series ID`, 1))
# Create a training dataset
myseries_train <- myseries |>
filter(year(Month) < 2011)
# Plot split data
autoplot(myseries, Turnover) +
autolayer(myseries_train, Turnover, colour = "orange")
# Fit a seasonal naïve model (SNAIVE)
fit_snaive <- myseries_train |>
model(SNAIVE = SNAIVE(Turnover))
# Check the residuals for the SNAIVE model
fit_snaive |> gg_tsresiduals() +
labs(title = "Residuals for Seasonal Naive Model (SNAIVE)")
# Fit a random walk with drift model
fit_rw <- myseries_train |>
model(RW_Drift = RW(Turnover ~ drift()))
# Check the residuals for the Random Walk with Drift model
fit_rw |> gg_tsresiduals() +
labs(title = "Residuals for Random Walk with Drift Model")
# Generate forecasts for the test data using both models
fc_snaive <- fit_snaive |>
forecast(new_data = anti_join(myseries, myseries_train))
fc_rw <- fit_rw |>
forecast(new_data = anti_join(myseries, myseries_train))
# Plot forecasts for SNAIVE model
fc_snaive |> autoplot(myseries) +
autolayer(myseries_train, Turnover, series = "Training Data") +
labs(title = "Forecast: SNAIVE Model")
# Plot forecasts for Random Walk with Drift model
fc_rw |> autoplot(myseries) +
autolayer(myseries_train, Turnover, series = "Training Data") +
labs(title = "Forecast: Random Walk with Drift Model")
# Compare the accuracy of the forecasts against the actual values for both models
combined_accuracy <- bind_rows(
accuracy(fit_snaive) |> mutate(Model = "SNAIVE", Type = "Training"),
accuracy(fc_snaive, myseries) |> mutate(Model = "SNAIVE", Type = "Forecast"),
accuracy(fit_rw) |> mutate(Model = "Random Walk with Drift", Type = "Training"),
accuracy(fc_rw, myseries) |> mutate(Model = "Random Walk with Drift", Type = "Forecast")
)|> select(-State, -Industry, -Model, -Type, -ACF1)
combined_accuracy
## # A tibble: 4 × 9
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 SNAIVE Training 1.11e+ 0 2.74 1.81 6.81 14.0 1 1
## 2 SNAIVE Test -5.45e- 1 5.47 4.54 -4.61 14.9 2.50 2.00
## 3 RW_Drift Training -6.00e-16 2.33 1.49 -2.57 13.3 0.822 0.850
## 4 RW_Drift Test -9.54e+ 0 10.4 9.57 -31.6 31.7 5.28 3.79