Homework 3

Instructions Do exercises 5.1, 5.2, 5.3, 5.4 and 5.7 in the Hyndman book. Please submit your Rpubs link as well as your .pdf file showing your run code.

5.1)

#Produce forecasts for the following series using whichever of NAIVE(y), SNAIVE(y) or RW(y ~ drift()) is more appropriate in each case: Australian Population (global_economy), Bricks (aus_production), NSW Lambs (aus_livestock), Household wealth (hh_budget) and Australian takeaway food turnover (aus_retail).

# Load the dataset
data("global_economy")
data <- global_economy
#head(data)
data_filter <- data |>
  filter(Country == "Australia") |>
  select(Population)
data_filter |>
  autoplot(Population) +
  labs(title= "Australian Population Over Time", y = "People")

#Bricks aus_production

# Filter and visualize the data
data <- aus_production
data_filter <- data |> select(Bricks)
data_filter |> autoplot(Bricks) + labs(title= "Bricks Production Over Time", y = "Production")

## Warning: Removed 20 rows containing missing values or values outside the scale range
## (`geom_line()`).

# Forecast using SNAIVE
bricks_forecast <- data_filter |> model(SNAIVE(Bricks)) |> forecast(h = "2 years")
autoplot(bricks_forecast) + ggtitle("Bricks Forecast")

## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf

## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf

## Warning: Removed 8 rows containing missing values or values outside the scale range
## (`geom_line()`).

#NSW Lambs (aus_livestock)

# Filter and visualize the data
data <- aus_livestock
data_filter <- data |> filter(Animal == "Lambs") |> select(Count)
data_filter |> autoplot(Count) + labs(title= "NSW Lambs Over Time", y = "Count")

# Forecast using SNAIVE
lambs_forecast <- data_filter |> model(SNAIVE(Count)) |> forecast(h = "2 years")
autoplot(lambs_forecast) + ggtitle("NSW Lambs Forecast")

#Household wealth (hh_budget).

# Filter and visualize the data
data <- hh_budget
data_filter <- data |> select(Wealth)
data_filter |> autoplot(Wealth) + labs(title= "Household Wealth Over Time", y = "Wealth")

# Forecast using RW with drift
wealth_forecast <- data_filter |> model(RW(Wealth ~ drift())) |> forecast(h = "5 years")
autoplot(wealth_forecast) + ggtitle("Household Wealth Forecast")

#Australian takeaway food turnover (aus_retail).

# Filter and visualize the data
data <- aus_retail
data_filter <- data |> filter(Industry == "Takeaway food services") |> select(Turnover)
data_filter |> autoplot(Turnover) + labs(title= "Australian Takeaway Food Turnover Over Time", y = "Turnover")

# Forecast using SNAIVE
food_forecast <- data_filter |> model(SNAIVE(Turnover)) |> forecast(h = "2 years")
autoplot(food_forecast) + ggtitle("Australian Takeaway Food Turnover Forecast")

5.2) Use the Facebook stock price (data set gafa_stock) to do the following:

Produce a time plot of the series. Produce forecasts using the drift method and plot them. Show that the forecasts are identical to extending the line drawn between the first and last observations. Try using some of the other benchmark functions to forecast the same data set. Which do you think is best? Why?

# Load data and libraries
library(fpp3)

# Filter Facebook stock price data
data <- gafa_stock %>% filter(Symbol == "FB")

# Produce a time plot of the series
data %>% autoplot(Close) + labs(title = "Facebook Stock Price Over Time", y = "Closing Price")

# Ensure data has regular intervals
data_regular <- data %>% update_tsibble(regular = TRUE)

# Forecast using the drift method
fb_drift_forecast <- data_regular %>% model(RW(Close ~ drift())) %>% forecast(h = "3 months")

## Warning: 1 error encountered for RW(Close ~ drift())
## [1] .data contains implicit gaps in time. You should check your data and convert implicit gaps into explicit missing values using `tsibble::fill_gaps()` if required.

# Plot the forecasts
autoplot(fb_drift_forecast) + labs(title = "Facebook Stock Price Forecast using Drift Method", y = "Closing Price")

## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf

## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf

## Warning: Removed 91 rows containing missing values or values outside the scale range
## (`geom_line()`).

# Extract the first and last observations
first_obs <- head(data, 1)$Close
last_obs <- tail(data, 1)$Close
time_index <- tail(data, 1)$Date

# Calculate the slope of the line
slope <- (last_obs - first_obs) / as.numeric(difftime(time_index, head(data, 1)$Date, units = "days"))

# Create a line extending from the last observation
extended_line <- tibble(
  Date = seq(time_index, by = "1 day", length.out = 90),
  Close = last_obs + slope * as.numeric(difftime(seq(time_index, by = "1 day", length.out = 90), time_index, units = "days"))
)

# Plot the forecasts with the extended line
autoplot(data, Close) +
  geom_line(data = fb_drift_forecast, aes(x = Date, y = .mean), color = "blue") +
  geom_line(data = extended_line, aes(x = Date, y = Close), color = "red", linetype = "dashed") +
  labs(title = "Facebook Stock Price Forecasts vs Extended Line", y = "Closing Price")

## Warning: Removed 91 rows containing missing values or values outside the scale range
## (`geom_line()`).

# Forecast using naive method
fb_naive_forecast <- data_regular %>% model(NAIVE(Close)) %>% forecast(h = "3 months")

## Warning: 1 error encountered for NAIVE(Close)
## [1] .data contains implicit gaps in time. You should check your data and convert implicit gaps into explicit missing values using `tsibble::fill_gaps()` if required.

# Forecast using seasonal naive method
fb_snaive_forecast <- data_regular %>% model(SNAIVE(Close)) %>% forecast(h = "3 months")

## Warning: 1 error encountered for SNAIVE(Close)
## [1] .data contains implicit gaps in time. You should check your data and convert implicit gaps into explicit missing values using `tsibble::fill_gaps()` if required.

# Plot the forecasts
autoplot(data, Close) +
  autolayer(fb_drift_forecast, color = "blue", series = "Drift Method") +
  autolayer(fb_naive_forecast, color = "green", series = "Naive Method") +
  autolayer(fb_snaive_forecast, color = "purple", series = "Seasonal Naive Method") +
  labs(title = "Facebook Stock Price Forecasts using Different Benchmark Methods", y = "Closing Price")

## Warning in ggdist::geom_lineribbon(without(intvl_mapping, "colour_ramp"), :
## Ignoring unknown parameters: `series`

## Warning in geom_line(mapping = without(mapping, "shape"), data =
## unpack_data(object[single_row[["FALSE"]], : Ignoring unknown parameters:
## `series`

## Warning in ggdist::geom_lineribbon(without(intvl_mapping, "colour_ramp"), :
## Ignoring unknown parameters: `series`

## Warning in geom_line(mapping = without(mapping, "shape"), data =
## unpack_data(object[single_row[["FALSE"]], : Ignoring unknown parameters:
## `series`

## Scale for fill_ramp is already present.
## Adding another scale for fill_ramp, which will replace the existing scale.

## Warning in ggdist::geom_lineribbon(without(intvl_mapping, "colour_ramp"), : Ignoring unknown parameters: `series`
## Ignoring unknown parameters: `series`

## Scale for fill_ramp is already present.
## Adding another scale for fill_ramp, which will replace the existing scale.

## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf

## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf

## Warning: Removed 91 rows containing missing values or values outside the scale range
## (`geom_line()`).

## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf

## Warning: Removed 91 rows containing missing values or values outside the scale range
## (`geom_line()`).

## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf

## Warning: Removed 91 rows containing missing values or values outside the scale range
## (`geom_line()`).

Analysis and Recommendation

The Drift Method extends a line connecting the first and last observations, which is useful for data with a consistent trend.

The Naive Method forecasts using the last observed value. This method is simple but might not capture trends or seasonality.

The Seasonal Naive Method is suitable for data with seasonality, but might not be the best fit for stock prices unless there’s a strong seasonal pattern.

Given that stock prices typically exhibit trends and possibly random walk behavior, the Drift Method might be more appropriate for forecasting Facebook stock prices.

5.3)

Apply a seasonal naïve method to the quarterly Australian beer production data from 1992. Check if the residuals look like white noise, and plot the forecasts. The following code will help.

Extract data of interest

#recent_production <- aus_production |> # filter(year(Quarter) >= 1992) # Define and estimate a model #fit <- recent_production |> model(SNAIVE(Beer)) # Look at the residuals #fit |> gg_tsresiduals() # Look a some forecasts #fit |> forecast() |> autoplot(recent_production) What do you conclude?

#Extract data of interest
# Load data and libraries
library(fpp3)

# Extract data of interest
recent_production <- aus_production |> filter(year(Quarter) >= 1992)

# Define and estimate a model
fit <- recent_production |> model(SNAIVE(Beer))

# Look at the residuals
fit |> gg_tsresiduals()

## Warning: Removed 4 rows containing missing values or values outside the scale range
## (`geom_line()`).

## Warning: Removed 4 rows containing missing values or values outside the scale range
## (`geom_point()`).

## Warning: Removed 4 rows containing non-finite outside the scale range
## (`stat_bin()`).

# Look at some forecasts
fit |> forecast() |> autoplot(recent_production) + labs(title = "Australian Beer Production Forecast", y = "Beer Production")

Conclusion: The residual diagnostics suggest that the SNAIVE model does a decent job capturing the seasonal patterns in Australian beer production. The residuals mostly resemble white noise, indicating no significant patterns left unexplained by the model. The forecast plot shows how well the SNAIVE method projects future beer production, capturing the seasonality in the data effectively.

These findings help in evaluating the adequacy of the model for forecasting purposes. If you observe any patterns in the residuals, it may indicate that the model does not fully capture all the patterns in the data, and you might need to consider more complex models.

5.4) Repeat the previous exercise using the Australian Exports series from global_economy and the Bricks series from aus_production. Use whichever of NAIVE() or SNAIVE() is more appropriate in each case.

# Load required libraries
library(fpp3)

# Load the datasets
data("global_economy")
data("aus_production")

# Extract the Australian Exports series
aus_exports <- global_economy %>% 
  filter(Country == "Australia") %>% 
  select(Exports)

# Fit a seasonal naive model
fit_snaive_exports <- aus_exports %>% model(SNAIVE(Exports))

## Warning: 1 error encountered for SNAIVE(Exports)
## [1] Non-seasonal model specification provided, use RW() or provide a different lag specification.

# Forecast using the SNAIVE model
fc_snaive_exports <- fit_snaive_exports %>% forecast(h = "2 years")

# Plot the forecast
fc_snaive_exports %>% autoplot(aus_exports)

## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf

## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf

## Warning: Removed 2 rows containing missing values or values outside the scale range
## (`geom_line()`).

# Extract the Bricks series
bricks <- aus_production %>% 
  select(Quarter, Bricks)

# Fit a naive model
fit_naive_bricks <- bricks %>% model(NAIVE(Bricks))

# Forecast using the NAIVE model
fc_naive_bricks <- fit_naive_bricks %>% forecast(h = "2 years")

# Plot the forecast
fc_naive_bricks %>% autoplot(bricks)

## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf

## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf

## Warning: Removed 8 rows containing missing values or values outside the scale range
## (`geom_line()`).

## Warning: Removed 20 rows containing missing values or values outside the scale range
## (`geom_line()`).

The SNAIVE model is more appropriate for data with seasonal patterns, such as Australian exports, as it captures these variations effectively.

The NAIVE model is simpler and may not capture seasonal patterns in data like bricks production. Using a more advanced model, such as SNAIVE, could provide better forecasts for such data.

5.7) For your retail time series (from Exercise 7 in Section 2.10):

#Create a training dataset consisting of observations before 2011 using

#myseries_train <- myseries |> # filter(year(Month) < 2011) #Check that your data have been split appropriately by producing the following plot.

#autoplot(myseries, Turnover) + # autolayer(myseries_train, Turnover, colour = “red”) #Fit a seasonal naïve model using SNAIVE() applied to your training data (myseries_train).

#fit <- myseries_train |> # model(SNAIVE()) #Check the residuals.

#fit |> gg_tsresiduals() #Do the residuals appear to be uncorrelated and normally distributed?

#Produce forecasts for the test data

#fc <- fit |> # forecast(new_data = anti_join(myseries, myseries_train)) #fc |> autoplot(myseries) #Compare the accuracy of your forecasts against the actual values.

#fit |> accuracy() #fc |> accuracy(myseries) How sensitive are the accuracy measures to the amount of training data used?

# Load the dataset
data("aus_retail")

# Select one of the time series with your own seed value
set.seed(12345678)
myseries <- aus_retail |>
  filter(`Series ID` == sample(aus_retail$`Series ID`, 1))

# Explore the chosen retail time series
autoplot(myseries, Turnover)

gg_season(myseries, Turnover)

gg_subseries(myseries, Turnover)

gg_lag(myseries, Turnover)

ACF(myseries, Turnover) |> autoplot()

# Create a training dataset consisting of observations before 2011
myseries_train <- myseries |>
  filter(year(Month) < 2011)

# Check that the data have been split appropriately by producing the following plot
autoplot(myseries, Turnover) +
  autolayer(myseries_train, Turnover, colour = "red")

# Fit a seasonal naive model using SNAIVE() applied to your training data
fit <- myseries_train |>
  model(SNAIVE(Turnover))

# Check the residuals
fit |> gg_tsresiduals()

## Warning: Removed 12 rows containing missing values or values outside the scale range
## (`geom_line()`).

## Warning: Removed 12 rows containing missing values or values outside the scale range
## (`geom_point()`).

## Warning: Removed 12 rows containing non-finite outside the scale range
## (`stat_bin()`).

# Produce forecasts for the test data
fc <- fit |>
  forecast(new_data = anti_join(myseries, myseries_train))

## Joining with `by = join_by(State, Industry, `Series ID`, Month, Turnover)`

fc |> autoplot(myseries)

# Compare the accuracy of your forecasts against the actual values
fit |> accuracy()

## # A tibble: 1 × 12
##   State    Industry .model .type    ME  RMSE   MAE   MPE  MAPE  MASE RMSSE  ACF1
##   <chr>    <chr>    <chr>  <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Norther… Clothin… SNAIV… Trai… 0.439  1.21 0.915  5.23  12.4     1     1 0.768

fc |> accuracy(myseries)

## # A tibble: 1 × 12
##   .model    State Industry .type    ME  RMSE   MAE   MPE  MAPE  MASE RMSSE  ACF1
##   <chr>     <chr> <chr>    <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 SNAIVE(T… Nort… Clothin… Test  0.836  1.55  1.24  5.94  9.06  1.36  1.28 0.601

Conclusion: for time series data with seasonal patterns, the SNAIVE model is recommended. It captures the essential patterns, provides accurate forecasts, and ensures residuals resemble white noise, indicating a well-fitted model. For simpler, non-seasonal data, the NAIVE model can be a straightforward choice

Assignment-3D624

Jose Fuentes

2025-02-23