#Data 624 HW-3

library(fpp3)
## Warning: package 'fpp3' was built under R version 4.3.3
## Registered S3 method overwritten by 'tsibble':
##   method               from 
##   as_tibble.grouped_df dplyr
## ── Attaching packages ──────────────────────────────────────────── fpp3 1.0.0 ──
## ✔ tibble      3.2.1     ✔ tsibble     1.1.5
## ✔ dplyr       1.1.4     ✔ tsibbledata 0.4.1
## ✔ tidyr       1.3.1     ✔ feasts      0.3.2
## ✔ lubridate   1.9.3     ✔ fable       0.3.4
## ✔ ggplot2     3.5.1     ✔ fabletools  0.4.2
## Warning: package 'tsibble' was built under R version 4.3.3
## ── Conflicts ───────────────────────────────────────────────── fpp3_conflicts ──
## ✖ lubridate::date()    masks base::date()
## ✖ dplyr::filter()      masks stats::filter()
## ✖ tsibble::intersect() masks base::intersect()
## ✖ tsibble::interval()  masks lubridate::interval()
## ✖ dplyr::lag()         masks stats::lag()
## ✖ tsibble::setdiff()   masks base::setdiff()
## ✖ tsibble::union()     masks base::union()
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats 1.0.0     ✔ readr   2.1.5
## ✔ purrr   1.0.2     ✔ stringr 1.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter()     masks stats::filter()
## ✖ tsibble::interval() masks lubridate::interval()
## ✖ dplyr::lag()        masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(dplyr)
library(fable)
library(ggplot2)
library(tsibble)
  1. Produce forecasts for the following series using whichever of NAIVE(y), SNAIVE(y) or RW(y ~ drift()) is more appropriate in each case:

Australian Population (global_economy)

The population of australia is shown to be increasing in the autoplot. This lead me to believe it would be a best fit to use the RW(y ~ drift()).

aus_pop <- global_economy %>%
  filter(Country == "Australia") %>%
  select(Year, Population)

aus_pop %>% autoplot(Population)

fit_pop <- aus_pop %>% model(RW(Population ~ drift()))
fit_pop %>% forecast(h = 10) %>% autoplot(aus_pop)

Bricks (aus_production)

bricks <- aus_production %>%
  select(Quarter, Bricks)

autoplot(bricks) + labs(title = "Bricks Production Over Time", y = "Bricks Production", x = "Quarters")
## Plot variable not specified, automatically selected `.vars = Bricks`
## Warning: Removed 20 rows containing missing values or values outside the scale range
## (`geom_line()`).

# Fit a seasonal naive model
fit_bricks <- bricks %>% model(SNAIVE(Bricks))
bricks_forecast <- fit_bricks %>% forecast(h = "2 years")

# Plot the forecast
bricks_forecast %>% autoplot(bricks) +
  labs(title = "Bricks Production Forecast", y = "Bricks Production", x = "Quarter")
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning: Removed 8 rows containing missing values or values outside the scale range
## (`geom_line()`).
## Warning: Removed 20 rows containing missing values or values outside the scale range
## (`geom_line()`).

NSW Lambs (aus_livestock)

nsw_lambs <- aus_livestock %>%
  filter(State == "New South Wales", Animal == "Lambs") %>%
  select(Month, Count)

nsw_lambs %>% autoplot(Count) +
  labs(title = "NSW Lamb Production", y = "Number of Lambs", x = "Month")

# Fit a seasonal naive model
fit_lambs <- nsw_lambs %>% model(SNAIVE(Count))

#Plot the forecast
fit_lambs %>% forecast(h = "2 years") %>% autoplot(nsw_lambs) +
  labs(title = "Forecast of NSW Lamb Production", y = "Number of Lambs")

Household wealth (hh_budget).

hh_wealth <- hh_budget %>% select(Year, Wealth)

hh_wealth %>% autoplot(Wealth)

fit_wealth <- hh_wealth %>% model(RW(Wealth ~ drift()))

fit_wealth %>% forecast(h = 10) %>% autoplot(hh_wealth)

Australian takeaway food turnover (aus_retail).

```{r takeaway <- aus_retail %>% filter(Industry == “Takeaway Food Services”)

takeaway %>% autoplot(Turnover) + labs(title = “Takeaway Food Services Turnover”, y = “Turnover”)

Fit the SNAIVE model

fit_takeaway <- takeaway %>% model(SNAIVE(Turnover)) forecast_results <- fit_takeaway %>% forecast(h = “2 years”)

Plot the forecasted results

forecast_results %>% autoplot(takeaway) + labs(title = “Forecast of Takeaway Food Services Turnover”, y = “Turnover”)


2. Use the Facebook stock price (data set gafa_stock) to do the following:
  
Produce a time plot of the series.

```r
gafa_facebook <- gafa_stock %>% 
  filter(Symbol == "FB") %>%
  mutate(day = row_number()) %>%
  update_tsibble(index = day, regular = TRUE)

gafa_facebook %>% autoplot(Close) +
  labs(title= "Daily Close Price of Facebook", y = "Close Price(USD)")

Produce forecasts using the drift method and plot them.

fit_fb <- gafa_facebook %>% 
  model(RW(Close ~ drift()))

fit_fb %>% forecast(h = 50) %>% autoplot(gafa_facebook) +
  labs(title= "Daily Close Price of Facebook", y = "Close Price(USD)")

Show that the forecasts are identical to extending the line drawn between the first and last observations.

Show that the forecasts are identical to extending the line drawn between the first and last observations

first_last_line <- tibble(
  x = c(min(gafa_facebook$day), max(gafa_facebook$day)),
  y = c(first(gafa_facebook$Close), last(gafa_facebook$Close))
)

# Fit the model again with RW drift and overlay the line
fit_fb %>% forecast(h = 10) %>%
  autoplot(gafa_facebook) +
  geom_line(data = first_last_line, aes(x = x, y = y), color = "red", linetype = "dashed") +
  labs(title= "Daily Close Price Forecast with First and Last Line", y = "Close Price (USD)")

Try using some of the other benchmark functions to forecast the same data set. Which do you think is best? Why?

fit_naive <- gafa_facebook %>% model(NAIVE(Close))
fit_snaive <- gafa_facebook %>% model(SNAIVE(Close))
## Warning: 1 error encountered for SNAIVE(Close)
## [1] Non-seasonal model specification provided, use RW() or provide a different lag specification.
# Forecast and plot
fit_naive %>% forecast(h = 10) %>% autoplot(gafa_facebook) +
  labs(title= "Naive Forecast of Facebook Close Price", y = "Close Price (USD)")

fit_snaive %>% forecast(h = 10) %>% autoplot(gafa_facebook) +
  labs(title= "Seasonal Naive Forecast of Facebook Close Price", y = "Close Price (USD)")
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning: Removed 10 rows containing missing values or values outside the scale range
## (`geom_line()`).

3.Apply a seasonal naïve method to the quarterly Australian beer production data from 1992. Check if the residuals look like white noise, and plot the forecasts. The following code will help.

# Extract data of interest
recent_production <- aus_production |>
  filter(year(Quarter) >= 1992)
# Define and estimate a model
fit <- recent_production |> model(SNAIVE(Beer))
# Look at the residuals
fit |> gg_tsresiduals()
## Warning: Removed 4 rows containing missing values or values outside the scale range
## (`geom_line()`).
## Warning: Removed 4 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 4 rows containing non-finite outside the scale range
## (`stat_bin()`).

# Look a some forecasts
fit |> forecast() |> autoplot(recent_production)

What do you conclude?

  1. Repeat the previous exercise using the Australian Exports series from global_economy and the Bricks series from aus_production. Use whichever of NAIVE() or SNAIVE() is more appropriate in each case.
# Australian Exports
aus_exports <- global_economy %>%
  filter(Country == "Australia") %>%
  select(Year, Exports)

fit_exports <- aus_exports %>% model(NAIVE(Exports))
fit_exports %>% forecast(h = "2 years") %>% autoplot(aus_exports) +
  labs(title = "Forecast of Australian Exports", y = "Exports", x = "Year")

# Bricks
fit_bricks2 <- aus_production %>%
  filter(!is.na(Bricks)) %>%
  model(SNAIVE(Bricks ~ lag("year")))
fit_bricks2 %>% forecast() %>% autoplot(aus_production) +
  labs(title = "Bricks Production Forecast", y = "Bricks Production", x = "Quarter")
## Warning: Removed 20 rows containing missing values or values outside the scale range
## (`geom_line()`).

  1. For your retail time series (from Exercise 7 in Section 2.10):
  1. Create a training dataset consisting of observations before 2011 using

  2. Check that your data have been split appropriately by producing the following plot.

  3. Fit a seasonal naïve model using SNAIVE() applied to your training data (myseries_train).

  4. Check the residuals.

Do the residuals appear to be uncorrelated and normally distributed?

  1. Produce forecasts for the test data

  2. Compare the accuracy of your forecasts against the actual values.

# For your retail time series (from Exercise 7 in Section 2.10):
myseries <- aus_retail %>%
  filter(`Series ID` == sample(aus_retail$`Series ID`, 1))

myseries_train <- myseries %>%
  filter(year(Month) < 2011)

# Check the split
autoplot(myseries, Turnover) +
  autolayer(myseries_train, Turnover, colour = "red") +
  labs(title = "Training and Full Series", y = "Turnover")

# Fit a seasonal naïve model
fit_retail <- myseries_train %>% model(SNAIVE(Turnover))

# Check residuals
fit_retail |> gg_tsresiduals()
## Warning: Removed 12 rows containing missing values or values outside the scale range
## (`geom_line()`).
## Warning: Removed 12 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 12 rows containing non-finite outside the scale range
## (`stat_bin()`).

# Produce forecasts for the test data
fc <- fit_retail |> forecast(new_data = anti_join(myseries, myseries_train))
## Joining with `by = join_by(State, Industry, `Series ID`, Month, Turnover)`
fc |> autoplot(myseries) +
  labs(title = "Forecast vs Actual for Retail", y = "Turnover")

# Compare accuracy of forecasts against actual values
fit_retail |> accuracy() %>% select(MAE, RMSE, MAPE, MASE, RMSSE)
## # A tibble: 1 × 5
##     MAE  RMSE  MAPE  MASE RMSSE
##   <dbl> <dbl> <dbl> <dbl> <dbl>
## 1  6.61  8.30  8.54     1     1
fc |> accuracy(myseries) %>% select(MAE, RMSE, MAPE, MASE, RMSSE)
## # A tibble: 1 × 5
##     MAE  RMSE  MAPE  MASE RMSSE
##   <dbl> <dbl> <dbl> <dbl> <dbl>
## 1  29.9  38.7  15.6  4.52  4.66
  1. How sensitive are the accuracy measures to the amount of training data used?