DATA 624 - Homework #3

Problem 5.1
Problem 5.2
Problem 5.3
Problem 5.4
Problem 5.7

Problem 5.1

Produce forecasts for the following series using whichever of NAIVE(y), SNAIVE(y) or RW(y ~ drift()) is more appropriate in each case:

Australian Population (global_economy)

aus_economy_pop <- global_economy %>% 
  filter(Country == 'Australia')

aus_fit_model <- aus_economy_pop %>%
  model(RW(Population ~ drift()))

aus_forecast <- aus_fit_model %>%
  forecast(h=12)

aus_forecast %>%
  autoplot(aus_economy_pop, level=NULL) +
  labs(
    y="Population",
    title = "Forecast of Australian Population"
  )

Bricks (aus_production)

bricks_data <- aus_production %>%
  filter_index("1970 Q1" ~ "2004 Q4") %>%
  select(Bricks)

bricks_model <- bricks_data %>%
  model(SNAIVE(Bricks))

bricks_fcast <- bricks_model %>%
  forecast(h=4)

bricks_fcast %>%
  autoplot(bricks_data)

NSW Lambs (aus_livestock)

nsw_lambs <- aus_livestock %>%
  filter(Animal == 'Lambs' & State == 'New South Wales')

nsw_lambs %>% autoplot(Count)

nsw_lambs_model <- nsw_lambs %>%
  model(SNAIVE(Count))
  

nsw_lambs_fcast <- nsw_lambs_model %>%
  forecast(h=24)


nsw_lambs_fcast %>%
  autoplot(nsw_lambs)

Household wealth (hh_budget)

hh_wealth <- as_tibble(hh_budget) %>%
  group_by(Year) %>%
  summarize(total_wealth = sum(Wealth)) %>%
  tsibble(index=Year)


hh_wealth %>%
  autoplot()

## Plot variable not specified, automatically selected `.vars = total_wealth`

hh_wealth_model <- hh_wealth %>%
  model(RW(total_wealth ~ drift()))


hh_wealth_fcast <- hh_wealth_model %>%
  forecast(h=5)


hh_wealth_fcast %>%
  autoplot(hh_wealth)

Australian takeaway food turnover (aus_retail)

aus_retail_turnover <- aus_retail %>%
  filter(Industry == 'Takeaway food services') %>%
  as_tibble() %>%
  group_by(Month) %>%
  summarize(total_turnover = sum(Turnover)) %>%
  as_tsibble()

## Using `Month` as index variable.

aus_retail_model <- aus_retail_turnover %>%
  model(SNAIVE(total_turnover))


aus_retail_fcast <- aus_retail_model %>% forecast(h=48) 

aus_retail_fcast %>%
  autoplot(aus_retail_turnover)

dcmp <- aus_retail_turnover |>
  model(STL(total_turnover ~ trend(window = 7), robust = TRUE)) %>%
  components() %>%
  select(-.model)

dcmp %>%
  model(NAIVE(season_adjust)) %>%
  forecast() %>%
  autoplot(dcmp)

fit_dcmp <- 
  aus_retail_turnover %>%
  model(stlf = decomposition_model(
    STL(total_turnover ~ trend(window=7), robust=TRUE),
    NAIVE(season_adjust)
  ))

fit_dcmp %>%
  forecast() %>%
  autoplot(aus_retail_turnover)

Problem 5.2

Use the Facebook stock price (data set gafa_stock) to do the following:

Produce a time plot of the series.

fb_trading <- gafa_stock %>%
  filter(Symbol == 'FB') %>%
  mutate(trading_day = row_number())

fb_trading %>% 
  autoplot(Close)

b. Produce forecasts using the drift method and plot them.

fb_trading <- fb_trading %>%
  update_tsibble(index=trading_day, regular=TRUE)

fb_trading_model <- fb_trading %>%
  model(RW(Close~drift()))

fb_trading_fcast <- fb_trading_model %>%
  forecast(h=30)

fb_trading_fcast %>%
  autoplot(fb_trading)

c. Show that the forecasts are identical to extending the line drawn between the first and last observations.

fb_trading_fcast %>%
  autoplot(fb_trading) + 
  geom_segment(data=fb_trading, aes(x=1, y=fb_trading[[1,6]],xend=1258,yend=fb_trading[[1258,6]]))

## Warning: Use of `fb_trading[[1, 6]]` is discouraged.
## ℹ Use `.data[[1]]` instead.

## Warning: Use of `fb_trading[[1258, 6]]` is discouraged.
## ℹ Use `.data[[1258]]` instead.

d. Try using some of the other benchmark functions to forecast the same data set. Which do you think is best? Why?

fb_trading_model <- fb_trading %>%
  model(
    Mean = MEAN(Close),
    Naive = NAIVE(Close),
    Drift = NAIVE(Close~drift())
    )


fb_trading_fcast <- fb_trading_model %>%
  forecast(h=30)


fb_trading_fcast %>%
  autoplot(fb_trading, level=NULL)

Problem 5.3

Apply a seasonal naïve method to the quarterly Australian beer production data from 1992. Check if the residuals look like white noise, and plot the forecasts. The following code will help.

# Extract data of interest
recent_production <- aus_production |>
  filter(year(Quarter) >= 1992)

# Define and estimate a model
fit <- recent_production |> model(SNAIVE(Beer))

# Look at the residuals
fit |> gg_tsresiduals()

## Warning: Removed 4 rows containing missing values (`geom_line()`).

## Warning: Removed 4 rows containing missing values (`geom_point()`).

## Warning: Removed 4 rows containing non-finite values (`stat_bin()`).

# Look a some forecasts
fit |> forecast() |> autoplot(recent_production)

What do you conclude?

Problem 5.4

Repeat the previous exercise using the Australian Exports series from global_economy and the Bricks series from aus_production. Use whichever of NAIVE() or SNAIVE() is more appropriate in each case.

Australian Exports (global_economy)

# Extract data of interest
export_data <- global_economy %>%
  filter(Country == 'Australia') %>%
  select(Exports)

# Define and estimate a model
fit <- export_data |> model(NAIVE(Exports))

# Look at the residuals
fit |> gg_tsresiduals()

## Warning: Removed 1 row containing missing values (`geom_line()`).

## Warning: Removed 1 rows containing missing values (`geom_point()`).

## Warning: Removed 1 rows containing non-finite values (`stat_bin()`).

# Look a some forecasts
fit |> forecast() |> autoplot(export_data)

Bricks (aus_production)

# Define and estimate a model
fit <- bricks_data |> model(SNAIVE(Bricks))

# Look at the residuals
fit |> gg_tsresiduals()

## Warning: Removed 4 rows containing missing values (`geom_line()`).

## Warning: Removed 4 rows containing missing values (`geom_point()`).

## Warning: Removed 4 rows containing non-finite values (`stat_bin()`).

# Look a some forecasts
fit |> forecast() |> autoplot(bricks_data)

Problem 5.7

For your retail time series (from Exercise 7 in Section 2.10):

Create a training dataset consisting of observations before 2011 using

set.seed(12345678)
myseries <- aus_retail |>
  filter(`Series ID` == sample(aus_retail$`Series ID`,1))




myseries_train <- myseries |>
  filter(year(Month) < 2011)

Check that your data have been split appropriately by producing the following plot.

autoplot(myseries, Turnover) +
  autolayer(myseries_train, Turnover, colour = "red")

Fit a seasonal naïve model using SNAIVE() applied to your training data (myseries_train).

fit <- myseries_train |>
  model(SNAIVE(Turnover))

Check the residuals.Do the residuals appear to be uncorrelated and normally distributed?

fit |> gg_tsresiduals()

## Warning: Removed 12 rows containing missing values (`geom_line()`).

## Warning: Removed 12 rows containing missing values (`geom_point()`).

## Warning: Removed 12 rows containing non-finite values (`stat_bin()`).

Produce forecasts for the test data

fc <- fit |>
  forecast(new_data = anti_join(myseries, myseries_train))

## Joining with `by = join_by(State, Industry, `Series ID`, Month, Turnover)`

fc |> autoplot(myseries)

Compare the accuracy of your forecasts against the actual values.

fit |> accuracy()

## # A tibble: 1 × 12
##   State     Indus…¹ .model .type    ME  RMSE   MAE   MPE  MAPE  MASE RMSSE  ACF1
##   <chr>     <chr>   <chr>  <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Northern… Clothi… SNAIV… Trai… 0.439  1.21 0.915  5.23  12.4     1     1 0.768
## # … with abbreviated variable name ¹Industry

fc |> accuracy(myseries)

## # A tibble: 1 × 12
##   .model     State Indus…¹ .type    ME  RMSE   MAE   MPE  MAPE  MASE RMSSE  ACF1
##   <chr>      <chr> <chr>   <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 SNAIVE(Tu… Nort… Clothi… Test  0.836  1.55  1.24  5.94  9.06  1.36  1.28 0.601
## # … with abbreviated variable name ¹Industry

How sensitive are the accuracy measures to the amount of training data used?