Homework 3

Exercises: 5.1, 5.2, 5.3, 5.4 and 5.7

Loading libraries

library(fpp3)

## Warning: package 'fpp3' was built under R version 4.4.3

## Registered S3 methods overwritten by 'ggtime':
##   method           from      
##   autolayer.fbl_ts fabletools
##   autolayer.tbl_ts fabletools
##   autoplot.dcmp_ts fabletools
##   autoplot.fbl_ts  fabletools
##   autoplot.tbl_ts  fabletools
##   fortify.fbl_ts   fabletools

## ── Attaching packages ──────────────────────────────────────────── fpp3 1.0.3 ──

## ✔ tibble      3.2.1     ✔ tsibble     1.2.0
## ✔ dplyr       1.1.4     ✔ tsibbledata 0.4.1
## ✔ tidyr       1.3.1     ✔ ggtime      0.2.0
## ✔ lubridate   1.9.4     ✔ feasts      0.5.0
## ✔ ggplot2     4.0.0     ✔ fable       0.5.0

## Warning: package 'tsibble' was built under R version 4.4.3

## Warning: package 'ggtime' was built under R version 4.4.3

## Warning: package 'feasts' was built under R version 4.4.3

## Warning: package 'fabletools' was built under R version 4.4.3

## Warning: package 'fable' was built under R version 4.4.3

## ── Conflicts ───────────────────────────────────────────────── fpp3_conflicts ──
## ✖ lubridate::date()    masks base::date()
## ✖ dplyr::filter()      masks stats::filter()
## ✖ tsibble::intersect() masks base::intersect()
## ✖ tsibble::interval()  masks lubridate::interval()
## ✖ dplyr::lag()         masks stats::lag()
## ✖ tsibble::setdiff()   masks base::setdiff()
## ✖ tsibble::union()     masks base::union()

5.1: Produce forecasts for the following series using whichever of NAIVE(y), SNAIVE(y) or RW(y ~ drift()) is more appropriate in each case:

Australian Population (global_economy)

Data Prep

aus_population <- global_economy |> 
  filter(Country == "Australia") |> 
  select(Year, Population)

Data Visualization

aus_population |> 
  autoplot(Population) + 
  theme_light() +
  labs(
    title = "Australian Population", 
    y = "Population", 
    x = "Year"
  )

Specify Model

Since this is a population data then I think I should use the RW model since there is a steady upward trend.

fit <- aus_population |> 
  model(RW(Population ~ drift()))

Producing Forecast

aus_forecast <- fit |> forecast(h = 10)

Visualizing Forecasts

aus_forecast |> 
  autoplot(aus_population) + 
  theme_light() + 
  labs(
    title = "Australian Population Forecast", 
    x = "Year", 
    y = "Population"
  )

Bricks (aus_production)

Data Prep

bricks <- aus_production |>
  filter(!is.na(Bricks)) |> 
  select(Quarter, Bricks)

Data Visualization

bricks |> 
  autoplot(Bricks) + 
  theme_light() + 
  labs(
    title = "Australian Brick Production", 
    x = "Quarter", 
    y = "Bricks(millions)"
  )

Specify Model

Since this is seasonal the model I will use for this is SNAIVE.

fit <- bricks |> 
  model(SNAIVE(Bricks ~ lag("year")))

Producing Forecast

brick_forecast <- fit |> forecast(h = "2 years")

Visualizing Forecasts

brick_forecast |> 
  autoplot(bricks) + 
  theme_light() + 
  labs(
    title = "Australian Brick Production Forecast", 
    x = "Quarter", 
    y = "Bricks(millions)"
  )

NSW Lambs (aus_livestock)

Data Prep

nws_lambs <- aus_livestock |> 
  filter(State == "New South Wales", Animal == "Lambs") |> 
  select(Month, Count)

Data Visualization

nws_lambs |> 
  autoplot(Count) + 
  theme_light() + 
  labs(
    title = "NSW Lambs", 
    x = "Month", 
    y = "Count(thousands)"
  )

Specify Model

Since there is a seasonal pattern I will use the SNAIVE model.

fit <- nws_lambs |> 
  model(SNAIVE(Count ~ lag("year")))

Producing Forecast

lamb_forecast <- fit |> forecast(h = "2 years")

Visualizing Forecasts

lamb_forecast |> 
  autoplot(nws_lambs) + 
  labs( 
    title = "NSW Lamb Forecast", 
    x = "Month", 
    y = "Count(thousands)"
    )

Household wealth (hh_budget).

Data Prep

hh_wealth <- hh_budget |> 
  select(Country, Year, Wealth)

Data Visualization

hh_wealth |> 
  autoplot(Wealth) + 
  labs( 
    title = "Household Wealth", 
    x = "Year", 
    y = "Wealth"
    )

Specify Model

There isn’t a distinct seasonal trend so I will use the RW model.

fit <- hh_wealth |> 
  model(RW(Wealth ~ drift()))

Producing Forecast

wealth_forecast <- fit |> forecast(h = 10)

Visualizing Forecasts

wealth_forecast |> 
  autoplot(hh_wealth) +
  theme_light() + 
  labs( 
    title = "Household Wealth Forecast", 
    x = "Year", 
    y = "Wealth"
    )

Australian takeaway food turnover (aus_retail).

Data Prep

takeaway <- aus_retail |> 
  filter(Industry == "Takeaway food services") |> 
  summarise(Turnover = sum(Turnover))

Data Visualization

takeaway |> 
  autoplot(Turnover) + 
  labs( 
    title = "Australian Takeaway Food Turnover", 
    x = "Month", 
    y = "Turnover($millions)"
    )

Specify Model

There is a clear seasonal pattern so I will use the SNAIVE model here.

fit <- takeaway |> 
  model(SNAIVE(Turnover ~ lag("year")))

Producing Forecast

takeaway_forecast <- fit |> forecast(h = "2 years")

Visualizing Forecasts

takeaway_forecast |> 
  autoplot(takeaway) + 
  theme_light() + 
  labs( 
    title = "Australian Takeaway Food Turnover", 
    x = "Month", 
    y = "Turnover($millions)"
    )

5.2: Use the Facebook stock price (data set gafa_stock) to do the following:

a. Produce a time plot of the series.

Data Prep

facebook <- gafa_stock |> 
  filter(Symbol == "FB")

Time Plot

facebook |> 
  autoplot(Close) + 
  theme_light() + 
  labs(
    title = "Facebook Daily Closing Stock Price", 
    x = "Date", 
    y = "CLosing Price (USD)"
  )

b. Produce forecasts using the drift method and plot them.

Data Prep

facebook_fc <- gafa_stock |> 
  filter(Symbol == "FB") |>
  mutate(day = row_number()) |> 
  update_tsibble(index = day, regular = TRUE)

Specify Model

fit <- facebook_fc |> 
  model(RW(Close ~ drift()))

Forecasting

fb_forecast <- fit |> forecast(h = 90)

Visualize Forecast

fb_forecast |> 
  autoplot(facebook_fc) + 
  theme_light() + 
  labs( 
    title = "Facebook Stock Price Forecast", 
    x = "Trading Day", 
    y = "Closing Price (USD)"
    )

c. Show that the forecasts are identical to extending the line drawn between the first and last observations.

d. Try using some of the other benchmark functions to forecast the same data set. Which do you think is best? Why?

Data Prep

facebook <- gafa_stock |> 
  filter(Symbol == "FB") |> 
  mutate(day = row_number()) |> 
  update_tsibble(index = day, regular = TRUE)

Trying All Benchmark Functions

fit <- facebook |> 
  model(
    Mean = MEAN(Close), 
    Naive = NAIVE(Close), 
    Drift = RW(Close ~ drift())
  )

Forecast

facebook_fc <- fit |> forecast(h = 90)

Visualize all forecast together

facebook_fc |>
  autoplot(facebook, level = NULL) + 
  labs( 
    title = "Facebook Stock Price - All Forecasts", 
    x = "Trading Day", 
    y = "Closing Price(USD)") + 
  guides(color = guide_legend(title = "Method"))

5.3: Apply a seasonal naïve method to the quarterly Australian beer production data from 1992. Check if the residuals look like white noise, and plot the forecasts. The following code will help.

# Extract data of interest
recent_production <- aus_production |>
  filter(year(Quarter) >= 1992)
# Define and estimate a model
fit <- recent_production |> model(SNAIVE(Beer))
# Look at the residuals
fit |> gg_tsresiduals()

## Warning: Removed 4 rows containing missing values or values outside the scale range
## (`geom_line()`).

## Warning: Removed 4 rows containing missing values or values outside the scale range
## (`geom_point()`).

## Warning: Removed 4 rows containing non-finite outside the scale range
## (`stat_bin()`).

## Warning: Removed 4 rows containing missing values or values outside the scale range
## (`geom_rug()`).

# Look a some forecasts
fit |> forecast() |> autoplot(recent_production)

## What do you conclude?

I see a significant spike on the ACF graph around lag 4, which I am assuming the model is missing the pattern there. The histogram is skewed to the left so the model is making more negative errors. Ths means that the model is probably overestimating the production more often than not.

5.4: Repeat the previous exercise using the Australian Exports series from global_economy and the Bricks series from aus_production. Use whichever of NAIVE() or SNAIVE() is more appropriate in each case.

Australian Exports

Data Prep

aus_exports <- global_economy |> 
  filter(Country == "Australia") |> 
  select(Year, Exports)

Model Fit

fit_exports <- aus_exports |> 
  model(NAIVE(Exports))

Checking Residuals

fit_exports |> gg_tsresiduals()

## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_line()`).

## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).

## Warning: Removed 1 row containing non-finite outside the scale range
## (`stat_bin()`).

## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_rug()`).

Forecast

fit_exports |> forecast() |> autoplot(aus_exports)

Bricks

Data Prep

bricks <- aus_production |> 
  filter(!is.na(Bricks)) |> 
  select(Quarter, Bricks)

Model Fit

fit_bricks <- bricks |> 
  model(SNAIVE(Bricks ~ lag("year")))

Checking Residuals

fit_bricks |> gg_tsresiduals()

## Warning: Removed 4 rows containing missing values or values outside the scale range
## (`geom_line()`).

## Warning: Removed 4 rows containing missing values or values outside the scale range
## (`geom_point()`).

## Warning: Removed 4 rows containing non-finite outside the scale range
## (`stat_bin()`).

## Warning: Removed 4 rows containing missing values or values outside the scale range
## (`geom_rug()`).

Forecast

fit_bricks |> forecast() |> autoplot(bricks)

5.7: For your retail time series (from Exercise 7 in Section 2.10):

myseries <- aus_retail |> 
  filter(
    State == "New South Wales", 
    Industry == "Clothing retailing"
  )

head(myseries)

aus_retail |> distinct(State)

aus_retail |> distinct(Industry)

a. Create a training dataset consisting of observations before 2011 using

myseries_train <- myseries |>
  filter(year(Month) < 2011)

b. Check that your data have been split appropriately by producing the following plot.

autoplot(myseries, Turnover) +
  autolayer(myseries_train, Turnover, colour = "red")

c. Fit a seasonal naïve model using SNAIVE() applied to your training data (myseries_train).

fit <- myseries_train |>
  model(SNAIVE(Turnover))

d. Check the residuals.

fit |> gg_tsresiduals()

## Warning: Removed 12 rows containing missing values or values outside the scale range
## (`geom_line()`).

## Warning: Removed 12 rows containing missing values or values outside the scale range
## (`geom_point()`).

## Warning: Removed 12 rows containing non-finite outside the scale range
## (`stat_bin()`).

## Warning: Removed 12 rows containing missing values or values outside the scale range
## (`geom_rug()`).

Do the residuals appear to be uncorrelated and normally distributed?

They appear to be uncorrelated. There are too many significant spikes in the ACF plot. The increasing variability inthe time plot shows that the model is not truly representing the recent years. THhe historgram ont he other hadn appears to be normally ditributed and we can assume some normality because of that. OVerall there is significant autocorrelation so the residuals fail the white noise test.

e. Produce forecasts for the test data

fc <- fit |>
  forecast(new_data = anti_join(myseries, myseries_train))

## Joining with `by = join_by(State, Industry, `Series ID`, Month, Turnover)`

fc |> autoplot(myseries)

f. Compare the accuracy of your forecasts against the actual values.

fit |> accuracy()

fc |> accuracy(myseries)

g. How sensitive are the accuracy measures to the amount of training data used?

I think that if we use less training data then the errors would increase. But then also increasing the size will decrease the size of the test set and that would make it less accuate. The MPE is only 21%, and that suggests that the model is not capturing all of the patterns and a better more sophisticated model is needed.

Data624 - HW 3

Homework 3

Loading libraries

5.1: Produce forecasts for the following series using whichever of NAIVE(y), SNAIVE(y) or RW(y ~ drift()) is more appropriate in each case:

Australian Population (global_economy)

Data Prep

Data Visualization

Specify Model

Producing Forecast

Visualizing Forecasts

Bricks (aus_production)

Data Prep

Data Visualization

Specify Model

Producing Forecast

Visualizing Forecasts

NSW Lambs (aus_livestock)

Data Prep

Data Visualization

Specify Model

Producing Forecast

Visualizing Forecasts

Household wealth (hh_budget).

Data Prep

Data Visualization

Specify Model

Producing Forecast

Visualizing Forecasts

Australian takeaway food turnover (aus_retail).

Data Prep

Data Visualization

Specify Model

Producing Forecast

Visualizing Forecasts

5.2: Use the Facebook stock price (data set gafa_stock) to do the following:

a. Produce a time plot of the series.

Data Prep

Time Plot

b. Produce forecasts using the drift method and plot them.

Data Prep

Specify Model

Forecasting

Visualize Forecast

c. Show that the forecasts are identical to extending the line drawn between the first and last observations.

d. Try using some of the other benchmark functions to forecast the same data set. Which do you think is best? Why?

Data Prep

Trying All Benchmark Functions

Forecast

Visualize all forecast together

5.3: Apply a seasonal naïve method to the quarterly Australian beer production data from 1992. Check if the residuals look like white noise, and plot the forecasts. The following code will help.

5.4: Repeat the previous exercise using the Australian Exports series from global_economy and the Bricks series from aus_production. Use whichever of NAIVE() or SNAIVE() is more appropriate in each case.

Australian Exports

Data Prep

Model Fit

Checking Residuals

Forecast

Bricks

Data Prep

Model Fit

Checking Residuals

Forecast

5.7: For your retail time series (from Exercise 7 in Section 2.10):

a. Create a training dataset consisting of observations before 2011 using

b. Check that your data have been split appropriately by producing the following plot.

c. Fit a seasonal naïve model using SNAIVE() applied to your training data (myseries_train).

d. Check the residuals.

e. Produce forecasts for the test data

f. Compare the accuracy of your forecasts against the actual values.

g. How sensitive are the accuracy measures to the amount of training data used?