Part I

library(fable)
## Loading required package: fabletools
## Registered S3 method overwritten by 'tsibble':
##   method               from 
##   as_tibble.grouped_df dplyr
library(fabletools)
library(feasts)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(tsibble)
## 
## Attaching package: 'tsibble'
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, union
library(ggplot2)
library(fredr)

fredr_set_key("430508d0ad1df25ecddc34d5581dafc8")


unrate <- fredr("UNRATE", 
             observation_start = as.Date("1948-01-01"), 
             observation_end = as.Date("2025-08-01") ) |> 
  mutate(Month = yearmonth(date), value) |>
  as_tsibble(index = Month) 

autoplot(unrate)
## Plot variable not specified, automatically selected `.vars = value`

unrate |>
  gg_tsdisplay(difference(value), plot_type='partial')
## Warning: `gg_tsdisplay()` was deprecated in feasts 0.4.2.
## ℹ Please use `ggtime::gg_tsdisplay()` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_line()`).
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).

## I tried ARIMA(1,1,0) and ARIMA(0,1,1) because, after first differencing, the ACF and PACF show no strong spikes that cross the significance bounds (the blue lines).
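# As a cross-check on using a single ordinary difference (my addition, not part
# of the original workflow), the unit-root feature from feasts can suggest the
# number of differences needed; a minimal sketch:
unrate |>
  features(value, unitroot_ndiffs)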

unrate_fit <- unrate |>
  model(arima110 = ARIMA(value ~ pdq(1,1,0)),
        arima011 = ARIMA(value ~ pdq(0,1,1)),
        stepwise = ARIMA(value),
        search = ARIMA(value, stepwise=FALSE))

glance(unrate_fit) |> arrange(AICc) |> select(.model:BIC)
## # A tibble: 4 × 6
##   .model   sigma2 log_lik   AIC  AICc   BIC
##   <chr>     <dbl>   <dbl> <dbl> <dbl> <dbl>
## 1 stepwise  0.171   -498. 1003. 1003. 1017.
## 2 search    0.171   -498. 1005. 1005. 1030.
## 3 arima011  0.172   -501. 1006. 1006. 1016.
## 4 arima110  0.172   -501. 1006. 1006. 1016.
# The AICc values are very close across all four specifications; the stepwise and exhaustive-search models edge out my manual (1,1,0) and (0,1,1) choices by only a few points.
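# To see exactly which orders the stepwise search settled on (a sketch I added;
# select() on the mable pulls out a single fitted model), one could run:
unrate_fit |>
  select(stepwise) |>
  report()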


#Setting up the data to have a test and train sample with 80%/20% 
split_index <- floor(0.8 * nrow(unrate)) 
train <- unrate[1:split_index, ] 
test <- unrate[(split_index + 1):nrow(unrate), ] 
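# An equivalent split written on the time index rather than row positions (my
# sketch; the cutoff is simply whichever month falls 80% of the way through):
cutoff <- unrate$Month[split_index]
train <- unrate |> filter(Month <= cutoff)
test <- unrate |> filter(Month > cutoff)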

autoplot(train)
## Plot variable not specified, automatically selected `.vars = value`

#The automatic ARIMA selection on the training sample gives ARIMA(1,1,2)(2,0,1)[12]. Its AIC is much smaller than the full-sample fits above (keeping in mind that AICs from different samples are not directly comparable), and the selected orders differ from my manual choices.

fit <- train |>
  model(ARIMA(value))
report(fit)
## Series: value 
## Model: ARIMA(1,1,2)(2,0,1)[12] 
## 
## Coefficients:
##          ar1      ma1     ma2    sar1     sar2     sma1
##       0.8355  -0.8527  0.2391  0.4795  -0.0863  -0.7149
## s.e.  0.0365   0.0499  0.0370  0.0992   0.0536   0.0948
## 
## sigma^2 estimated as 0.03701:  log likelihood=172.02
## AIC=-330.05   AICc=-329.9   BIC=-297.77
fc<- fit |> forecast(h=nrow(test)) 

fc |>
  autoplot(train) +
  labs(y = "Unemployment Rate", title = "Unemployment")

acc <- accuracy(object = fc, test ) |>
  select(.model, ME, MPE, RMSE, MAE, MAPE ) 

library(kableExtra) # Print with knitr::kable 
## 
## Attaching package: 'kableExtra'
## The following object is masked from 'package:dplyr':
## 
##     group_rows
kable(acc, caption = "Forecast Accuracy Metrics") |> 
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"), 
                full_width = FALSE)
Forecast Accuracy Metrics

.model        ME         MPE        RMSE      MAE      MAPE
ARIMA(value)  -2.198506  -55.07731  2.937776  2.48955  57.68755
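# For context (my addition), the training-set accuracy can be compared with the
# test-set numbers above; a large gap suggests the model fits the history much
# better than it forecasts:
bind_rows(
  accuracy(fit),      # training-set (one-step fitted) accuracy
  accuracy(fc, test)  # test-set (forecast) accuracy
) |>
  select(.model, .type, RMSE, MAE, MAPE)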
unrate |>
  ACF(value) |>
  autoplot()

unrate |>
  PACF(value) |>
  autoplot()

fit |>
  gg_tsresiduals()
## Warning: `gg_tsresiduals()` was deprecated in feasts 0.4.2.
## ℹ Please use `ggtime::gg_tsresiduals()` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

## The residual plot and ACF show no significant autocorrelation, so the residuals behave like white noise.
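# To back up the white-noise claim (a sketch I added), a Ljung-Box test on the
# innovation residuals should give a large p-value; dof = 6 matches the six
# estimated coefficients of the ARIMA(1,1,2)(2,0,1)[12] model:
augment(fit) |>
  features(.innov, ljung_box, lag = 24, dof = 6)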

Part II

#median CPI
mcpi <- fredr("MEDCPIM158SFRBCLE", 
             observation_start = as.Date("1948-01-01"), 
             observation_end = as.Date("2025-08-01") ) |> 
  mutate(Month = yearmonth(date), value) |>
  as_tsibble(index = Month) 

autoplot(mcpi)
## Plot variable not specified, automatically selected `.vars = value`

#seasonal difference 
mcpi |>
  gg_tsdisplay(difference(value, 12), plot_type='partial')
## Warning: Removed 12 rows containing missing values or values outside the scale range
## (`geom_line()`).
## Warning: Removed 12 rows containing missing values or values outside the scale range
## (`geom_point()`).

## I tried ARIMA(1,1,0) and ARIMA(0,1,1) because, after the seasonal difference, the ACF and PACF show no strong spikes that cross the significance bounds (the blue lines).
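# Since Part II uses a seasonal (lag-12) difference, a quick check on whether a
# seasonal difference is actually indicated (my addition) is the nsdiffs
# feature from feasts:
mcpi |>
  features(value, unitroot_nsdiffs)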

mcpi_fit <- mcpi |>
  model(arima110 = ARIMA(value ~ pdq(1,1,0)),
        arima011 = ARIMA(value ~ pdq(0,1,1)),
        stepwise = ARIMA(value),
        search = ARIMA(value, stepwise=FALSE))

glance(mcpi_fit) |> arrange(AICc) |> select(.model:BIC)
## # A tibble: 4 × 6
##   .model   sigma2 log_lik   AIC  AICc   BIC
##   <chr>     <dbl>   <dbl> <dbl> <dbl> <dbl>
## 1 arima011  0.588   -589. 1183. 1183. 1196.
## 2 stepwise  0.588   -589. 1183. 1183. 1196.
## 3 search    0.590   -588. 1188. 1188. 1214.
## 4 arima110  0.660   -618. 1243. 1243. 1260.
# The (0,1,1) specification and the stepwise selection tie for the lowest AICc; the (1,1,0) model is clearly worse, with an AICc roughly 60 points higher.


#Setting up the data to have a test and train sample with 80%/20% 
split_index <- floor(0.8 * nrow(mcpi)) 
train <- mcpi[1:split_index, ] 
test <- mcpi[(split_index + 1):nrow(mcpi), ] 

autoplot(train)
## Plot variable not specified, automatically selected `.vars = value`

#The automatic ARIMA selection on the training sample gives ARIMA(0,1,1)(1,0,0)[12]. Its non-seasonal part matches my manual (0,1,1) choice, but it adds a seasonal AR term I did not select, and its AIC is smaller than the models fit above (keeping in mind that AICs from different samples are not directly comparable).

fit <- train |>
  model(ARIMA(value))
report(fit)
## Series: value 
## Model: ARIMA(0,1,1)(1,0,0)[12] 
## 
## Coefficients:
##           ma1     sar1
##       -0.7664  -0.1035
## s.e.   0.0296   0.0501
## 
## sigma^2 estimated as 0.5416:  log likelihood=-453.35
## AIC=912.7   AICc=912.75   BIC=924.73
#The AIC for this model is much smaller than for my manually selected specifications. The MA1 coefficient of about -0.77 indicates a strong negative relationship with the previous period's shock, and the seasonal AR1 coefficient of about -0.10 indicates only weak dependence on the same month one year earlier.
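# The estimated coefficients and standard errors can also be pulled into a tidy
# table (a small sketch I added) for easier reading than report():
tidy(fit) |>
  select(.model, term, estimate, std.error)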

fit |>
  gg_tsresiduals()

## The residual plot and ACF again show no significant autocorrelation, so the residuals behave like white noise.

fc<- fit |> forecast(h=nrow(test)) 

fc |>
  autoplot(train) +
  labs(y = "Median CPI", title = "Median CPI")

acc <- accuracy(object = fc, test ) |>
  select(.model, ME, MPE, RMSE, MAE, MAPE ) 

library(kableExtra) # Print with knitr::kable 

kable(acc, caption = "Forecast Accuracy Metrics") |> 
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"), 
                full_width = FALSE)
Forecast Accuracy Metrics

.model        ME        MPE       RMSE      MAE       MAPE
ARIMA(value)  1.129489  16.17006  1.997608  1.400813  32.6086
#The mean error is about 1.13 percentage points (the forecasts run below the actual values on average), and the MAPE of roughly 33% indicates only a moderately good fit over the test period.
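# To put the MAPE in context (my sketch; NAIVE() is the random-walk benchmark
# from the fable package), the ARIMA forecasts can be compared with a naive
# forecast on the same train/test split:
train |>
  model(arima = ARIMA(value), naive = NAIVE(value)) |>
  forecast(h = nrow(test)) |>
  accuracy(test) |>
  select(.model, RMSE, MAE, MAPE)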