Part I

library(fable)
## Loading required package: fabletools
## Registered S3 method overwritten by 'tsibble':
##   method               from 
##   as_tibble.grouped_df dplyr
library(fabletools)
library(feasts)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(tsibble)
## 
## Attaching package: 'tsibble'
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, union
library(ggplot2)
library(fredr)
library(tidyr)

fredr_set_key("430508d0ad1df25ecddc34d5581dafc8")

#all employees (nonfarm)
emp <- fredr("PAYEMS", 
             observation_start = as.Date("1939-01-01"), 
             observation_end = as.Date("2025-08-01") ) |> 
  mutate(Month = yearmonth(date), value) |>
  as_tsibble(index = Month) 

autoplot(emp)
## Plot variable not specified, automatically selected `.vars = value`

#seasonal difference 
emp |>
  gg_tsdisplay(difference(value, 12), plot_type='partial')
## Warning: `gg_tsdisplay()` was deprecated in feasts 0.4.2.
## ℹ Please use `ggtime::gg_tsdisplay()` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: Removed 12 rows containing missing values or values outside the scale range
## (`geom_line()`).
## Warning: Removed 12 rows containing missing values or values outside the scale range
## (`geom_point()`).

##tried (1,1,0) and (0,1,1) because the ACF and PACF dont show strong movements and dont hit the blue lines. 

emp_fit <- emp |>
  model(arima110 = ARIMA(value ~ pdq(1,1,0)),
        arima011 = ARIMA(value ~ pdq(0,1,1)),
        stepwise = ARIMA(value),
        search = ARIMA(value, stepwise=FALSE))

glance(emp_fit) |> arrange(AICc) |> select(.model:BIC)
## # A tibble: 4 × 6
##   .model    sigma2 log_lik    AIC   AICc    BIC
##   <chr>      <dbl>   <dbl>  <dbl>  <dbl>  <dbl>
## 1 stepwise 485062.  -8274. 16556. 16556. 16576.
## 2 search   485062.  -8274. 16556. 16556. 16576.
## 3 arima011 491139.  -8281. 16568. 16568. 16583.
## 4 arima110 491664.  -8282. 16569. 16569. 16584.
#the AICs for both models are pretty large. 


#Setting up the data to have a test and train sample with 80%/20% 
split_index <- floor(0.8 * nrow(emp)) 
train <- emp[1:split_index, ] 
test <- emp[(split_index + 1):nrow(emp), ] 

autoplot(train)
## Plot variable not specified, automatically selected `.vars = value`

#The ARIMA model gives Model: ARIMA(1,1,1)(2,0,2)[12], this AIC is much smaller than the ones I used. The model parameters selected were different from my manual selection. 

fit <- train|>
  model(ARIMA(value))
report(fit)
## Series: value 
## Model: ARIMA(1,1,2)(1,0,1)[12] w/ drift 
## 
## Coefficients:
##          ar1      ma1     ma2    sar1     sma1  constant
##       0.8817  -0.6680  0.1212  0.5982  -0.6569    6.1369
## s.e.  0.0253   0.0427  0.0370  0.2093   0.1956    0.9699
## 
## sigma^2 estimated as 32677:  log likelihood=-5495.34
## AIC=11004.67   AICc=11004.81   BIC=11037.73
#The AIC for the model is pretty small compared to the results from my manual selection of the parameters. The coefficients for AR1 shows the correlation with the prior time, which is pretty strong at .85. The MA1 shows that theres a negative relationship with the prior time periods shocks. 

fit |>
  gg_tsresiduals()
## Warning: `gg_tsresiduals()` was deprecated in feasts 0.4.2.
## ℹ Please use `ggtime::gg_tsresiduals()` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

##plot shows it is acting as white noise. 

fc<- fit |> forecast(h=nrow(test)) 

fc |>
  autoplot(train) +
  labs(y = "Median CPI", title = "Median CPI")

acc <- accuracy(object = fc, test ) |>
  select(.model, ME, MPE, RMSE, MAE, MAPE ) 

library(kableExtra) # Print with knitr::kable 
## 
## Attaching package: 'kableExtra'
## The following object is masked from 'package:dplyr':
## 
##     group_rows
kable(acc, caption = "Forecast Accuracy Metrics - ARIMA") |> 
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"), 
                full_width = FALSE)
Forecast Accuracy Metrics - ARIMA
.model ME MPE RMSE MAE MAPE
ARIMA(value) -6795.826 -4.838017 7650.526 6795.826 4.838017
#The mean error is pretty small here at -1, which means the model is a pretty good fit. 


### ETS MODEL

ets <- train |> model(ETS(value))
#report(ets)

components(ets) |>
  autoplot() +
  labs(title = "ETS(M,N,A) components")
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_line()`).

fc_ets <- ets |>
  forecast(h = nrow(test))


ets_plot<- autoplot(train) +
  autolayer(fc_ets, color = "orange") +
  autolayer(test, color = "hotpink") +
  labs(
    title = "Employment (NonFarm)",
    y = "Thousands of People", 
    caption = "Data Source: FRED") +
    theme_light()
## Plot variable not specified, automatically selected `.vars = value`
## Plot variable not specified, automatically selected `.vars = value`
print(ets_plot)

acc <- accuracy(object = fc_ets, test ) |>
  select(.model, ME, MPE, RMSE, MAE, MAPE ) 

library(kableExtra) # Print with knitr::kable 

kable(acc, caption = "Forecast Accuracy Metrics - ETS") |> 
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"), 
                full_width = FALSE)
Forecast Accuracy Metrics - ETS
.model ME MPE RMSE MAE MAPE
ETS(value) 6859.29 4.395092 11452.28 9326.642 6.270556

The ETS and ARIMA models are not the same. The two forecasting plots show their difference. Based off the AIC and ME terms neither are great fits for the data.

#Dynamic Regression Model

#ARIMA(y ~ x + pdq(1,1,0))

#housing starts
hs <- fredr("HOUST",
            observation_start = as.Date("1990-01-01")) |>
  transmute(Date = yearmonth(date), Housing = value)

#30yr rates
mort <- fredr("MORTGAGE30US",
              observation_start = as.Date("1990-01-01")) |>
  transmute(Date = yearmonth(date), MortgageRate = value) |>
  group_by(Date) |>
  summarize(MortgageRate = mean(MortgageRate, na.rm = TRUE))

housing_data <- left_join(hs, mort, by = "Date") |>
  arrange(Date) |>
  mutate(
    Housing = (Housing - lag(Housing)) / lag(Housing) * 100,
    MortgageRate = (MortgageRate - lag(MortgageRate)) / lag(MortgageRate) * 100
  ) |>
  drop_na() |>
  as_tsibble(index = Date)


fit <- housing_data |>
  model(ARIMA(Housing ~ MortgageRate))
report(fit)
## Series: Housing 
## Model: LM w/ ARIMA(1,0,0)(1,0,2)[12] errors 
## 
## Coefficients:
##           ar1    sar1     sma1     sma2  MortgageRate  intercept
##       -0.3332  0.5840  -0.7569  -0.0344        0.0085     0.2954
## s.e.   0.0463  0.1928   0.2040   0.0890        0.0801     0.1410
## 
## sigma^2 estimated as 51.56:  log likelihood=-1445.67
## AIC=2905.34   AICc=2905.6   BIC=2933.73
#residuals() function.

bind_rows(
    `Regression residuals` =
        as_tibble(residuals(fit, type = "regression")),
    `ARIMA residuals` =
        as_tibble(residuals(fit, type = "innovation")),
    .id = "type"
  ) |>
  mutate(
    type = factor(type, levels=c(
      "Regression residuals", "ARIMA residuals"))
  ) |>
  ggplot(aes(x = Date, y = .resid)) +
  geom_line() +
  facet_grid(vars(type))

The model looked at Housing activity on MortgageRate, using an ARIMA(1,0,0)(1,0,2)[12]. The use of mortgage rate on housinf starts was not statitically significant. No differenceing was used. It used AR(1) and then a Seasonal AR(1) and MA(2). Mortgage rates didnt have a big impact on the housing starts. The model found that housing is very seasonal. The AIC is very high which means its not a great fit.