Goal: Apply Matt Dancho’s tutorial to weekly initial unemployment claims for the New England states.

The following is a replication of Matt Dancho’s tutorial on this page.

# Earliest date to request for every series.
start_date <- "1989-01-01"

# Series identifiers for initial claims, one per New England state.
symbols_txt <- c(
  "CTICLAIMS", # Connecticut
  "MEICLAIMS", # Maine
  "MAICLAIMS", # Massachusetts
  "NHICLAIMS", # New Hampshire
  "RIICLAIMS", # Rhode Island
  "VTICLAIMS"  # Vermont
)

# Download all six series into one long table, give the value column a
# descriptive name, and replace the series identifiers with state names.
claims_tbl <- symbols_txt %>%
  tq_get(get = "economic.data", from = start_date) %>%
  rename(claims = price) %>%
  mutate(
    symbol = fct_recode(
      symbol,
      "Connecticut"   = "CTICLAIMS",
      "Maine"         = "MEICLAIMS",
      "Massachusetts" = "MAICLAIMS",
      "New Hampshire" = "NHICLAIMS",
      "Rhode Island"  = "RIICLAIMS",
      "Vermont"       = "VTICLAIMS"
    )
  )

Plotting Time series

# Print the long-format claims table (columns: symbol, date, claims).
claims_tbl
## # A tibble: 11,358 × 3
##    symbol      date       claims
##    <fct>       <date>      <int>
##  1 Connecticut 1989-01-07   8345
##  2 Connecticut 1989-01-14   6503
##  3 Connecticut 1989-01-21   3821
##  4 Connecticut 1989-01-28   4663
##  5 Connecticut 1989-02-04   4162
##  6 Connecticut 1989-02-11   4337
##  7 Connecticut 1989-02-18   4079
##  8 Connecticut 1989-02-25   3556
##  9 Connecticut 1989-03-04   3826
## 10 Connecticut 1989-03-11   3515
## # ℹ 11,348 more rows
# Line plot of log(claims) over time for the whole table.
# NOTE(review): there is no group_by(symbol) in this call, so the rows of all
# six states are handed to the plot together -- confirm this is intended.
plot_time_series(claims_tbl, .date_var = date, .value = log(claims))

# Number of observations per state.
count(claims_tbl, symbol)
## # A tibble: 6 × 2
##   symbol            n
##   <fct>         <int>
## 1 Connecticut    1893
## 2 Massachusetts  1893
## 3 Maine          1893
## 4 New Hampshire  1893
## 5 Rhode Island   1893
## 6 Vermont        1893
# One facet per state (two facets per row, free scales), drawn as a
# static plot because .interactive is FALSE.
plot_time_series(
  group_by(claims_tbl, symbol),
  date,
  log(claims),
  .facet_ncol   = 2,
  .facet_scales = "free",
  .interactive  = FALSE
)

# Number of observations per state.
count(claims_tbl, symbol)
## # A tibble: 6 × 2
##   symbol            n
##   <fct>         <int>
## 1 Connecticut    1893
## 2 Massachusetts  1893
## 3 Maine          1893
## 4 New Hampshire  1893
## 5 Rhode Island   1893
## 6 Vermont        1893
# Faceted plot, one panel per state. No .interactive argument is given, so
# the package default applies.
claims_tbl %>%
  group_by(symbol) %>%
  plot_time_series(
    date, log(claims),
    .facet_ncol   = 2,
    .facet_scales = "free"
    # Optional, currently disabled: .color_var = week(date)
  )

# Ungrouped plot with line colour driven by the calendar month of each
# observation, with a custom title, axis labels and legend label.
claims_tbl %>%
  plot_time_series(
    .date_var    = date,
    .value       = log(claims),
    .color_var   = month(date, label = TRUE),
    .interactive = FALSE, # Returns static ggplot
    .title       = "Jobless Claims",
    .x_lab       = "Date (10-year intervals)",
    .y_lab       = "Number log-adjusted",
    .color_lab   = "month"
  )

Box plots

# Number of observations per state.
count(claims_tbl, symbol)
## # A tibble: 6 × 2
##   symbol            n
##   <fct>         <int>
## 1 Connecticut    1893
## 2 Massachusetts  1893
## 3 Maine          1893
## 4 New Hampshire  1893
## 5 Rhode Island   1893
## 6 Vermont        1893
# Box plots of log(claims) aggregated into one-year periods, one facet per
# state. The table is first restricted with .end_date = "2024"
# (presumably "through the end of 2024" -- see the filter_by_time docs).
claims_tbl %>%
  filter_by_time(.date_var = date, .end_date = "2024") %>%
  group_by(symbol) %>%
  plot_time_series_boxplot(
    date,
    log(claims),
    .period     = "1 year",
    .facet_ncol = 2
  )

Regression plots

# Per-state regression plot: log(claims) modelled on a linear time term
# plus a month-of-year factor. The model summary printout is suppressed.
claims_tbl %>%
  group_by(symbol) %>%
  plot_time_series_regression(
    .date_var     = date,
    .formula      = log(claims) ~
      as.numeric(date) + month(date, label = TRUE),
    .facet_ncol   = 2,
    .show_summary = FALSE
  )

Plotting Seasonality and Correlation

Correlation Plots

# Autocorrelation diagnostics of log(claims) for each state, with lags
# specified as "3 years".
claims_tbl %>%
  group_by(symbol) %>%
  plot_acf_diagnostics(
    .date_var = date,
    .value    = log(claims),
    .lags     = "3 years"
  )

Seasonality

# Seasonal diagnostics of log(claims) on the ungrouped table (all states
# pooled together).
plot_seasonal_diagnostics(claims_tbl, .date_var = date, .value = log(claims))

# Number of observations per state.
count(claims_tbl, symbol)
## # A tibble: 6 × 2
##   symbol            n
##   <fct>         <int>
## 1 Connecticut    1893
## 2 Massachusetts  1893
## 3 Maine          1893
## 4 New Hampshire  1893
## 5 Rhode Island   1893
## 6 Vermont        1893
# Seasonal diagnostics of log(claims), computed separately for each state.
plot_seasonal_diagnostics(
  group_by(claims_tbl, symbol),
  .date_var = date,
  .value    = log(claims)
)

STL Diagnostics

# STL decomposition plots of log(claims) per state, showing the observed
# series together with its season, trend and remainder components.
claims_tbl %>%
  group_by(symbol) %>%
  plot_stl_diagnostics(
    .date_var    = date,
    .value       = log(claims),
    .feature_set = c("observed", "season", "trend", "remainder")
  )
## frequency = 13 observations per 1 quarter
## trend = 53 observations per 1 year
## frequency = 13 observations per 1 quarter
## trend = 53 observations per 1 year
## frequency = 13 observations per 1 quarter
## trend = 53 observations per 1 year
## frequency = 13 observations per 1 quarter
## trend = 53 observations per 1 year
## frequency = 13 observations per 1 quarter
## trend = 53 observations per 1 year
## frequency = 13 observations per 1 quarter
## trend = 53 observations per 1 year

Time Series Data Wrangling

Summarize by Time

Weekly Data

# Baseline view before any aggregation: log(claims) at the original
# frequency, one static facet per state.
claims_tbl %>%
  group_by(symbol) %>%
  plot_time_series(
    .date_var    = date,
    .value       = log(claims),
    .facet_ncol  = 2,
    .interactive = FALSE
  )

Summarized by Quarter and by Month

# Quarterly totals per state, shown on a log scale.
# The claims are summed first and the log is taken afterwards: summing
# log(claims) would give the log of the *product* of the weekly counts,
# which is not a quarterly total and scales with the number of weeks that
# fall in each quarter.
claims_tbl %>%
  group_by(symbol) %>%
  summarise_by_time(
    .date_var        = date,
    .by              = "quarter",
    log_total_claims = log(sum(claims))
  ) %>%
  plot_time_series(
    date, log_total_claims,
    .facet_ncol  = 2,
    .interactive = FALSE
  )

# Monthly average of log(claims) per state, one static facet per state.
# The summary column reuses the name `claims`.
claims_tbl %>%
  group_by(symbol) %>%
  summarise_by_time(
    .date_var = date,
    .by       = "month",
    claims    = mean(log(claims))
  ) %>%
  plot_time_series(
    .date_var    = date,
    .value       = claims,
    .facet_ncol  = 2,
    .interactive = FALSE
  )

Filter By Time

# Restrict each state's series to the window given by the partial dates
# "2023-04" (start) and "2023" (end), then plot log(claims) per state.
# NOTE(review): presumably April 2023 through the end of 2023 -- confirm
# against the filter_by_time documentation.
claims_tbl %>%
  group_by(symbol) %>%
  filter_by_time(
    .date_var   = date,
    .start_date = "2023-04",
    .end_date   = "2023"
  ) %>%
  plot_time_series(
    .date_var   = date,
    .value      = log(claims),
    .facet_ncol = 2
  )

Padding Data

# Pad each state's series to a regular weekly grid, filling any inserted
# rows with 0. The printed result has the same number of rows as the input,
# i.e. no weeks were missing.
claims_tbl %>%
  group_by(symbol) %>%
  pad_by_time(
    .date_var  = date,
    .by        = "week",
    .pad_value = 0
  )
## # A tibble: 11,358 × 3
## # Groups:   symbol [6]
##    symbol      date       claims
##    <fct>       <date>      <int>
##  1 Connecticut 1989-01-07   8345
##  2 Connecticut 1989-01-14   6503
##  3 Connecticut 1989-01-21   3821
##  4 Connecticut 1989-01-28   4663
##  5 Connecticut 1989-02-04   4162
##  6 Connecticut 1989-02-11   4337
##  7 Connecticut 1989-02-18   4079
##  8 Connecticut 1989-02-25   3556
##  9 Connecticut 1989-03-04   3826
## 10 Connecticut 1989-03-11   3515
## # ℹ 11,348 more rows

Sliding (Rolling) Calculations

# Two-observation rolling mean of log(claims) on the first ten rows.
# The window is right-aligned; with .partial = TRUE the first row is
# computed from the single value available instead of being NA.
claims_tbl %>%
  head(n = 10) %>%
  mutate(
    rolling_avg_2 = slidify_vec(
      .x       = log(claims),
      .f       = mean,
      .period  = 2,
      .align   = "right",
      .partial = TRUE
    )
  )
## # A tibble: 10 × 4
##    symbol      date       claims rolling_avg_2
##    <fct>       <date>      <int>         <dbl>
##  1 Connecticut 1989-01-07   8345          9.03
##  2 Connecticut 1989-01-14   6503          8.90
##  3 Connecticut 1989-01-21   3821          8.51
##  4 Connecticut 1989-01-28   4663          8.35
##  5 Connecticut 1989-02-04   4162          8.39
##  6 Connecticut 1989-02-11   4337          8.35
##  7 Connecticut 1989-02-18   4079          8.34
##  8 Connecticut 1989-02-25   3556          8.24
##  9 Connecticut 1989-03-04   3826          8.21
## 10 Connecticut 1989-03-11   3515          8.21
# Rolling regressions are easy to implement using `.unlist = FALSE`, which
# keeps every fitted model as an element of a list-column.
# The model is response (..1) on a single predictor (..2), refit over a
# right-aligned window of 90 observations.
lm_roll <- slidify(~ lm(..1 ~ ..2), .period = 90,
                   .unlist = FALSE, .align = "right")


claims_tbl %>%
  select(symbol, date, claims) %>%
  group_by(symbol) %>%
  mutate(numeric_date = as.numeric(date)) %>%
  # Apply rolling regression of claims on time within each state.
  # `symbol` is the grouping factor (constant within a group) and must not
  # be passed to lm(): as the first argument it would become the response,
  # and lm() warns on a factor response for every window.
  mutate(rolling_lm = lm_roll(claims, numeric_date)) %>%
  # Windows with fewer than 90 observations have no model (NA); drop them.
  filter(!is.na(rolling_lm))
## # A tibble: 10,824 × 5
## # Groups:   symbol [6]
##    symbol      date       claims numeric_date rolling_lm
##    <fct>       <date>      <int>        <dbl> <list>    
##  1 Connecticut 1990-09-22   3927         7569 <lm>      
##  2 Connecticut 1990-09-29   4471         7576 <lm>      
##  3 Connecticut 1990-10-06   4430         7583 <lm>      
##  4 Connecticut 1990-10-13   4494         7590 <lm>      
##  5 Connecticut 1990-10-20   4894         7597 <lm>      
##  6 Connecticut 1990-10-27   4653         7604 <lm>      
##  7 Connecticut 1990-11-03   4719         7611 <lm>      
##  8 Connecticut 1990-11-10   5347         7618 <lm>      
##  9 Connecticut 1990-11-17   4824         7625 <lm>      
## 10 Connecticut 1990-11-24   5367         7632 <lm>      
## # ℹ 10,814 more rows