# for core packages
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.0.4     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# for financial analysis
library(tidyquant)
## Warning: package 'tidyquant' was built under R version 4.4.3
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
## Warning: package 'xts' was built under R version 4.4.3
## Warning: package 'zoo' was built under R version 4.4.3
## Warning: package 'quantmod' was built under R version 4.4.3
## Warning: package 'TTR' was built under R version 4.4.3
## Warning: package 'PerformanceAnalytics' was built under R version 4.4.3
## ── Attaching core tidyquant packages ─────────────────────── tidyquant 1.0.11 ──
## ✔ PerformanceAnalytics 2.0.8      ✔ TTR                  0.24.4
## ✔ quantmod             0.4.27     ✔ xts                  0.14.1
## ── Conflicts ────────────────────────────────────────── tidyquant_conflicts() ──
## ✖ zoo::as.Date()                 masks base::as.Date()
## ✖ zoo::as.Date.numeric()         masks base::as.Date.numeric()
## ✖ dplyr::filter()                masks stats::filter()
## ✖ xts::first()                   masks dplyr::first()
## ✖ dplyr::lag()                   masks stats::lag()
## ✖ xts::last()                    masks dplyr::last()
## ✖ PerformanceAnalytics::legend() masks graphics::legend()
## ✖ quantmod::summary()            masks base::summary()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# for time series
library(timetk)
## Warning: package 'timetk' was built under R version 4.4.3
## 
## Attaching package: 'timetk'
## 
## The following object is masked from 'package:tidyquant':
## 
##     FANG

Goal: Apply Matt Dancho’s tutorial to initial unemployment claims for the New England states.

The following is a replication of Matt Dancho’s tutorial on this page.

# Earliest observation date to request.
start_date <- "1989-01-01"

# Series identifiers for the initial-claims data, one per New England state.
symbols_txt <- c(
  "CTICLAIMS", # Connecticut
  "MEICLAIMS", # Maine
  "MAICLAIMS", # Massachusetts
  "NHICLAIMS", # New Hampshire
  "RIICLAIMS", # Rhode Island
  "VTICLAIMS"  # Vermont
)

# Download every series (tidyquant's "economic.data" source), replace the
# series codes with readable state names, and call the value column `claims`.
# The state names below are listed in the same order as `symbols_txt`, so
# `setNames()` pairs each name with its code before the pairs are spliced
# into `fct_recode()` as `"State name" = "CODE"` arguments.
claims_tbl <- symbols_txt |>
  tq_get(get = "economic.data", from = start_date) |>
  mutate(
    symbol = fct_recode(
      symbol,
      !!!setNames(
        symbols_txt,
        c(
          "Connecticut", "Maine", "Massachusetts",
          "New Hampshire", "Rhode Island", "Vermont"
        )
      )
    )
  ) |>
  rename(claims = price)

Plotting time series

# Print the long-format table (symbol, date, claims) to inspect its layout.
claims_tbl
## # A tibble: 11,358 × 3
##    symbol      date       claims
##    <fct>       <date>      <int>
##  1 Connecticut 1989-01-07   8345
##  2 Connecticut 1989-01-14   6503
##  3 Connecticut 1989-01-21   3821
##  4 Connecticut 1989-01-28   4663
##  5 Connecticut 1989-02-04   4162
##  6 Connecticut 1989-02-11   4337
##  7 Connecticut 1989-02-18   4079
##  8 Connecticut 1989-02-25   3556
##  9 Connecticut 1989-03-04   3826
## 10 Connecticut 1989-03-11   3515
## # ℹ 11,348 more rows
# Overview of weekly initial claims for all states on one chart.
# `claims_tbl` is in long format (one row per state per date), so without a
# colour/grouping variable the six series would be drawn as a single line that
# jumps between states at every date. Mapping `symbol` to `.color_var` draws
# one line per state instead.
claims_tbl %>%
  plot_time_series(
    .date_var  = date,
    .value     = claims,
    .color_var = symbol)
# Show the same table grouped by state (six groups, one per `symbol` level).
group_by(claims_tbl, symbol)
## # A tibble: 11,358 × 3
## # Groups:   symbol [6]
##    symbol      date       claims
##    <fct>       <date>      <int>
##  1 Connecticut 1989-01-07   8345
##  2 Connecticut 1989-01-14   6503
##  3 Connecticut 1989-01-21   3821
##  4 Connecticut 1989-01-28   4663
##  5 Connecticut 1989-02-04   4162
##  6 Connecticut 1989-02-11   4337
##  7 Connecticut 1989-02-18   4079
##  8 Connecticut 1989-02-25   3556
##  9 Connecticut 1989-03-04   3826
## 10 Connecticut 1989-03-11   3515
## # ℹ 11,348 more rows
# One panel per state, laid out in two columns with independent axis scales.
# `.interactive = FALSE` requests the non-interactive version of the plot.
claims_tbl |>
  group_by(symbol) |>
  plot_time_series(
    date, claims,
    .interactive  = FALSE,
    .facet_scales = "free",
    .facet_ncol   = 2
  )

Box plots

# Number of observations available for each state.
count(claims_tbl, symbol)
## # A tibble: 6 × 2
##   symbol            n
##   <fct>         <int>
## 1 Connecticut    1893
## 2 Massachusetts  1893
## 3 Maine          1893
## 4 New Hampshire  1893
## 5 Rhode Island   1893
## 6 Vermont        1893
# Yearly box plots of claims per state, in a two-column facet layout.
# Observations are first limited to dates up to the "2025" end bound.
claims_tbl |>
  filter_by_time(date, .end_date = "2025") |>
  group_by(symbol) |>
  plot_time_series_boxplot(
    date, claims,
    .facet_ncol = 2,
    .period     = "1 year"
  )

Regression plots

# Per-state linear regression of log(claims) on a numeric time trend plus
# month-of-year effects. `.show_summary = TRUE` prints the `lm()` summary for
# every state in addition to drawing the fitted series.
claims_tbl |>
  group_by(symbol) |>
  plot_time_series_regression(
    date,
    .formula = log(claims) ~ as.numeric(date) + month(date, label = TRUE),
    .show_summary = TRUE,
    .facet_ncol = 2
  )
## 
## Summary for Group: Connecticut---
## Call:
## stats::lm(formula = .formula, data = df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.8278 -0.2151 -0.0328  0.1752  3.2009 
## 
## Coefficients:
##                                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                   8.601e+00  2.850e-02 301.745  < 2e-16 ***
## as.numeric(date)             -1.799e-05  2.022e-06  -8.898  < 2e-16 ***
## month(date, label = TRUE).L  -2.871e-01  2.678e-02 -10.721  < 2e-16 ***
## month(date, label = TRUE).Q   4.396e-01  2.671e-02  16.457  < 2e-16 ***
## month(date, label = TRUE).C   6.097e-02  2.665e-02   2.288 0.022255 *  
## month(date, label = TRUE)^4   4.950e-01  2.673e-02  18.515  < 2e-16 ***
## month(date, label = TRUE)^5  -1.970e-02  2.694e-02  -0.731 0.464878    
## month(date, label = TRUE)^6  -2.794e-02  2.702e-02  -1.034 0.301283    
## month(date, label = TRUE)^7  -1.217e-01  2.681e-02  -4.537 6.05e-06 ***
## month(date, label = TRUE)^8   5.625e-02  2.672e-02   2.105 0.035448 *  
## month(date, label = TRUE)^9   1.914e-01  2.680e-02   7.141 1.32e-12 ***
## month(date, label = TRUE)^10 -9.513e-02  2.679e-02  -3.551 0.000394 ***
## month(date, label = TRUE)^11  1.223e-02  2.679e-02   0.456 0.648166    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3365 on 1880 degrees of freedom
## Multiple R-squared:  0.3264, Adjusted R-squared:  0.3221 
## F-statistic: 75.92 on 12 and 1880 DF,  p-value: < 2.2e-16
## 
## ----
## 
## Summary for Group: Massachusetts---
## Call:
## stats::lm(formula = .formula, data = df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.5900 -0.2204 -0.0512  0.1764  3.3906 
## 
## Coefficients:
##                                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                   9.201e+00  3.468e-02 265.286  < 2e-16 ***
## as.numeric(date)             -2.153e-05  2.460e-06  -8.752  < 2e-16 ***
## month(date, label = TRUE).L  -7.185e-02  3.259e-02  -2.205 0.027582 *  
## month(date, label = TRUE).Q   5.933e-01  3.250e-02  18.253  < 2e-16 ***
## month(date, label = TRUE).C   1.869e-01  3.243e-02   5.764 9.59e-09 ***
## month(date, label = TRUE)^4   2.917e-01  3.253e-02   8.965  < 2e-16 ***
## month(date, label = TRUE)^5  -5.019e-02  3.279e-02  -1.531 0.126012    
## month(date, label = TRUE)^6  -1.117e-01  3.288e-02  -3.398 0.000693 ***
## month(date, label = TRUE)^7  -6.546e-03  3.263e-02  -0.201 0.841016    
## month(date, label = TRUE)^8   3.230e-02  3.252e-02   0.993 0.320800    
## month(date, label = TRUE)^9   9.836e-02  3.261e-02   3.016 0.002593 ** 
## month(date, label = TRUE)^10 -8.964e-02  3.260e-02  -2.750 0.006023 ** 
## month(date, label = TRUE)^11  1.442e-02  3.260e-02   0.442 0.658340    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4094 on 1880 degrees of freedom
## Multiple R-squared:  0.2295, Adjusted R-squared:  0.2246 
## F-statistic: 46.66 on 12 and 1880 DF,  p-value: < 2.2e-16
## 
## ----
## 
## Summary for Group: Maine---
## Call:
## stats::lm(formula = .formula, data = df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.8060 -0.2450 -0.0667  0.1949  3.4642 
## 
## Coefficients:
##                                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                   8.424e+00  3.283e-02 256.618  < 2e-16 ***
## as.numeric(date)             -8.908e-05  2.329e-06 -38.255  < 2e-16 ***
## month(date, label = TRUE).L  -2.096e-01  3.084e-02  -6.794 1.45e-11 ***
## month(date, label = TRUE).Q   8.159e-01  3.076e-02  26.521  < 2e-16 ***
## month(date, label = TRUE).C   2.211e-01  3.069e-02   7.204 8.40e-13 ***
## month(date, label = TRUE)^4   2.203e-01  3.079e-02   7.154 1.20e-12 ***
## month(date, label = TRUE)^5  -2.082e-01  3.103e-02  -6.709 2.58e-11 ***
## month(date, label = TRUE)^6  -1.985e-02  3.112e-02  -0.638   0.5236    
## month(date, label = TRUE)^7  -1.313e-01  3.088e-02  -4.250 2.24e-05 ***
## month(date, label = TRUE)^8   5.486e-02  3.078e-02   1.782   0.0748 .  
## month(date, label = TRUE)^9   1.266e-01  3.086e-02   4.103 4.25e-05 ***
## month(date, label = TRUE)^10 -7.191e-02  3.086e-02  -2.331   0.0199 *  
## month(date, label = TRUE)^11 -5.272e-02  3.086e-02  -1.708   0.0877 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3875 on 1880 degrees of freedom
## Multiple R-squared:  0.5608, Adjusted R-squared:  0.558 
## F-statistic: 200.1 on 12 and 1880 DF,  p-value: < 2.2e-16
## 
## ----
## 
## Summary for Group: New Hampshire---
## Call:
## stats::lm(formula = .formula, data = df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.2788 -0.3674 -0.0482  0.2821  3.7898 
## 
## Coefficients:
##                                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                   7.199e+00  4.657e-02 154.605  < 2e-16 ***
## as.numeric(date)             -3.108e-05  3.303e-06  -9.409  < 2e-16 ***
## month(date, label = TRUE).L  -2.085e-01  4.375e-02  -4.766 2.02e-06 ***
## month(date, label = TRUE).Q   4.407e-01  4.364e-02  10.099  < 2e-16 ***
## month(date, label = TRUE).C   2.273e-01  4.354e-02   5.220 1.98e-07 ***
## month(date, label = TRUE)^4   3.040e-01  4.367e-02   6.960 4.69e-12 ***
## month(date, label = TRUE)^5  -4.349e-02  4.402e-02  -0.988  0.32331    
## month(date, label = TRUE)^6  -1.709e-04  4.414e-02  -0.004  0.99691    
## month(date, label = TRUE)^7  -1.362e-01  4.380e-02  -3.109  0.00190 ** 
## month(date, label = TRUE)^8   8.065e-02  4.366e-02   1.847  0.06487 .  
## month(date, label = TRUE)^9   1.821e-01  4.378e-02   4.159 3.34e-05 ***
## month(date, label = TRUE)^10 -1.278e-01  4.377e-02  -2.920  0.00355 ** 
## month(date, label = TRUE)^11  4.820e-03  4.377e-02   0.110  0.91233    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5497 on 1880 degrees of freedom
## Multiple R-squared:  0.1489, Adjusted R-squared:  0.1435 
## F-statistic: 27.41 on 12 and 1880 DF,  p-value: < 2.2e-16
## 
## ----
## 
## Summary for Group: Rhode Island---
## Call:
## stats::lm(formula = .formula, data = df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.0291 -0.2645 -0.0658  0.1784  3.4124 
## 
## Coefficients:
##                                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                   8.085e+00  3.702e-02 218.388  < 2e-16 ***
## as.numeric(date)             -5.048e-05  2.626e-06 -19.219  < 2e-16 ***
## month(date, label = TRUE).L  -3.382e-01  3.478e-02  -9.724  < 2e-16 ***
## month(date, label = TRUE).Q   5.731e-01  3.469e-02  16.518  < 2e-16 ***
## month(date, label = TRUE).C   1.823e-01  3.462e-02   5.267 1.54e-07 ***
## month(date, label = TRUE)^4   4.465e-01  3.473e-02  12.858  < 2e-16 ***
## month(date, label = TRUE)^5   5.934e-02  3.500e-02   1.696 0.090120 .  
## month(date, label = TRUE)^6  -1.216e-01  3.510e-02  -3.464 0.000544 ***
## month(date, label = TRUE)^7  -2.601e-02  3.483e-02  -0.747 0.455283    
## month(date, label = TRUE)^8   5.336e-02  3.471e-02   1.537 0.124408    
## month(date, label = TRUE)^9   1.868e-01  3.481e-02   5.368 8.94e-08 ***
## month(date, label = TRUE)^10 -1.915e-01  3.480e-02  -5.503 4.25e-08 ***
## month(date, label = TRUE)^11  9.086e-03  3.480e-02   0.261 0.794072    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.437 on 1880 degrees of freedom
## Multiple R-squared:  0.3491, Adjusted R-squared:  0.345 
## F-statistic: 84.04 on 12 and 1880 DF,  p-value: < 2.2e-16
## 
## ----
## 
## Summary for Group: Vermont---
## Call:
## stats::lm(formula = .formula, data = df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.1616 -0.2387 -0.0416  0.2009  3.1763 
## 
## Coefficients:
##                                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                   7.101e+00  3.359e-02 211.408  < 2e-16 ***
## as.numeric(date)             -4.017e-05  2.383e-06 -16.859  < 2e-16 ***
## month(date, label = TRUE).L  -8.441e-02  3.156e-02  -2.675  0.00754 ** 
## month(date, label = TRUE).Q   7.203e-01  3.148e-02  22.885  < 2e-16 ***
## month(date, label = TRUE).C   5.328e-01  3.141e-02  16.965  < 2e-16 ***
## month(date, label = TRUE)^4   8.634e-02  3.150e-02   2.741  0.00619 ** 
## month(date, label = TRUE)^5  -3.314e-01  3.175e-02 -10.439  < 2e-16 ***
## month(date, label = TRUE)^6   6.522e-03  3.184e-02   0.205  0.83773    
## month(date, label = TRUE)^7  -1.434e-01  3.160e-02  -4.539 6.01e-06 ***
## month(date, label = TRUE)^8  -7.112e-03  3.149e-02  -0.226  0.82134    
## month(date, label = TRUE)^9   1.947e-01  3.158e-02   6.166 8.58e-10 ***
## month(date, label = TRUE)^10 -1.690e-01  3.157e-02  -5.354 9.66e-08 ***
## month(date, label = TRUE)^11  7.044e-02  3.157e-02   2.231  0.02580 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3965 on 1880 degrees of freedom
## Multiple R-squared:  0.4093, Adjusted R-squared:  0.4055 
## F-statistic: 108.5 on 12 and 1880 DF,  p-value: < 2.2e-16
## 
## ----

Plotting Seasonality and Correlation

Correlation Plots

# ACF / PACF per state.
# The series is weekly (consecutive dates are seven days apart), so a lag
# window of "7 days" spans only about one lag and shows almost nothing.
# A one-year window (roughly 52 weekly lags) lets the annual seasonality
# appear in the correlogram.
claims_tbl %>%
  group_by(symbol) %>%
  plot_acf_diagnostics(
    .date_var = date,
    .value    = claims,
    .lags     = "1 year")
# ACF / PACF per state over a three-month lag window, plus cross-correlations
# of `claims` against the columns listed in `.ccf_vars`.
# NOTE(review): `.ccf_vars` names `claims` itself and the `date` index rather
# than separate predictor columns, so the extra panels may add little beyond
# the ACF. Confirm whether other variables were intended here.
claims_tbl |>
  group_by(symbol) |>
  plot_acf_diagnostics(
    .date_var = date,
    .value    = claims,
    .lags     = "3 months",
    .ccf_vars = c(date, claims)
  )

Seasonality

# Seasonal diagnostics on the ungrouped table: observations from all states
# are pooled together in each seasonal bucket.
plot_seasonal_diagnostics(claims_tbl, .date_var = date, .value = claims)
# Confirm how many observations each state contributes.
count(claims_tbl, symbol)
## # A tibble: 6 × 2
##   symbol            n
##   <fct>         <int>
## 1 Connecticut    1893
## 2 Massachusetts  1893
## 3 Maine          1893
## 4 New Hampshire  1893
## 5 Rhode Island   1893
## 6 Vermont        1893
# Seasonal diagnostics computed separately for each state.
claims_tbl |>
  group_by(symbol) |>
  plot_seasonal_diagnostics(.date_var = date, .value = claims)

STL Diagnostics

# STL decomposition per state, showing the observed series together with its
# seasonal, trend and remainder components.
claims_tbl |>
  group_by(symbol) |>
  plot_stl_diagnostics(
    .date_var    = date,
    .value       = claims,
    .feature_set = c("observed", "season", "trend", "remainder")
  )
## frequency = 13 observations per 1 quarter
## trend = 53 observations per 1 year
## frequency = 13 observations per 1 quarter
## trend = 53 observations per 1 year
## frequency = 13 observations per 1 quarter
## trend = 53 observations per 1 year
## frequency = 13 observations per 1 quarter
## trend = 53 observations per 1 year
## frequency = 13 observations per 1 quarter
## trend = 53 observations per 1 year
## frequency = 13 observations per 1 quarter
## trend = 53 observations per 1 year

Time Series Data Wrangling

Summarize by Time

# Aggregate the weekly claims to monthly totals for each state (missing values
# are ignored in the sum), then plot the monthly series.
claims_tbl |>
  group_by(symbol) |>
  summarize_by_time(
    .date_var    = date,
    .by          = "month",
    claims_total = sum(claims, na.rm = TRUE)
  ) |>
  plot_time_series(date, claims_total)

Filter By Time

# Restrict each state's series to the window bounded by "1989-09" and "1990",
# then plot the result in a two-column facet layout.
claims_tbl |>
  group_by(symbol) |>
  filter_by_time(
    date,
    .start_date = "1989-09",
    .end_date   = "1990"
  ) |>
  plot_time_series(.date_var = date, .value = claims, .facet_ncol = 2)

Padding Data

# Expand each state's series to one row per day; the rows inserted between the
# original observations get `claims = 0`.
claims_tbl |>
  group_by(symbol) |>
  pad_by_time(.date_var = date, .pad_value = 0, .by = "day")
## # A tibble: 79,470 × 3
## # Groups:   symbol [6]
##    symbol      date       claims
##    <fct>       <date>      <int>
##  1 Connecticut 1989-01-07   8345
##  2 Connecticut 1989-01-08      0
##  3 Connecticut 1989-01-09      0
##  4 Connecticut 1989-01-10      0
##  5 Connecticut 1989-01-11      0
##  6 Connecticut 1989-01-12      0
##  7 Connecticut 1989-01-13      0
##  8 Connecticut 1989-01-14   6503
##  9 Connecticut 1989-01-15      0
## 10 Connecticut 1989-01-16      0
## # ℹ 79,460 more rows

Sliding (Rolling) Calculations

# Two-observation rolling mean of claims within each state.
# The window is right-aligned (current row plus the one before it), and
# `.partial = TRUE` lets the first row of each state use a shorter window
# instead of returning NA.
claims_tbl |>
  group_by(symbol) |>
  arrange(date) |>
  mutate(
    claims_roll_avg = slidify_vec(
      claims,
      mean,
      .period  = 2,
      .partial = TRUE,
      .align   = "right"
    )
  )
## # A tibble: 11,358 × 4
## # Groups:   symbol [6]
##    symbol        date       claims claims_roll_avg
##    <fct>         <date>      <int>           <dbl>
##  1 Connecticut   1989-01-07   8345           8345 
##  2 Maine         1989-01-07   4550           4550 
##  3 Massachusetts 1989-01-07  12677          12677 
##  4 New Hampshire 1989-01-07   1288           1288 
##  5 Rhode Island  1989-01-07   2731           2731 
##  6 Vermont       1989-01-07   1023           1023 
##  7 Connecticut   1989-01-14   6503           7424 
##  8 Maine         1989-01-14   3859           4204.
##  9 Massachusetts 1989-01-14   9937          11307 
## 10 New Hampshire 1989-01-14   1101           1194.
## # ℹ 11,348 more rows
# Rolling-regression helper built with `slidify()`.
# The lambda regresses its first argument (`..1`) on its second (`..2`) over a
# right-aligned window of 90 observations. `.unlist = FALSE` keeps each fitted
# `lm` object as a list element rather than flattening the results.
lm_roll <- slidify(
  .f      = ~ lm(..1 ~ ..2),
  .period = 90,
  .align  = "right",
  .unlist = FALSE
)


# Fit a rolling regression of claims on time for each state.
# Rows at the start of each state that do not yet have a full window carry NA
# in `rolling_lm` and are dropped by the final filter.
claims_tbl |>
  select(symbol, date, claims) |>
  group_by(symbol) |>
  mutate(
    # Numeric version of the date, used as the regressor
    numeric_date = as.numeric(date),
    # Apply rolling regression
    rolling_lm = lm_roll(claims, numeric_date)
  ) |>
  filter(!is.na(rolling_lm))
## # A tibble: 10,824 × 5
## # Groups:   symbol [6]
##    symbol      date       claims numeric_date rolling_lm
##    <fct>       <date>      <int>        <dbl> <list>    
##  1 Connecticut 1990-09-22   3927         7569 <lm>      
##  2 Connecticut 1990-09-29   4471         7576 <lm>      
##  3 Connecticut 1990-10-06   4430         7583 <lm>      
##  4 Connecticut 1990-10-13   4494         7590 <lm>      
##  5 Connecticut 1990-10-20   4894         7597 <lm>      
##  6 Connecticut 1990-10-27   4653         7604 <lm>      
##  7 Connecticut 1990-11-03   4719         7611 <lm>      
##  8 Connecticut 1990-11-10   5347         7618 <lm>      
##  9 Connecticut 1990-11-17   4824         7625 <lm>      
## 10 Connecticut 1990-11-24   5367         7632 <lm>      
## # ℹ 10,814 more rows