# for Core packages
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# for financial analysis
library(tidyquant)
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo 
## ── Attaching core tidyquant packages ──────────────────────── tidyquant 1.0.8 ──
## ✔ PerformanceAnalytics 2.0.4      ✔ TTR                  0.24.4
## ✔ quantmod             0.4.26     ✔ xts                  0.13.2
## ── Conflicts ────────────────────────────────────────── tidyquant_conflicts() ──
## ✖ zoo::as.Date()                 masks base::as.Date()
## ✖ zoo::as.Date.numeric()         masks base::as.Date.numeric()
## ✖ dplyr::filter()                masks stats::filter()
## ✖ xts::first()                   masks dplyr::first()
## ✖ dplyr::lag()                   masks stats::lag()
## ✖ xts::last()                    masks dplyr::last()
## ✖ PerformanceAnalytics::legend() masks graphics::legend()
## ✖ quantmod::summary()            masks base::summary()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# for time series
library(timetk)
## 
## Attaching package: 'timetk'
## 
## The following object is masked from 'package:tidyquant':
## 
##     FANG

Goal: Apply Matt Dancho’s tutorial to the weekly initial unemployment claims of the six New England states.

The following is a replication of Matt Dancho’s tutorial on this page.

# Earliest observation date to request.
start_date <- "1989-01-01"

# Initial-claims series IDs, one per New England state.
symbols_txt <- c(
    "CTICLAIMS", # Connecticut
    "MEICLAIMS", # Maine
    "MAICLAIMS", # Massachusetts
    "NHICLAIMS", # New Hampshire
    "RIICLAIMS", # Rhode Island
    "VTICLAIMS"  # Vermont
)

# Download every series into one long tibble, name the value column `claims`,
# and replace the series IDs in `symbol` with readable state names.
claims_tbl <- symbols_txt %>%
    tq_get(get = "economic.data", from = start_date) %>%
    rename(claims = price) %>%
    mutate(
        symbol = fct_recode(
            symbol,
            "Connecticut"   = "CTICLAIMS",
            "Maine"         = "MEICLAIMS",
            "Massachusetts" = "MAICLAIMS",
            "New Hampshire" = "NHICLAIMS",
            "Rhode Island"  = "RIICLAIMS",
            "Vermont"       = "VTICLAIMS"
        )
    )

Plotting time series

# Overview: all states on a single panel.
# The date argument is spelled out as `.date_var` (the original `.date` only
# resolved through partial argument matching), and the series are coloured by
# state so the six states are not drawn as one undifferentiated line.
claims_tbl %>%
    plot_time_series(
        .date_var  = date,
        .value     = claims,
        .color_var = symbol)

# Same data, one facet per state (grouping drives the faceting).
claims_tbl %>%
    group_by(symbol) %>%
    plot_time_series(
        .date_var   = date,
        .value      = claims,
        .facet_ncol = 2)

Box plots

# Yearly box plots of claims, faceted by state in two columns.
# NOTE(review): `.end_date = "1989"` presumably keeps only observations through
# the end of 1989, which with `.period = "1 year"` would leave a single box per
# state -- confirm this window is intended.
claims_tbl %>%
    filter_by_time(date, .end_date = "1989") %>%
    group_by(symbol) %>%
    plot_time_series_boxplot(date, claims, .period = "1 year", .facet_ncol = 2)

Regression plots

# Per-state linear model of log claims on a numeric time trend plus a
# month-of-year factor; `.show_summary = TRUE` prints one lm summary per group.
plot_time_series_regression(
    .data         = group_by(claims_tbl, symbol),
    .date_var     = date,
    .formula      = log(claims) ~ as.numeric(date) + month(date, label = TRUE),
    .show_summary = TRUE,
    .facet_ncol   = 2
)
## 
## Summary for Group: Connecticut---
## Call:
## stats::lm(formula = .formula, data = df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.8406 -0.2157 -0.0311  0.1770  3.1917 
## 
## Coefficients:
##                                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                   8.590e+00  2.882e-02 298.027  < 2e-16 ***
## as.numeric(date)             -1.705e-05  2.056e-06  -8.291  < 2e-16 ***
## month(date, label = TRUE).L  -2.960e-01  2.705e-02 -10.941  < 2e-16 ***
## month(date, label = TRUE).Q   4.436e-01  2.693e-02  16.474  < 2e-16 ***
## month(date, label = TRUE).C   6.124e-02  2.687e-02   2.279 0.022777 *  
## month(date, label = TRUE)^4   4.920e-01  2.694e-02  18.261  < 2e-16 ***
## month(date, label = TRUE)^5  -1.926e-02  2.712e-02  -0.710 0.477796    
## month(date, label = TRUE)^6  -2.713e-02  2.719e-02  -0.998 0.318550    
## month(date, label = TRUE)^7  -1.240e-01  2.697e-02  -4.596 4.61e-06 ***
## month(date, label = TRUE)^8   5.703e-02  2.687e-02   2.122 0.033934 *  
## month(date, label = TRUE)^9   1.915e-01  2.691e-02   7.115 1.59e-12 ***
## month(date, label = TRUE)^10 -9.594e-02  2.685e-02  -3.573 0.000362 ***
## month(date, label = TRUE)^11  1.270e-02  2.682e-02   0.473 0.635964    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3367 on 1860 degrees of freedom
## Multiple R-squared:  0.3265, Adjusted R-squared:  0.3221 
## F-statistic: 75.13 on 12 and 1860 DF,  p-value: < 2.2e-16
## 
## ----
## 
## Summary for Group: Massachusetts---
## Call:
## stats::lm(formula = .formula, data = df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.5926 -0.2213 -0.0507  0.1764  3.3829 
## 
## Coefficients:
##                                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                   9.196e+00  3.519e-02 261.337  < 2e-16 ***
## as.numeric(date)             -2.107e-05  2.510e-06  -8.393  < 2e-16 ***
## month(date, label = TRUE).L  -7.700e-02  3.303e-02  -2.331 0.019840 *  
## month(date, label = TRUE).Q   5.935e-01  3.287e-02  18.054  < 2e-16 ***
## month(date, label = TRUE).C   1.882e-01  3.280e-02   5.738 1.12e-08 ***
## month(date, label = TRUE)^4   2.882e-01  3.290e-02   8.760  < 2e-16 ***
## month(date, label = TRUE)^5  -4.975e-02  3.312e-02  -1.502 0.133228    
## month(date, label = TRUE)^6  -1.110e-01  3.320e-02  -3.344 0.000841 ***
## month(date, label = TRUE)^7  -8.522e-03  3.293e-02  -0.259 0.795830    
## month(date, label = TRUE)^8   3.314e-02  3.281e-02   1.010 0.312538    
## month(date, label = TRUE)^9   9.836e-02  3.286e-02   2.994 0.002793 ** 
## month(date, label = TRUE)^10 -9.020e-02  3.279e-02  -2.751 0.005997 ** 
## month(date, label = TRUE)^11  1.477e-02  3.274e-02   0.451 0.651918    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4111 on 1860 degrees of freedom
## Multiple R-squared:  0.2273, Adjusted R-squared:  0.2223 
## F-statistic: 45.58 on 12 and 1860 DF,  p-value: < 2.2e-16
## 
## ----
## 
## Summary for Group: Maine---
## Call:
## stats::lm(formula = .formula, data = df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.8237 -0.2484 -0.0636  0.1920  3.4535 
## 
## Coefficients:
##                                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                   8.409e+00  3.323e-02 253.034  < 2e-16 ***
## as.numeric(date)             -8.775e-05  2.371e-06 -37.016  < 2e-16 ***
## month(date, label = TRUE).L  -2.185e-01  3.119e-02  -7.006 3.43e-12 ***
## month(date, label = TRUE).Q   8.239e-01  3.105e-02  26.537  < 2e-16 ***
## month(date, label = TRUE).C   2.224e-01  3.098e-02   7.180 1.00e-12 ***
## month(date, label = TRUE)^4   2.186e-01  3.107e-02   7.038 2.74e-12 ***
## month(date, label = TRUE)^5  -2.066e-01  3.127e-02  -6.606 5.14e-11 ***
## month(date, label = TRUE)^6  -1.897e-02  3.135e-02  -0.605   0.5453    
## month(date, label = TRUE)^7  -1.336e-01  3.110e-02  -4.296 1.83e-05 ***
## month(date, label = TRUE)^8   5.612e-02  3.098e-02   1.811   0.0703 .  
## month(date, label = TRUE)^9   1.262e-01  3.103e-02   4.067 4.97e-05 ***
## month(date, label = TRUE)^10 -7.237e-02  3.096e-02  -2.337   0.0195 *  
## month(date, label = TRUE)^11 -5.242e-02  3.092e-02  -1.695   0.0902 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3882 on 1860 degrees of freedom
## Multiple R-squared:  0.5577, Adjusted R-squared:  0.5549 
## F-statistic: 195.5 on 12 and 1860 DF,  p-value: < 2.2e-16
## 
## ----
## 
## Summary for Group: New Hampshire---
## Call:
## stats::lm(formula = .formula, data = df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.3055 -0.3663 -0.0445  0.2834  3.7652 
## 
## Coefficients:
##                                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                   7.169e+00  4.698e-02 152.575  < 2e-16 ***
## as.numeric(date)             -2.837e-05  3.352e-06  -8.464  < 2e-16 ***
## month(date, label = TRUE).L  -2.213e-01  4.410e-02  -5.019 5.68e-07 ***
## month(date, label = TRUE).Q   4.583e-01  4.389e-02  10.441  < 2e-16 ***
## month(date, label = TRUE).C   2.330e-01  4.380e-02   5.320 1.16e-07 ***
## month(date, label = TRUE)^4   3.041e-01  4.392e-02   6.924 6.04e-12 ***
## month(date, label = TRUE)^5  -4.043e-02  4.422e-02  -0.914  0.36061    
## month(date, label = TRUE)^6   2.923e-03  4.433e-02   0.066  0.94742    
## month(date, label = TRUE)^7  -1.389e-01  4.397e-02  -3.158  0.00161 ** 
## month(date, label = TRUE)^8   8.020e-02  4.380e-02   1.831  0.06728 .  
## month(date, label = TRUE)^9   1.846e-01  4.387e-02   4.207 2.71e-05 ***
## month(date, label = TRUE)^10 -1.310e-01  4.378e-02  -2.991  0.00281 ** 
## month(date, label = TRUE)^11  6.380e-03  4.372e-02   0.146  0.88399    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5489 on 1860 degrees of freedom
## Multiple R-squared:  0.1497, Adjusted R-squared:  0.1442 
## F-statistic: 27.28 on 12 and 1860 DF,  p-value: < 2.2e-16
## 
## ----
## 
## Summary for Group: Rhode Island---
## Call:
## stats::lm(formula = .formula, data = df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.0319 -0.2661 -0.0657  0.1798  3.4030 
## 
## Coefficients:
##                                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                   8.078e+00  3.748e-02 215.516  < 2e-16 ***
## as.numeric(date)             -4.986e-05  2.674e-06 -18.647  < 2e-16 ***
## month(date, label = TRUE).L  -3.444e-01  3.518e-02  -9.789  < 2e-16 ***
## month(date, label = TRUE).Q   5.732e-01  3.502e-02  16.370  < 2e-16 ***
## month(date, label = TRUE).C   1.853e-01  3.494e-02   5.303 1.28e-07 ***
## month(date, label = TRUE)^4   4.429e-01  3.504e-02  12.639  < 2e-16 ***
## month(date, label = TRUE)^5   6.028e-02  3.528e-02   1.709 0.087634 .  
## month(date, label = TRUE)^6  -1.192e-01  3.536e-02  -3.371 0.000764 ***
## month(date, label = TRUE)^7  -2.815e-02  3.508e-02  -0.802 0.422403    
## month(date, label = TRUE)^8   5.431e-02  3.495e-02   1.554 0.120327    
## month(date, label = TRUE)^9   1.879e-01  3.500e-02   5.367 9.00e-08 ***
## month(date, label = TRUE)^10 -1.928e-01  3.493e-02  -5.521 3.84e-08 ***
## month(date, label = TRUE)^11  9.878e-03  3.488e-02   0.283 0.777052    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4379 on 1860 degrees of freedom
## Multiple R-squared:  0.3476, Adjusted R-squared:  0.3434 
## F-statistic:  82.6 on 12 and 1860 DF,  p-value: < 2.2e-16
## 
## ----
## 
## Summary for Group: Vermont---
## Call:
## stats::lm(formula = .formula, data = df)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.17306 -0.23512 -0.04133  0.20115  3.15548 
## 
## Coefficients:
##                                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                   7.079e+00  3.389e-02 208.862  < 2e-16 ***
## as.numeric(date)             -3.827e-05  2.418e-06 -15.827  < 2e-16 ***
## month(date, label = TRUE).L  -9.411e-02  3.181e-02  -2.958  0.00313 ** 
## month(date, label = TRUE).Q   7.314e-01  3.166e-02  23.100  < 2e-16 ***
## month(date, label = TRUE).C   5.388e-01  3.160e-02  17.051  < 2e-16 ***
## month(date, label = TRUE)^4   8.408e-02  3.169e-02   2.653  0.00803 ** 
## month(date, label = TRUE)^5  -3.267e-01  3.190e-02 -10.243  < 2e-16 ***
## month(date, label = TRUE)^6   8.257e-03  3.198e-02   0.258  0.79627    
## month(date, label = TRUE)^7  -1.453e-01  3.172e-02  -4.582 4.91e-06 ***
## month(date, label = TRUE)^8  -5.897e-03  3.160e-02  -0.187  0.85198    
## month(date, label = TRUE)^9   1.952e-01  3.165e-02   6.169 8.41e-10 ***
## month(date, label = TRUE)^10 -1.703e-01  3.158e-02  -5.393 7.80e-08 ***
## month(date, label = TRUE)^11  7.119e-02  3.154e-02   2.257  0.02410 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3959 on 1860 degrees of freedom
## Multiple R-squared:  0.411,  Adjusted R-squared:  0.4072 
## F-statistic: 108.2 on 12 and 1860 DF,  p-value: < 2.2e-16
## 
## ----

Plotting Seasonality and Correlation

Correlation Plots

# Autocorrelation diagnostics for each state, out to a lag of 20 weeks.
claims_tbl %>%
    group_by(symbol) %>%
    plot_acf_diagnostics(.date_var = date, .value = claims, .lags = "20 weeks")

Seasonality

# Seasonal diagnostics per state.
# `claims_tbl` stacks six states of very different size; without grouping, all
# of them are pooled into the same seasonal distributions. Grouping by `symbol`
# keeps each state's seasonality separate.
claims_tbl %>%
    group_by(symbol) %>%
    plot_seasonal_diagnostics(.date_var = date, .value = claims)

STL Diagnostics

# STL decomposition per state, showing the observed series together with its
# seasonal, trend and remainder components.
stl_features <- c("observed", "season", "trend", "remainder")

claims_tbl %>%
    group_by(symbol) %>%
    plot_stl_diagnostics(.date_var = date, .value = claims, .feature_set = stl_features)
## frequency = 13 observations per 1 quarter
## trend = 53 observations per 1 year
## frequency = 13 observations per 1 quarter
## trend = 53 observations per 1 year
## frequency = 13 observations per 1 quarter
## trend = 53 observations per 1 year
## frequency = 13 observations per 1 quarter
## trend = 53 observations per 1 year
## frequency = 13 observations per 1 quarter
## trend = 53 observations per 1 year
## frequency = 13 observations per 1 quarter
## trend = 53 observations per 1 year

Time Series Data Wrangling

Summarize by Time

# Aggregate the claims to quarterly totals per state before plotting.
# The original chunk skipped the summarise step and simply repeated the
# faceted plot of the raw series, so nothing was summarized by time.
claims_tbl %>%
  group_by(symbol) %>%
  summarise_by_time(
    .date_var = date,
    .by       = "quarter",
    # na.rm guards against gaps in a series turning a whole quarter into NA
    claims    = sum(claims, na.rm = TRUE)
  ) %>%
  plot_time_series(date,
                   claims,
                   .facet_ncol = 2,
                   .interactive = FALSE)

Filter By Time

# Restrict each state's series to January 1989 through November 2024,
# then plot one facet per state.
claims_tbl %>%
    group_by(symbol) %>%
    filter_by_time(date, .start_date = "1989-01", .end_date = "2024-11") %>%
    plot_time_series(.date_var = date, .value = claims, .facet_ncol = 2)

Padding Data

# Expand each state's series to a daily grid; the inserted days get claims = 0.
pad_by_time(
    group_by(claims_tbl, symbol),
    .date_var  = date,
    .by        = "day",
    .pad_value = 0
)
## # A tibble: 78,630 × 3
## # Groups:   symbol [6]
##    symbol      date       claims
##    <fct>       <date>      <int>
##  1 Connecticut 1989-01-07   8345
##  2 Connecticut 1989-01-08      0
##  3 Connecticut 1989-01-09      0
##  4 Connecticut 1989-01-10      0
##  5 Connecticut 1989-01-11      0
##  6 Connecticut 1989-01-12      0
##  7 Connecticut 1989-01-13      0
##  8 Connecticut 1989-01-14   6503
##  9 Connecticut 1989-01-15      0
## 10 Connecticut 1989-01-16      0
## # ℹ 78,620 more rows

Sliding (Rolling) Calculations

# Two-observation trailing mean on the first ten rows.
# `.partial = TRUE` lets the first row, which has no predecessor, be averaged
# over the single value available instead of returning NA.
mutate(
    head(claims_tbl, 10),
    rolling_avg_2 = slidify_vec(
        .x       = claims,
        .f       = mean,
        .period  = 2,
        .align   = "right",
        .partial = TRUE
    )
)
## # A tibble: 10 × 4
##    symbol      date       claims rolling_avg_2
##    <fct>       <date>      <int>         <dbl>
##  1 Connecticut 1989-01-07   8345         8345 
##  2 Connecticut 1989-01-14   6503         7424 
##  3 Connecticut 1989-01-21   3821         5162 
##  4 Connecticut 1989-01-28   4663         4242 
##  5 Connecticut 1989-02-04   4162         4412.
##  6 Connecticut 1989-02-11   4337         4250.
##  7 Connecticut 1989-02-18   4079         4208 
##  8 Connecticut 1989-02-25   3556         3818.
##  9 Connecticut 1989-03-04   3826         3691 
## 10 Connecticut 1989-03-11   3515         3670.
# Rolling regression helper: fits lm(first argument ~ second argument) over a
# trailing window of 90 observations. `.unlist = FALSE` keeps each fitted
# model as a list element rather than flattening it.
lm_roll <- slidify(
    .f      = ~ lm(..1 ~ ..2),
    .period = 90,
    .align  = "right",
    .unlist = FALSE
)


# Fit the rolling regression of claims on a numeric date within each state,
# then drop the leading rows of each group where the window is not yet full
# and the model slot is NA.
reg_results <- claims_tbl %>%
  select(symbol, date, claims) %>%
  group_by(symbol) %>%
  mutate(
    numeric_date = as.numeric(date),
    rolling_lm   = lm_roll(claims, numeric_date)
  ) %>%
  filter(!is.na(rolling_lm))

reg_results
## # A tibble: 10,704 × 5
## # Groups:   symbol [6]
##    symbol      date       claims numeric_date rolling_lm
##    <fct>       <date>      <int>        <dbl> <list>    
##  1 Connecticut 1990-09-22   3927         7569 <lm>      
##  2 Connecticut 1990-09-29   4471         7576 <lm>      
##  3 Connecticut 1990-10-06   4430         7583 <lm>      
##  4 Connecticut 1990-10-13   4494         7590 <lm>      
##  5 Connecticut 1990-10-20   4894         7597 <lm>      
##  6 Connecticut 1990-10-27   4653         7604 <lm>      
##  7 Connecticut 1990-11-03   4719         7611 <lm>      
##  8 Connecticut 1990-11-10   5347         7618 <lm>      
##  9 Connecticut 1990-11-17   4824         7625 <lm>      
## 10 Connecticut 1990-11-24   5367         7632 <lm>      
## # ℹ 10,694 more rows
# Coefficient table for the first fitted rolling model.
broom::tidy(reg_results$rolling_lm[[1]])
## # A tibble: 2 × 5
##   term         estimate std.error statistic p.value
##   <chr>           <dbl>     <dbl>     <dbl>   <dbl>
## 1 (Intercept) -11225.    6974.        -1.61  0.111 
## 2 ..2              2.19     0.961      2.28  0.0248
# Replace every fitted model with its tidy coefficient table and expand the
# list column, giving one row per model term.
reg_results %>%
  mutate(rolling_lm = map(rolling_lm, function(fit) broom::tidy(fit))) %>%
  unnest(cols = rolling_lm)
## # A tibble: 21,408 × 9
## # Groups:   symbol [6]
##    symbol      date       claims numeric_date term  estimate std.error statistic
##    <fct>       <date>      <int>        <dbl> <chr>    <dbl>     <dbl>     <dbl>
##  1 Connecticut 1990-09-22   3927         7569 (Int…  -1.12e4  6974.        -1.61
##  2 Connecticut 1990-09-22   3927         7569 ..2     2.19e0     0.961      2.28
##  3 Connecticut 1990-09-29   4471         7576 (Int…  -1.40e4  6705.        -2.08
##  4 Connecticut 1990-09-29   4471         7576 ..2     2.56e0     0.923      2.78
##  5 Connecticut 1990-10-06   4430         7583 (Int…  -1.53e4  6617.        -2.31
##  6 Connecticut 1990-10-06   4430         7583 ..2     2.74e0     0.910      3.01
##  7 Connecticut 1990-10-13   4494         7590 (Int…  -1.46e4  6638.        -2.19
##  8 Connecticut 1990-10-13   4494         7590 ..2     2.64e0     0.912      2.89
##  9 Connecticut 1990-10-20   4894         7597 (Int…  -1.48e4  6638.        -2.23
## 10 Connecticut 1990-10-20   4894         7597 ..2     2.67e0     0.911      2.93
## # ℹ 21,398 more rows
## # ℹ 1 more variable: p.value <dbl>