# for Core packages
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# for financial analysis
library(tidyquant)
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo 
## ── Attaching core tidyquant packages ──────────────────────── tidyquant 1.0.9 ──
## ✔ PerformanceAnalytics 2.0.4      ✔ TTR                  0.24.4
## ✔ quantmod             0.4.26     ✔ xts                  0.14.0
## ── Conflicts ────────────────────────────────────────── tidyquant_conflicts() ──
## ✖ zoo::as.Date()                 masks base::as.Date()
## ✖ zoo::as.Date.numeric()         masks base::as.Date.numeric()
## ✖ dplyr::filter()                masks stats::filter()
## ✖ xts::first()                   masks dplyr::first()
## ✖ dplyr::lag()                   masks stats::lag()
## ✖ xts::last()                    masks dplyr::last()
## ✖ PerformanceAnalytics::legend() masks graphics::legend()
## ✖ quantmod::summary()            masks base::summary()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# for time series
library(timetk)
## 
## Attaching package: 'timetk'
## 
## The following object is masked from 'package:tidyquant':
## 
##     FANG

Goal: Apply Matt Dancho’s tutorial to weekly initial unemployment claims for the New England states.

The following is a replication of Matt Dancho’s tutorial on this page.

# Earliest observation date to request.
start_date <- "1989-01-01"

# FRED series IDs for initial claims, one per New England state.
symbols_txt <- c(
  "CTICLAIMS", # Connecticut
  "MEICLAIMS", # Maine
  "MAICLAIMS", # Massachusetts
  "NHICLAIMS", # New Hampshire
  "RIICLAIMS", # Rhode Island
  "VTICLAIMS"  # Vermont
)

# Download every series, call the value column `claims`, and replace the
# series IDs stored in `symbol` with readable state names.
claims_tbl <- symbols_txt %>%
  tq_get(get = "economic.data", from = start_date) %>%
  rename(claims = price) %>%
  mutate(
    symbol = fct_recode(
      symbol,
      "Connecticut"   = "CTICLAIMS",
      "Maine"         = "MEICLAIMS",
      "Massachusetts" = "MAICLAIMS",
      "New Hampshire" = "NHICLAIMS",
      "Rhode Island"  = "RIICLAIMS",
      "Vermont"       = "VTICLAIMS"
    )
  )
# Drop the largest outliers from the start of COVID: only rows with fewer
# than 100,000 claims are kept.
new_claims_tbl <- claims_tbl %>%
  filter(claims < 1e5)

Plotting time series

# Print the filtered table to inspect its columns and first rows.
new_claims_tbl
## # A tibble: 11,353 × 3
##    symbol      date       claims
##    <fct>       <date>      <int>
##  1 Connecticut 1989-01-07   8345
##  2 Connecticut 1989-01-14   6503
##  3 Connecticut 1989-01-21   3821
##  4 Connecticut 1989-01-28   4663
##  5 Connecticut 1989-02-04   4162
##  6 Connecticut 1989-02-11   4337
##  7 Connecticut 1989-02-18   4079
##  8 Connecticut 1989-02-25   3556
##  9 Connecticut 1989-03-04   3826
## 10 Connecticut 1989-03-11   3515
## # ℹ 11,343 more rows
# Plot claims against date. No grouping is applied here, so the rows of
# all states feed a single plot.
new_claims_tbl %>%
  plot_time_series(date, claims)

# Number of rows available for each state.
count(new_claims_tbl, symbol)
## # A tibble: 6 × 2
##   symbol            n
##   <fct>         <int>
## 1 Connecticut    1892
## 2 Massachusetts  1889
## 3 Maine          1893
## 4 New Hampshire  1893
## 5 Rhode Island   1893
## 6 Vermont        1893
# Group by state so each state gets its own panel (two columns, free
# scales); `.interactive = FALSE` returns a static plot.
new_claims_tbl %>%
  group_by(symbol) %>%
  plot_time_series(
    date,
    claims,
    .facet_ncol   = 2,
    .facet_scales = "free",
    .interactive  = FALSE
  )

Visualizing Transformations and Sub-Groups

# Number of rows available for each state.
count(new_claims_tbl, symbol)
## # A tibble: 6 × 2
##   symbol            n
##   <fct>         <int>
## 1 Connecticut    1892
## 2 Massachusetts  1889
## 3 Maine          1893
## 4 New Hampshire  1893
## 5 Rhode Island   1893
## 6 Vermont        1893
# Plot log(claims) per state and colour the lines by calendar year.
new_claims_tbl %>%
  group_by(symbol) %>%
  plot_time_series(
    date,
    log(claims),
    .color_var    = year(date),
    .facet_ncol   = 2,
    .facet_scales = "free"
  )

Static ggplot2 Visualization and Customizations

# Static, customised plot of claims over time, coloured by month of year.
new_claims_tbl %>%
  plot_time_series(
    .date_var  = date,
    .value     = claims,
    .color_var = month(date, label = TRUE),

    # Returns static ggplot
    .interactive = FALSE,

    # Customize titles and axis labels. The colour legend is labelled
    # "Month" because `.color_var` maps month(date), not the year.
    .title     = "New England Unemployment Claims Data",
    .x_lab     = "Date",
    .y_lab     = "Claims",
    .color_lab = "Month"
  )

Box plots

# Number of rows available for each state.
count(new_claims_tbl, symbol)
## # A tibble: 6 × 2
##   symbol            n
##   <fct>         <int>
## 1 Connecticut    1892
## 2 Massachusetts  1889
## 3 Maine          1893
## 4 New Hampshire  1893
## 5 Rhode Island   1893
## 6 Vermont        1893
# Restrict the data with an end date of "2025", then draw per-state
# boxplots that aggregate claims over one-year periods.
new_claims_tbl %>%
  filter_by_time(date, .end_date = "2025") %>%
  group_by(symbol) %>%
  plot_time_series_boxplot(
    date,
    claims,
    .period     = "1 year",
    .facet_ncol = 2
  )

Regression plots

# Per state, regress log(claims) on a numeric time trend plus month of
# year, plot the result, and print each model summary.
# The month term appears in the summaries as polynomial contrasts
# (.L, .Q, .C, ^4, ...) rather than one coefficient per month.
new_claims_tbl %>%
  group_by(symbol) %>%
  plot_time_series_regression(
    .date_var     = date,
    .formula      = log(claims) ~ as.numeric(date) + month(date, label = TRUE),
    .facet_ncol   = 2,
    .show_summary = TRUE
  )
## 
## Summary for Group: Connecticut---
## Call:
## stats::lm(formula = .formula, data = df)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.82454 -0.21546 -0.03272  0.17843  2.35979 
## 
## Coefficients:
##                                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                   8.607e+00  2.782e-02 309.396  < 2e-16 ***
## as.numeric(date)             -1.855e-05  1.974e-06  -9.401  < 2e-16 ***
## month(date, label = TRUE).L  -2.827e-01  2.613e-02 -10.819  < 2e-16 ***
## month(date, label = TRUE).Q   4.428e-01  2.606e-02  16.991  < 2e-16 ***
## month(date, label = TRUE).C   5.383e-02  2.601e-02   2.069 0.038665 *  
## month(date, label = TRUE)^4   4.979e-01  2.609e-02  19.088  < 2e-16 ***
## month(date, label = TRUE)^5  -1.495e-02  2.629e-02  -0.569 0.569659    
## month(date, label = TRUE)^6  -3.560e-02  2.638e-02  -1.350 0.177292    
## month(date, label = TRUE)^7  -1.189e-01  2.616e-02  -4.544 5.87e-06 ***
## month(date, label = TRUE)^8   6.151e-02  2.608e-02   2.358 0.018460 *  
## month(date, label = TRUE)^9   1.816e-01  2.616e-02   6.942 5.31e-12 ***
## month(date, label = TRUE)^10 -8.673e-02  2.615e-02  -3.316 0.000931 ***
## month(date, label = TRUE)^11  8.209e-03  2.615e-02   0.314 0.753565    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3283 on 1879 degrees of freedom
## Multiple R-squared:  0.3378, Adjusted R-squared:  0.3336 
## F-statistic: 79.89 on 12 and 1879 DF,  p-value: < 2.2e-16
## 
## ----
## 
## Summary for Group: Massachusetts---
## Call:
## stats::lm(formula = .formula, data = df)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.57768 -0.21366 -0.04541  0.17917  2.54955 
## 
## Coefficients:
##                                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                   9.224e+00  3.253e-02 283.602  < 2e-16 ***
## as.numeric(date)             -2.370e-05  2.309e-06 -10.266  < 2e-16 ***
## month(date, label = TRUE).L  -5.177e-02  3.055e-02  -1.694  0.09038 .  
## month(date, label = TRUE).Q   5.990e-01  3.046e-02  19.667  < 2e-16 ***
## month(date, label = TRUE).C   1.620e-01  3.042e-02   5.324 1.14e-07 ***
## month(date, label = TRUE)^4   3.120e-01  3.051e-02  10.226  < 2e-16 ***
## month(date, label = TRUE)^5  -4.826e-02  3.073e-02  -1.570  0.11653    
## month(date, label = TRUE)^6  -1.324e-01  3.084e-02  -4.294 1.85e-05 ***
## month(date, label = TRUE)^7   1.529e-02  3.060e-02   0.500  0.61730    
## month(date, label = TRUE)^8   2.298e-02  3.052e-02   0.753  0.45151    
## month(date, label = TRUE)^9   9.509e-02  3.062e-02   3.106  0.00193 ** 
## month(date, label = TRUE)^10 -8.207e-02  3.058e-02  -2.683  0.00735 ** 
## month(date, label = TRUE)^11  9.776e-03  3.055e-02   0.320  0.74903    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3836 on 1876 degrees of freedom
## Multiple R-squared:  0.2622, Adjusted R-squared:  0.2575 
## F-statistic: 55.56 on 12 and 1876 DF,  p-value: < 2.2e-16
## 
## ----
## 
## Summary for Group: Maine---
## Call:
## stats::lm(formula = .formula, data = df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.8060 -0.2450 -0.0667  0.1949  3.4642 
## 
## Coefficients:
##                                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                   8.424e+00  3.283e-02 256.618  < 2e-16 ***
## as.numeric(date)             -8.908e-05  2.329e-06 -38.255  < 2e-16 ***
## month(date, label = TRUE).L  -2.096e-01  3.084e-02  -6.794 1.45e-11 ***
## month(date, label = TRUE).Q   8.159e-01  3.076e-02  26.521  < 2e-16 ***
## month(date, label = TRUE).C   2.211e-01  3.069e-02   7.204 8.40e-13 ***
## month(date, label = TRUE)^4   2.203e-01  3.079e-02   7.154 1.20e-12 ***
## month(date, label = TRUE)^5  -2.082e-01  3.103e-02  -6.709 2.58e-11 ***
## month(date, label = TRUE)^6  -1.985e-02  3.112e-02  -0.638   0.5236    
## month(date, label = TRUE)^7  -1.313e-01  3.088e-02  -4.250 2.24e-05 ***
## month(date, label = TRUE)^8   5.486e-02  3.078e-02   1.782   0.0748 .  
## month(date, label = TRUE)^9   1.266e-01  3.086e-02   4.103 4.25e-05 ***
## month(date, label = TRUE)^10 -7.191e-02  3.086e-02  -2.331   0.0199 *  
## month(date, label = TRUE)^11 -5.272e-02  3.086e-02  -1.708   0.0877 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3875 on 1880 degrees of freedom
## Multiple R-squared:  0.5608, Adjusted R-squared:  0.558 
## F-statistic: 200.1 on 12 and 1880 DF,  p-value: < 2.2e-16
## 
## ----
## 
## Summary for Group: New Hampshire---
## Call:
## stats::lm(formula = .formula, data = df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.2788 -0.3674 -0.0482  0.2821  3.7898 
## 
## Coefficients:
##                                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                   7.199e+00  4.657e-02 154.605  < 2e-16 ***
## as.numeric(date)             -3.108e-05  3.303e-06  -9.409  < 2e-16 ***
## month(date, label = TRUE).L  -2.085e-01  4.375e-02  -4.766 2.02e-06 ***
## month(date, label = TRUE).Q   4.407e-01  4.364e-02  10.099  < 2e-16 ***
## month(date, label = TRUE).C   2.273e-01  4.354e-02   5.220 1.98e-07 ***
## month(date, label = TRUE)^4   3.040e-01  4.367e-02   6.960 4.69e-12 ***
## month(date, label = TRUE)^5  -4.349e-02  4.402e-02  -0.988  0.32331    
## month(date, label = TRUE)^6  -1.709e-04  4.414e-02  -0.004  0.99691    
## month(date, label = TRUE)^7  -1.362e-01  4.380e-02  -3.109  0.00190 ** 
## month(date, label = TRUE)^8   8.065e-02  4.366e-02   1.847  0.06487 .  
## month(date, label = TRUE)^9   1.821e-01  4.378e-02   4.159 3.34e-05 ***
## month(date, label = TRUE)^10 -1.278e-01  4.377e-02  -2.920  0.00355 ** 
## month(date, label = TRUE)^11  4.820e-03  4.377e-02   0.110  0.91233    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5497 on 1880 degrees of freedom
## Multiple R-squared:  0.1489, Adjusted R-squared:  0.1435 
## F-statistic: 27.41 on 12 and 1880 DF,  p-value: < 2.2e-16
## 
## ----
## 
## Summary for Group: Rhode Island---
## Call:
## stats::lm(formula = .formula, data = df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.0291 -0.2645 -0.0658  0.1784  3.4124 
## 
## Coefficients:
##                                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                   8.085e+00  3.702e-02 218.388  < 2e-16 ***
## as.numeric(date)             -5.048e-05  2.626e-06 -19.219  < 2e-16 ***
## month(date, label = TRUE).L  -3.382e-01  3.478e-02  -9.724  < 2e-16 ***
## month(date, label = TRUE).Q   5.731e-01  3.469e-02  16.518  < 2e-16 ***
## month(date, label = TRUE).C   1.823e-01  3.462e-02   5.267 1.54e-07 ***
## month(date, label = TRUE)^4   4.465e-01  3.473e-02  12.858  < 2e-16 ***
## month(date, label = TRUE)^5   5.934e-02  3.500e-02   1.696 0.090120 .  
## month(date, label = TRUE)^6  -1.216e-01  3.510e-02  -3.464 0.000544 ***
## month(date, label = TRUE)^7  -2.601e-02  3.483e-02  -0.747 0.455283    
## month(date, label = TRUE)^8   5.336e-02  3.471e-02   1.537 0.124408    
## month(date, label = TRUE)^9   1.868e-01  3.481e-02   5.368 8.94e-08 ***
## month(date, label = TRUE)^10 -1.915e-01  3.480e-02  -5.503 4.25e-08 ***
## month(date, label = TRUE)^11  9.086e-03  3.480e-02   0.261 0.794072    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.437 on 1880 degrees of freedom
## Multiple R-squared:  0.3491, Adjusted R-squared:  0.345 
## F-statistic: 84.04 on 12 and 1880 DF,  p-value: < 2.2e-16
## 
## ----
## 
## Summary for Group: Vermont---
## Call:
## stats::lm(formula = .formula, data = df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.1616 -0.2387 -0.0416  0.2009  3.1763 
## 
## Coefficients:
##                                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                   7.101e+00  3.359e-02 211.408  < 2e-16 ***
## as.numeric(date)             -4.017e-05  2.383e-06 -16.859  < 2e-16 ***
## month(date, label = TRUE).L  -8.441e-02  3.156e-02  -2.675  0.00754 ** 
## month(date, label = TRUE).Q   7.203e-01  3.148e-02  22.885  < 2e-16 ***
## month(date, label = TRUE).C   5.328e-01  3.141e-02  16.965  < 2e-16 ***
## month(date, label = TRUE)^4   8.634e-02  3.150e-02   2.741  0.00619 ** 
## month(date, label = TRUE)^5  -3.314e-01  3.175e-02 -10.439  < 2e-16 ***
## month(date, label = TRUE)^6   6.522e-03  3.184e-02   0.205  0.83773    
## month(date, label = TRUE)^7  -1.434e-01  3.160e-02  -4.539 6.01e-06 ***
## month(date, label = TRUE)^8  -7.112e-03  3.149e-02  -0.226  0.82134    
## month(date, label = TRUE)^9   1.947e-01  3.158e-02   6.166 8.58e-10 ***
## month(date, label = TRUE)^10 -1.690e-01  3.157e-02  -5.354 9.66e-08 ***
## month(date, label = TRUE)^11  7.044e-02  3.157e-02   2.231  0.02580 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3965 on 1880 degrees of freedom
## Multiple R-squared:  0.4093, Adjusted R-squared:  0.4055 
## F-statistic: 108.5 on 12 and 1880 DF,  p-value: < 2.2e-16
## 
## ----

Plotting Seasonality and Correlation

Correlation Plots

# Autocorrelation diagnostics per state, with lags limited to one year.
new_claims_tbl %>%
  group_by(symbol) %>%
  plot_acf_diagnostics(
    .date_var = date,
    .value    = claims,
    .lags     = "1 year"
  )

Seasonality

# Seasonal diagnostics on the whole table; no grouping is applied here,
# so all states are combined.
new_claims_tbl %>%
  plot_seasonal_diagnostics(
    .date_var = date,
    .value    = claims
  )

# Number of rows available for each state.
count(new_claims_tbl, symbol)
## # A tibble: 6 × 2
##   symbol            n
##   <fct>         <int>
## 1 Connecticut    1892
## 2 Massachusetts  1889
## 3 Maine          1893
## 4 New Hampshire  1893
## 5 Rhode Island   1893
## 6 Vermont        1893
# Seasonal diagnostics computed separately for each state.
new_claims_tbl %>%
  group_by(symbol) %>%
  plot_seasonal_diagnostics(
    .date_var = date,
    .value    = claims
  )

STL Diagnostics

# STL decomposition per state, showing the observed series together with
# its season, trend and remainder components.
new_claims_tbl %>%
  group_by(symbol) %>%
  plot_stl_diagnostics(
    .date_var    = date,
    .value       = claims,
    .feature_set = c("observed", "season", "trend", "remainder")
  )
## frequency = 13 observations per 1 quarter
## trend = 53 observations per 1 year
## frequency = 13 observations per 1 quarter
## trend = 53 observations per 1 year
## frequency = 13 observations per 1 quarter
## trend = 53 observations per 1 year
## frequency = 13 observations per 1 quarter
## trend = 53 observations per 1 year
## frequency = 13 observations per 1 quarter
## trend = 53 observations per 1 year
## frequency = 13 observations per 1 quarter
## trend = 53 observations per 1 year

Time Series Data Wrangling

Summarize by Time

# Average the claims within each month for every state, then plot the
# monthly means as a static, two-column faceted chart.
new_claims_tbl %>%
  group_by(symbol) %>%
  summarise_by_time(
    .date_var = date,
    .by       = "month",
    claims    = mean(claims)
  ) %>%
  plot_time_series(
    .date_var    = date,
    .value       = claims,
    .facet_ncol  = 2,
    .interactive = FALSE
  )

Filter By Time

# Keep, for each state, the observations between the given start and end
# dates, then plot them in two columns of panels.
new_claims_tbl %>%
  group_by(symbol) %>%
  filter_by_time(
    date,
    .start_date = "1989-01-01",
    .end_date   = "2025"
  ) %>%
  plot_time_series(
    .date_var   = date,
    .value      = claims,
    .facet_ncol = 2
  )

Padding Data

# Expand each state's series to daily frequency; the inserted days get a
# claims value of 0.
new_claims_tbl %>%
  group_by(symbol) %>%
  pad_by_time(
    .date_var  = date,
    .by        = "day",
    .pad_value = 0
  )
## # A tibble: 79,470 × 3
## # Groups:   symbol [6]
##    symbol      date       claims
##    <fct>       <date>      <int>
##  1 Connecticut 1989-01-07   8345
##  2 Connecticut 1989-01-08      0
##  3 Connecticut 1989-01-09      0
##  4 Connecticut 1989-01-10      0
##  5 Connecticut 1989-01-11      0
##  6 Connecticut 1989-01-12      0
##  7 Connecticut 1989-01-13      0
##  8 Connecticut 1989-01-14   6503
##  9 Connecticut 1989-01-15      0
## 10 Connecticut 1989-01-16      0
## # ℹ 79,460 more rows

Sliding (Rolling) Calculations

# Two-observation rolling mean over the first ten rows of the table.
# With right alignment and `.partial = TRUE`, the first row still gets a
# value even though it has no preceding observation.
new_claims_tbl %>%
  head(10) %>%
  mutate(
    rolling_avg_2 = slidify_vec(
      .x       = claims,
      .f       = mean,
      .period  = 2,
      .align   = "right",
      .partial = TRUE
    )
  )
## # A tibble: 10 × 4
##    symbol      date       claims rolling_avg_2
##    <fct>       <date>      <int>         <dbl>
##  1 Connecticut 1989-01-07   8345         8345 
##  2 Connecticut 1989-01-14   6503         7424 
##  3 Connecticut 1989-01-21   3821         5162 
##  4 Connecticut 1989-01-28   4663         4242 
##  5 Connecticut 1989-02-04   4162         4412.
##  6 Connecticut 1989-02-11   4337         4250.
##  7 Connecticut 1989-02-18   4079         4208 
##  8 Connecticut 1989-02-25   3556         3818.
##  9 Connecticut 1989-03-04   3826         3691 
## 10 Connecticut 1989-03-11   3515         3670.