# for Core packages
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.1     ✔ stringr   1.5.2
## ✔ ggplot2   4.0.0     ✔ tibble    3.3.0
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.1.0     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# for financial analysis
library(tidyquant)
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo 
## ── Attaching core tidyquant packages ─────────────────────── tidyquant 1.0.11 ──
## ✔ PerformanceAnalytics 2.0.8      ✔ TTR                  0.24.4
## ✔ quantmod             0.4.28     ✔ xts                  0.14.1
## ── Conflicts ────────────────────────────────────────── tidyquant_conflicts() ──
## ✖ zoo::as.Date()                 masks base::as.Date()
## ✖ zoo::as.Date.numeric()         masks base::as.Date.numeric()
## ✖ dplyr::filter()                masks stats::filter()
## ✖ xts::first()                   masks dplyr::first()
## ✖ dplyr::lag()                   masks stats::lag()
## ✖ xts::last()                    masks dplyr::last()
## ✖ PerformanceAnalytics::legend() masks graphics::legend()
## ✖ quantmod::summary()            masks base::summary()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# for times series
library(timetk)
## 
## Attaching package: 'timetk'
## 
## The following object is masked from 'package:tidyquant':
## 
##     FANG

Goal: Apply Matt Dancho’s tutorial to initial unemployment claims data for the New England states.

The following is a replication of Matt Dancho’s tutorial on this page.

# Earliest observation date requested from the data source.
start_date <- "1989-01-01"

# Initial-claims series IDs, one per New England state.
symbols_txt <- c(
  "CTICLAIMS", # Connecticut
  "MEICLAIMS", # Maine
  "MAICLAIMS", # Massachusetts
  "NHICLAIMS", # New Hampshire
  "RIICLAIMS", # Rhode Island
  "VTICLAIMS"  # Vermont
)

# Download every series from `start_date` onward, rename the value column
# to `claims`, and replace the series IDs in `symbol` with state names.
claims_tbl <- symbols_txt %>%
  tq_get(get = "economic.data", from = start_date) %>%
  rename(claims = price) %>%
  mutate(
    symbol = fct_recode(
      symbol,
      "Connecticut"   = "CTICLAIMS",
      "Maine"         = "MEICLAIMS",
      "Massachusetts" = "MAICLAIMS",
      "New Hampshire" = "NHICLAIMS",
      "Rhode Island"  = "RIICLAIMS",
      "Vermont"       = "VTICLAIMS"
    )
  )

Plotting time series

# Line chart of claims over time, drawn separately for each state in a
# 2-column facet grid with free axis scales; static (non-interactive) output.
group_by(claims_tbl, symbol) %>%
  plot_time_series(
    date, claims,
    .interactive  = FALSE,
    .facet_scales = "free",
    .facet_ncol   = 2
  )
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## ℹ The deprecated feature was likely used in the timetk package.
##   Please report the issue at
##   <https://github.com/business-science/timetk/issues>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Ignoring unknown labels:
## • colour : "Legend"

Box plots

# Box plots of claims binned into 1-year periods, one panel per state
# arranged in 2 columns.
group_by(claims_tbl, symbol) %>%
  plot_time_series_boxplot(
    date, claims,
    .facet_ncol = 2,
    .period     = "1 year"
  )
## Ignoring unknown labels:
## • colour : "Legend"

Regression plots

# Per-state linear regression of log(claims) on a numeric date trend plus
# month-of-year terms. `.show_summary = TRUE` also prints each group's
# model summary alongside the plot.
group_by(claims_tbl, symbol) %>%
  plot_time_series_regression(
    date,
    .formula = log(claims) ~ as.numeric(date) + month(date, label = TRUE),
    .show_summary = TRUE,
    .facet_ncol = 2
  )
## 
## Summary for Group: Connecticut---
## Call:
## stats::lm(formula = .formula, data = df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.8118 -0.2142 -0.0363  0.1725  3.2068 
## 
## Coefficients:
##                                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                   8.614e+00  2.767e-02 311.269  < 2e-16 ***
## as.numeric(date)             -1.913e-05  1.935e-06  -9.891  < 2e-16 ***
## month(date, label = TRUE).L  -2.814e-01  2.634e-02 -10.685  < 2e-16 ***
## month(date, label = TRUE).Q   4.352e-01  2.627e-02  16.565  < 2e-16 ***
## month(date, label = TRUE).C   6.298e-02  2.621e-02   2.403  0.01637 *  
## month(date, label = TRUE)^4   4.958e-01  2.631e-02  18.849  < 2e-16 ***
## month(date, label = TRUE)^5  -1.210e-02  2.649e-02  -0.457  0.64792    
## month(date, label = TRUE)^6  -2.454e-02  2.657e-02  -0.924  0.35578    
## month(date, label = TRUE)^7  -1.214e-01  2.637e-02  -4.604 4.41e-06 ***
## month(date, label = TRUE)^8   5.237e-02  2.630e-02   1.992  0.04656 *  
## month(date, label = TRUE)^9   1.875e-01  2.637e-02   7.111 1.61e-12 ***
## month(date, label = TRUE)^10 -9.883e-02  2.633e-02  -3.753  0.00018 ***
## month(date, label = TRUE)^11  2.105e-02  2.636e-02   0.799  0.42459    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3355 on 1933 degrees of freedom
## Multiple R-squared:  0.3288, Adjusted R-squared:  0.3246 
## F-statistic: 78.91 on 12 and 1933 DF,  p-value: < 2.2e-16
## 
## ----
## 
## Summary for Group: Massachusetts---
## Call:
## stats::lm(formula = .formula, data = df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.5744 -0.2182 -0.0520  0.1720  3.4061 
## 
## Coefficients:
##                                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                   9.225e+00  3.344e-02 275.905  < 2e-16 ***
## as.numeric(date)             -2.361e-05  2.337e-06 -10.100  < 2e-16 ***
## month(date, label = TRUE).L  -6.932e-02  3.182e-02  -2.178 0.029515 *  
## month(date, label = TRUE).Q   5.922e-01  3.174e-02  18.658  < 2e-16 ***
## month(date, label = TRUE).C   1.857e-01  3.167e-02   5.862 5.37e-09 ***
## month(date, label = TRUE)^4   2.916e-01  3.179e-02   9.174  < 2e-16 ***
## month(date, label = TRUE)^5  -5.067e-02  3.200e-02  -1.584 0.113470    
## month(date, label = TRUE)^6  -1.111e-01  3.211e-02  -3.459 0.000553 ***
## month(date, label = TRUE)^7  -3.922e-03  3.187e-02  -0.123 0.902055    
## month(date, label = TRUE)^8   3.341e-02  3.177e-02   1.051 0.293223    
## month(date, label = TRUE)^9   9.801e-02  3.186e-02   3.077 0.002124 ** 
## month(date, label = TRUE)^10 -8.764e-02  3.182e-02  -2.755 0.005933 ** 
## month(date, label = TRUE)^11  1.453e-02  3.185e-02   0.456 0.648223    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4054 on 1933 degrees of freedom
## Multiple R-squared:  0.2386, Adjusted R-squared:  0.2339 
## F-statistic: 50.47 on 12 and 1933 DF,  p-value: < 2.2e-16
## 
## ----
## 
## Summary for Group: Maine---
## Call:
## stats::lm(formula = .formula, data = df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.7840 -0.2403 -0.0668  0.1902  3.4876 
## 
## Coefficients:
##                                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                   8.456e+00  3.177e-02 266.188  < 2e-16 ***
## as.numeric(date)             -9.187e-05  2.221e-06 -41.371  < 2e-16 ***
## month(date, label = TRUE).L  -2.002e-01  3.023e-02  -6.621 4.60e-11 ***
## month(date, label = TRUE).Q   8.158e-01  3.016e-02  27.052  < 2e-16 ***
## month(date, label = TRUE).C   2.228e-01  3.009e-02   7.403 1.98e-13 ***
## month(date, label = TRUE)^4   2.191e-01  3.020e-02   7.255 5.77e-13 ***
## month(date, label = TRUE)^5  -2.072e-01  3.040e-02  -6.815 1.26e-11 ***
## month(date, label = TRUE)^6  -2.378e-02  3.050e-02  -0.780   0.4358    
## month(date, label = TRUE)^7  -1.317e-01  3.028e-02  -4.350 1.43e-05 ***
## month(date, label = TRUE)^8   5.578e-02  3.019e-02   1.848   0.0648 .  
## month(date, label = TRUE)^9   1.229e-01  3.027e-02   4.059 5.12e-05 ***
## month(date, label = TRUE)^10 -7.245e-02  3.023e-02  -2.397   0.0166 *  
## month(date, label = TRUE)^11 -4.985e-02  3.026e-02  -1.647   0.0997 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3852 on 1933 degrees of freedom
## Multiple R-squared:  0.5808, Adjusted R-squared:  0.5782 
## F-statistic: 223.2 on 12 and 1933 DF,  p-value: < 2.2e-16
## 
## ----
## 
## Summary for Group: New Hampshire---
## Call:
## stats::lm(formula = .formula, data = df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.2343 -0.3688 -0.0618  0.2738  3.8308 
## 
## Coefficients:
##                                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                   7.253e+00  4.513e-02 160.707  < 2e-16 ***
## as.numeric(date)             -3.579e-05  3.155e-06 -11.344  < 2e-16 ***
## month(date, label = TRUE).L  -2.066e-01  4.296e-02  -4.810 1.63e-06 ***
## month(date, label = TRUE).Q   4.368e-01  4.285e-02  10.196  < 2e-16 ***
## month(date, label = TRUE).C   2.261e-01  4.275e-02   5.288 1.38e-07 ***
## month(date, label = TRUE)^4   3.041e-01  4.290e-02   7.089 1.89e-12 ***
## month(date, label = TRUE)^5  -3.490e-02  4.320e-02  -0.808  0.41925    
## month(date, label = TRUE)^6  -3.508e-03  4.334e-02  -0.081  0.93550    
## month(date, label = TRUE)^7  -1.280e-01  4.301e-02  -2.976  0.00296 ** 
## month(date, label = TRUE)^8   8.625e-02  4.289e-02   2.011  0.04446 *  
## month(date, label = TRUE)^9   1.790e-01  4.300e-02   4.163 3.28e-05 ***
## month(date, label = TRUE)^10 -1.243e-01  4.295e-02  -2.894  0.00385 ** 
## month(date, label = TRUE)^11  7.915e-03  4.299e-02   0.184  0.85394    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5472 on 1933 degrees of freedom
## Multiple R-squared:  0.1618, Adjusted R-squared:  0.1566 
## F-statistic: 31.09 on 12 and 1933 DF,  p-value: < 2.2e-16
## 
## ----
## 
## Summary for Group: Rhode Island---
## Call:
## stats::lm(formula = .formula, data = df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.0239 -0.2602 -0.0668  0.1755  3.4221 
## 
## Coefficients:
##                                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                   8.100e+00  3.583e-02 226.100  < 2e-16 ***
## as.numeric(date)             -5.177e-05  2.505e-06 -20.672  < 2e-16 ***
## month(date, label = TRUE).L  -3.388e-01  3.410e-02  -9.935  < 2e-16 ***
## month(date, label = TRUE).Q   5.761e-01  3.401e-02  16.938  < 2e-16 ***
## month(date, label = TRUE).C   1.804e-01  3.394e-02   5.316 1.18e-07 ***
## month(date, label = TRUE)^4   4.491e-01  3.406e-02  13.187  < 2e-16 ***
## month(date, label = TRUE)^5   6.250e-02  3.429e-02   1.823 0.068498 .  
## month(date, label = TRUE)^6  -1.254e-01  3.440e-02  -3.645 0.000274 ***
## month(date, label = TRUE)^7  -2.261e-02  3.414e-02  -0.662 0.507937    
## month(date, label = TRUE)^8   5.412e-02  3.405e-02   1.590 0.112106    
## month(date, label = TRUE)^9   1.822e-01  3.413e-02   5.337 1.06e-07 ***
## month(date, label = TRUE)^10 -1.906e-01  3.409e-02  -5.591 2.58e-08 ***
## month(date, label = TRUE)^11  1.037e-02  3.413e-02   0.304 0.761228    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4344 on 1933 degrees of freedom
## Multiple R-squared:  0.3619, Adjusted R-squared:  0.3579 
## F-statistic: 91.35 on 12 and 1933 DF,  p-value: < 2.2e-16
## 
## ----
## 
## Summary for Group: Vermont---
## Call:
## stats::lm(formula = .formula, data = df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.1343 -0.2459 -0.0461  0.2005  3.2072 
## 
## Coefficients:
##                                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                   7.150e+00  3.276e-02 218.221  < 2e-16 ***
## as.numeric(date)             -4.448e-05  2.290e-06 -19.418  < 2e-16 ***
## month(date, label = TRUE).L  -8.413e-02  3.118e-02  -2.698  0.00704 ** 
## month(date, label = TRUE).Q   7.201e-01  3.110e-02  23.151  < 2e-16 ***
## month(date, label = TRUE).C   5.345e-01  3.104e-02  17.222  < 2e-16 ***
## month(date, label = TRUE)^4   8.826e-02  3.115e-02   2.834  0.00465 ** 
## month(date, label = TRUE)^5  -3.269e-01  3.136e-02 -10.424  < 2e-16 ***
## month(date, label = TRUE)^6   1.307e-03  3.146e-02   0.042  0.96687    
## month(date, label = TRUE)^7  -1.371e-01  3.123e-02  -4.391 1.19e-05 ***
## month(date, label = TRUE)^8  -2.954e-03  3.114e-02  -0.095  0.92441    
## month(date, label = TRUE)^9   1.933e-01  3.122e-02   6.193 7.19e-10 ***
## month(date, label = TRUE)^10 -1.706e-01  3.118e-02  -5.472 5.04e-08 ***
## month(date, label = TRUE)^11  7.481e-02  3.121e-02   2.397  0.01662 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3973 on 1933 degrees of freedom
## Multiple R-squared:  0.4227, Adjusted R-squared:  0.4191 
## F-statistic: 117.9 on 12 and 1933 DF,  p-value: < 2.2e-16
## 
## ----

Plotting Seasonality and Correlation

Correlation Plots

# Autocorrelation diagnostics of claims for each state, covering lags up
# to 2 years, with the white-noise bars switched on.
group_by(claims_tbl, symbol) %>%
  plot_acf_diagnostics(
    date, claims,
    .show_white_noise_bars = TRUE,
    .lags                  = "2 years"
  )

Seasonality

# Seasonal diagnostic plots of claims, produced separately for each state.
group_by(claims_tbl, symbol) %>%
  plot_seasonal_diagnostics(
    .date_var = date,
    .value    = claims
  )

STL Diagnostics

# STL diagnostics per state, limited to the observed, season, trend, and
# remainder features. The frequency and trend settings are not given here,
# so timetk chooses them and reports its choice in the printed messages.
group_by(claims_tbl, symbol) %>%
  plot_stl_diagnostics(
    .date_var    = date,
    .value       = claims,
    .feature_set = c("observed", "season", "trend", "remainder")
  )
## frequency = 13 observations per 1 quarter
## trend = 53 observations per 1 year
## frequency = 13 observations per 1 quarter
## trend = 53 observations per 1 year
## frequency = 13 observations per 1 quarter
## trend = 53 observations per 1 year
## frequency = 13 observations per 1 quarter
## trend = 53 observations per 1 year
## frequency = 13 observations per 1 quarter
## trend = 53 observations per 1 year
## frequency = 13 observations per 1 quarter
## trend = 53 observations per 1 year

Time Series Data Wrangling

Summarize by Time

# Collapse each state's claims into quarterly totals, then plot the
# aggregated series as a static 2-column facet grid.
group_by(claims_tbl, symbol) %>%
  summarise_by_time(date, .by = "quarter", claims = sum(claims)) %>%
  plot_time_series(
    .date_var    = date,
    .value       = claims,
    .interactive = FALSE,
    .facet_ncol  = 2
  )
## Ignoring unknown labels:
## • colour : "Legend"

Filter By Time

# Restrict each state's series to the window bounded by "2008" and "2010",
# then plot the filtered data as a static 2-column facet grid.
group_by(claims_tbl, symbol) %>%
  filter_by_time(date, .start_date = "2008", .end_date = "2010") %>%
  plot_time_series(
    .date_var    = date,
    .value       = claims,
    .interactive = FALSE,
    .facet_ncol  = 2
  )
## Ignoring unknown labels:
## • colour : "Legend"

Padding Data

# Pad each state's series onto a weekly grid; any row inserted for a
# missing week gets claims = 0.
group_by(claims_tbl, symbol) %>%
  pad_by_time(
    .date_var  = date,
    .pad_value = 0,
    .by        = "week"
  )
## # A tibble: 11,676 × 3
## # Groups:   symbol [6]
##    symbol      date       claims
##    <fct>       <date>      <int>
##  1 Connecticut 1989-01-07   8345
##  2 Connecticut 1989-01-14   6503
##  3 Connecticut 1989-01-21   3821
##  4 Connecticut 1989-01-28   4663
##  5 Connecticut 1989-02-04   4162
##  6 Connecticut 1989-02-11   4337
##  7 Connecticut 1989-02-18   4079
##  8 Connecticut 1989-02-25   3556
##  9 Connecticut 1989-03-04   3826
## 10 Connecticut 1989-03-11   3515
## # ℹ 11,666 more rows

Sliding (Rolling) Calculations

# 4-observation rolling mean of claims within each state. The window is
# right-aligned, and `.partial = TRUE` lets the first rows average over
# however many observations are available so far.
group_by(claims_tbl, symbol) %>%
  mutate(
    rolling_avg_4 = slidify_vec(
      .x       = claims,
      .f       = mean,
      .partial = TRUE,
      .align   = "right",
      .period  = 4
    )
  )
## # A tibble: 11,676 × 4
## # Groups:   symbol [6]
##    symbol      date       claims rolling_avg_4
##    <fct>       <date>      <int>         <dbl>
##  1 Connecticut 1989-01-07   8345         8345 
##  2 Connecticut 1989-01-14   6503         7424 
##  3 Connecticut 1989-01-21   3821         6223 
##  4 Connecticut 1989-01-28   4663         5833 
##  5 Connecticut 1989-02-04   4162         4787.
##  6 Connecticut 1989-02-11   4337         4246.
##  7 Connecticut 1989-02-18   4079         4310.
##  8 Connecticut 1989-02-25   3556         4034.
##  9 Connecticut 1989-03-04   3826         3950.
## 10 Connecticut 1989-03-11   3515         3744 
## # ℹ 11,666 more rows