# for Core packages
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# for financial analysis
library(tidyquant)
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
## ── Attaching core tidyquant packages ──────────────────────── tidyquant 1.0.9 ──
## ✔ PerformanceAnalytics 2.0.4 ✔ TTR 0.24.4
## ✔ quantmod 0.4.26 ✔ xts 0.14.0
## ── Conflicts ────────────────────────────────────────── tidyquant_conflicts() ──
## ✖ zoo::as.Date() masks base::as.Date()
## ✖ zoo::as.Date.numeric() masks base::as.Date.numeric()
## ✖ dplyr::filter() masks stats::filter()
## ✖ xts::first() masks dplyr::first()
## ✖ dplyr::lag() masks stats::lag()
## ✖ xts::last() masks dplyr::last()
## ✖ PerformanceAnalytics::legend() masks graphics::legend()
## ✖ quantmod::summary() masks base::summary()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# for times series
library(timetk)
##
## Attaching package: 'timetk'
##
## The following object is masked from 'package:tidyquant':
##
## FANG
Goal: Apply Matt Dancho’s tutorial to the initial unemployment claims of the New England states.
The following is a replication of Matt Dancho’s tutorial on this page.
# Earliest observation date to request.
start_date <- "1989-01-01"

# Series codes to download, one per New England state.
symbols_txt <- c(
  "CTICLAIMS", # Connecticut
  "MEICLAIMS", # Maine
  "MAICLAIMS", # Massachusetts
  "NHICLAIMS", # New Hampshire
  "RIICLAIMS", # Rhode Island
  "VTICLAIMS"  # Vermont
)

# Download every series from `start_date` onward, rename the value column
# to `claims`, and replace each series code with its state name.
claims_tbl <- tq_get(symbols_txt, get = "economic.data", from = start_date) %>%
  rename(claims = price) %>%
  mutate(
    symbol = fct_recode(
      symbol,
      "Connecticut" = "CTICLAIMS",
      "Maine" = "MEICLAIMS",
      "Massachusetts" = "MAICLAIMS",
      "New Hampshire" = "NHICLAIMS",
      "Rhode Island" = "RIICLAIMS",
      "Vermont" = "VTICLAIMS"
    )
  )
# Remove the largest outliers from the start of COVID: keep only rows
# with fewer than 100,000 claims.
new_claims_tbl <- filter(claims_tbl, claims < 100000)

# Print the filtered table.
new_claims_tbl
## # A tibble: 11,353 × 3
## symbol date claims
## <fct> <date> <int>
## 1 Connecticut 1989-01-07 8345
## 2 Connecticut 1989-01-14 6503
## 3 Connecticut 1989-01-21 3821
## 4 Connecticut 1989-01-28 4663
## 5 Connecticut 1989-02-04 4162
## 6 Connecticut 1989-02-11 4337
## 7 Connecticut 1989-02-18 4079
## 8 Connecticut 1989-02-25 3556
## 9 Connecticut 1989-03-04 3826
## 10 Connecticut 1989-03-11 3515
## # ℹ 11,343 more rows
# Plot claims over time for the whole table.
# NOTE(review): the table holds six states and is not grouped here, so the
# states are not drawn separately — confirm this is intended.
new_claims_tbl %>%
  plot_time_series(date, claims)

# Number of rows kept per state.
count(new_claims_tbl, symbol)
## # A tibble: 6 × 2
## symbol n
## <fct> <int>
## 1 Connecticut 1892
## 2 Massachusetts 1889
## 3 Maine 1893
## 4 New Hampshire 1893
## 5 Rhode Island 1893
## 6 Vermont 1893
# Static plot with one facet per state, laid out in two columns, with
# facet scales set to "free".
new_claims_tbl %>%
  group_by(symbol) %>%
  plot_time_series(
    date, claims,
    .interactive = FALSE,
    .facet_scales = "free",
    .facet_ncol = 2
  )

# Number of rows kept per state.
count(new_claims_tbl, symbol)
## # A tibble: 6 × 2
## symbol n
## <fct> <int>
## 1 Connecticut 1892
## 2 Massachusetts 1889
## 3 Maine 1893
## 4 New Hampshire 1893
## 5 Rhode Island 1893
## 6 Vermont 1893
# Log of claims per state (two facet columns, "free" facet scales),
# colored by the calendar year of each observation.
new_claims_tbl %>%
  group_by(symbol) %>%
  plot_time_series(
    date, log(claims),
    .color_var = year(date),
    .facet_scales = "free",
    .facet_ncol = 2
  )
# Static plot of claims colored by calendar month, with a custom title and
# axis labels. The color variable is the month of each observation, so the
# legend is titled "Month".
# NOTE(review): the table is not grouped here, so the six states are not
# drawn separately — confirm this is intended.
new_claims_tbl %>%
  plot_time_series(
    date, claims,
    .color_var = month(date, label = TRUE),
    # Returns static ggplot
    .interactive = FALSE,
    # Customize
    .title = "New England Unemployment Claims Data",
    .x_lab = "Date",
    .y_lab = "Claims",
    .color_lab = "Month"
  )

# Number of rows kept per state.
new_claims_tbl %>% count(symbol)
## # A tibble: 6 × 2
## symbol n
## <fct> <int>
## 1 Connecticut 1892
## 2 Massachusetts 1889
## 3 Maine 1893
## 4 New Hampshire 1893
## 5 Rhode Island 1893
## 6 Vermont 1893
# Boxplots of claims per state, one box per "1 year" period, using
# observations up to the end date "2025"; facets in two columns.
new_claims_tbl %>%
  filter_by_time(date, .end_date = "2025") %>%
  group_by(symbol) %>%
  plot_time_series_boxplot(
    date, claims,
    .facet_ncol = 2,
    .period = "1 year"
  )
# Fit, for each state, a linear model of log(claims) on a numeric time
# trend plus the month of the observation, and plot the result in two
# facet columns. `.show_summary = TRUE` prints the lm() summary per group.
# The month term appears in the printed summaries as polynomial contrast
# terms (.L, .Q, .C, ^4, ...).
new_claims_tbl %>%
  group_by(symbol) %>%
  plot_time_series_regression(
    date,
    .formula = log(claims) ~ as.numeric(date) + month(date, label = TRUE),
    .show_summary = TRUE,
    .facet_ncol = 2
  )
##
## Summary for Group: Connecticut---
## Call:
## stats::lm(formula = .formula, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.82454 -0.21546 -0.03272 0.17843 2.35979
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 8.607e+00 2.782e-02 309.396 < 2e-16 ***
## as.numeric(date) -1.855e-05 1.974e-06 -9.401 < 2e-16 ***
## month(date, label = TRUE).L -2.827e-01 2.613e-02 -10.819 < 2e-16 ***
## month(date, label = TRUE).Q 4.428e-01 2.606e-02 16.991 < 2e-16 ***
## month(date, label = TRUE).C 5.383e-02 2.601e-02 2.069 0.038665 *
## month(date, label = TRUE)^4 4.979e-01 2.609e-02 19.088 < 2e-16 ***
## month(date, label = TRUE)^5 -1.495e-02 2.629e-02 -0.569 0.569659
## month(date, label = TRUE)^6 -3.560e-02 2.638e-02 -1.350 0.177292
## month(date, label = TRUE)^7 -1.189e-01 2.616e-02 -4.544 5.87e-06 ***
## month(date, label = TRUE)^8 6.151e-02 2.608e-02 2.358 0.018460 *
## month(date, label = TRUE)^9 1.816e-01 2.616e-02 6.942 5.31e-12 ***
## month(date, label = TRUE)^10 -8.673e-02 2.615e-02 -3.316 0.000931 ***
## month(date, label = TRUE)^11 8.209e-03 2.615e-02 0.314 0.753565
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3283 on 1879 degrees of freedom
## Multiple R-squared: 0.3378, Adjusted R-squared: 0.3336
## F-statistic: 79.89 on 12 and 1879 DF, p-value: < 2.2e-16
##
## ----
##
## Summary for Group: Massachusetts---
## Call:
## stats::lm(formula = .formula, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.57768 -0.21366 -0.04541 0.17917 2.54955
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 9.224e+00 3.253e-02 283.602 < 2e-16 ***
## as.numeric(date) -2.370e-05 2.309e-06 -10.266 < 2e-16 ***
## month(date, label = TRUE).L -5.177e-02 3.055e-02 -1.694 0.09038 .
## month(date, label = TRUE).Q 5.990e-01 3.046e-02 19.667 < 2e-16 ***
## month(date, label = TRUE).C 1.620e-01 3.042e-02 5.324 1.14e-07 ***
## month(date, label = TRUE)^4 3.120e-01 3.051e-02 10.226 < 2e-16 ***
## month(date, label = TRUE)^5 -4.826e-02 3.073e-02 -1.570 0.11653
## month(date, label = TRUE)^6 -1.324e-01 3.084e-02 -4.294 1.85e-05 ***
## month(date, label = TRUE)^7 1.529e-02 3.060e-02 0.500 0.61730
## month(date, label = TRUE)^8 2.298e-02 3.052e-02 0.753 0.45151
## month(date, label = TRUE)^9 9.509e-02 3.062e-02 3.106 0.00193 **
## month(date, label = TRUE)^10 -8.207e-02 3.058e-02 -2.683 0.00735 **
## month(date, label = TRUE)^11 9.776e-03 3.055e-02 0.320 0.74903
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3836 on 1876 degrees of freedom
## Multiple R-squared: 0.2622, Adjusted R-squared: 0.2575
## F-statistic: 55.56 on 12 and 1876 DF, p-value: < 2.2e-16
##
## ----
##
## Summary for Group: Maine---
## Call:
## stats::lm(formula = .formula, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.8060 -0.2450 -0.0667 0.1949 3.4642
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 8.424e+00 3.283e-02 256.618 < 2e-16 ***
## as.numeric(date) -8.908e-05 2.329e-06 -38.255 < 2e-16 ***
## month(date, label = TRUE).L -2.096e-01 3.084e-02 -6.794 1.45e-11 ***
## month(date, label = TRUE).Q 8.159e-01 3.076e-02 26.521 < 2e-16 ***
## month(date, label = TRUE).C 2.211e-01 3.069e-02 7.204 8.40e-13 ***
## month(date, label = TRUE)^4 2.203e-01 3.079e-02 7.154 1.20e-12 ***
## month(date, label = TRUE)^5 -2.082e-01 3.103e-02 -6.709 2.58e-11 ***
## month(date, label = TRUE)^6 -1.985e-02 3.112e-02 -0.638 0.5236
## month(date, label = TRUE)^7 -1.313e-01 3.088e-02 -4.250 2.24e-05 ***
## month(date, label = TRUE)^8 5.486e-02 3.078e-02 1.782 0.0748 .
## month(date, label = TRUE)^9 1.266e-01 3.086e-02 4.103 4.25e-05 ***
## month(date, label = TRUE)^10 -7.191e-02 3.086e-02 -2.331 0.0199 *
## month(date, label = TRUE)^11 -5.272e-02 3.086e-02 -1.708 0.0877 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3875 on 1880 degrees of freedom
## Multiple R-squared: 0.5608, Adjusted R-squared: 0.558
## F-statistic: 200.1 on 12 and 1880 DF, p-value: < 2.2e-16
##
## ----
##
## Summary for Group: New Hampshire---
## Call:
## stats::lm(formula = .formula, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.2788 -0.3674 -0.0482 0.2821 3.7898
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 7.199e+00 4.657e-02 154.605 < 2e-16 ***
## as.numeric(date) -3.108e-05 3.303e-06 -9.409 < 2e-16 ***
## month(date, label = TRUE).L -2.085e-01 4.375e-02 -4.766 2.02e-06 ***
## month(date, label = TRUE).Q 4.407e-01 4.364e-02 10.099 < 2e-16 ***
## month(date, label = TRUE).C 2.273e-01 4.354e-02 5.220 1.98e-07 ***
## month(date, label = TRUE)^4 3.040e-01 4.367e-02 6.960 4.69e-12 ***
## month(date, label = TRUE)^5 -4.349e-02 4.402e-02 -0.988 0.32331
## month(date, label = TRUE)^6 -1.709e-04 4.414e-02 -0.004 0.99691
## month(date, label = TRUE)^7 -1.362e-01 4.380e-02 -3.109 0.00190 **
## month(date, label = TRUE)^8 8.065e-02 4.366e-02 1.847 0.06487 .
## month(date, label = TRUE)^9 1.821e-01 4.378e-02 4.159 3.34e-05 ***
## month(date, label = TRUE)^10 -1.278e-01 4.377e-02 -2.920 0.00355 **
## month(date, label = TRUE)^11 4.820e-03 4.377e-02 0.110 0.91233
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.5497 on 1880 degrees of freedom
## Multiple R-squared: 0.1489, Adjusted R-squared: 0.1435
## F-statistic: 27.41 on 12 and 1880 DF, p-value: < 2.2e-16
##
## ----
##
## Summary for Group: Rhode Island---
## Call:
## stats::lm(formula = .formula, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.0291 -0.2645 -0.0658 0.1784 3.4124
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 8.085e+00 3.702e-02 218.388 < 2e-16 ***
## as.numeric(date) -5.048e-05 2.626e-06 -19.219 < 2e-16 ***
## month(date, label = TRUE).L -3.382e-01 3.478e-02 -9.724 < 2e-16 ***
## month(date, label = TRUE).Q 5.731e-01 3.469e-02 16.518 < 2e-16 ***
## month(date, label = TRUE).C 1.823e-01 3.462e-02 5.267 1.54e-07 ***
## month(date, label = TRUE)^4 4.465e-01 3.473e-02 12.858 < 2e-16 ***
## month(date, label = TRUE)^5 5.934e-02 3.500e-02 1.696 0.090120 .
## month(date, label = TRUE)^6 -1.216e-01 3.510e-02 -3.464 0.000544 ***
## month(date, label = TRUE)^7 -2.601e-02 3.483e-02 -0.747 0.455283
## month(date, label = TRUE)^8 5.336e-02 3.471e-02 1.537 0.124408
## month(date, label = TRUE)^9 1.868e-01 3.481e-02 5.368 8.94e-08 ***
## month(date, label = TRUE)^10 -1.915e-01 3.480e-02 -5.503 4.25e-08 ***
## month(date, label = TRUE)^11 9.086e-03 3.480e-02 0.261 0.794072
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.437 on 1880 degrees of freedom
## Multiple R-squared: 0.3491, Adjusted R-squared: 0.345
## F-statistic: 84.04 on 12 and 1880 DF, p-value: < 2.2e-16
##
## ----
##
## Summary for Group: Vermont---
## Call:
## stats::lm(formula = .formula, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.1616 -0.2387 -0.0416 0.2009 3.1763
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 7.101e+00 3.359e-02 211.408 < 2e-16 ***
## as.numeric(date) -4.017e-05 2.383e-06 -16.859 < 2e-16 ***
## month(date, label = TRUE).L -8.441e-02 3.156e-02 -2.675 0.00754 **
## month(date, label = TRUE).Q 7.203e-01 3.148e-02 22.885 < 2e-16 ***
## month(date, label = TRUE).C 5.328e-01 3.141e-02 16.965 < 2e-16 ***
## month(date, label = TRUE)^4 8.634e-02 3.150e-02 2.741 0.00619 **
## month(date, label = TRUE)^5 -3.314e-01 3.175e-02 -10.439 < 2e-16 ***
## month(date, label = TRUE)^6 6.522e-03 3.184e-02 0.205 0.83773
## month(date, label = TRUE)^7 -1.434e-01 3.160e-02 -4.539 6.01e-06 ***
## month(date, label = TRUE)^8 -7.112e-03 3.149e-02 -0.226 0.82134
## month(date, label = TRUE)^9 1.947e-01 3.158e-02 6.166 8.58e-10 ***
## month(date, label = TRUE)^10 -1.690e-01 3.157e-02 -5.354 9.66e-08 ***
## month(date, label = TRUE)^11 7.044e-02 3.157e-02 2.231 0.02580 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3965 on 1880 degrees of freedom
## Multiple R-squared: 0.4093, Adjusted R-squared: 0.4055
## F-statistic: 108.5 on 12 and 1880 DF, p-value: < 2.2e-16
##
## ----
# Autocorrelation diagnostics of claims for each state, with lags
# specified as "1 year".
new_claims_tbl %>%
  group_by(symbol) %>%
  plot_acf_diagnostics(.date_var = date, .value = claims, .lags = "1 year")
# Seasonal diagnostics of claims on the ungrouped table.
# NOTE(review): no group_by() here, so the six states are not separated —
# confirm this is intended.
new_claims_tbl %>%
  plot_seasonal_diagnostics(.date_var = date, .value = claims)

# Number of rows kept per state.
count(new_claims_tbl, symbol)
## # A tibble: 6 × 2
## symbol n
## <fct> <int>
## 1 Connecticut 1892
## 2 Massachusetts 1889
## 3 Maine 1893
## 4 New Hampshire 1893
## 5 Rhode Island 1893
## 6 Vermont 1893
# Seasonal diagnostics of claims, computed separately for each state.
new_claims_tbl %>%
  group_by(symbol) %>%
  plot_seasonal_diagnostics(.date_var = date, .value = claims)
# STL diagnostics per state, showing the observed, season, trend and
# remainder features. The call reports the frequency and trend windows it
# used for each group.
new_claims_tbl %>%
  group_by(symbol) %>%
  plot_stl_diagnostics(
    .date_var = date,
    .value = claims,
    .feature_set = c("observed", "season", "trend", "remainder")
  )
## frequency = 13 observations per 1 quarter
## trend = 53 observations per 1 year
## frequency = 13 observations per 1 quarter
## trend = 53 observations per 1 year
## frequency = 13 observations per 1 quarter
## trend = 53 observations per 1 year
## frequency = 13 observations per 1 quarter
## trend = 53 observations per 1 year
## frequency = 13 observations per 1 quarter
## trend = 53 observations per 1 year
## frequency = 13 observations per 1 quarter
## trend = 53 observations per 1 year
# Collapse each state's claims to a monthly mean, then draw a static plot
# with two facet columns.
new_claims_tbl %>%
  group_by(symbol) %>%
  summarise_by_time(date, .by = "month", claims = mean(claims)) %>%
  plot_time_series(
    .date_var = date,
    .value = claims,
    .interactive = FALSE,
    .facet_ncol = 2
  )
# Restrict each state's series to the window from "1989-01-01" to "2025",
# then plot it with two facet columns.
new_claims_tbl %>%
  group_by(symbol) %>%
  filter_by_time(
    date,
    .end_date = "2025",
    .start_date = "1989-01-01"
  ) %>%
  plot_time_series(.date_var = date, .value = claims, .facet_ncol = 2)
# Expand each state's series to one row per day; the rows inserted by the
# padding get a claims value of 0.
new_claims_tbl %>%
  group_by(symbol) %>%
  pad_by_time(.date_var = date, .pad_value = 0, .by = "day")
## # A tibble: 79,470 × 3
## # Groups: symbol [6]
## symbol date claims
## <fct> <date> <int>
## 1 Connecticut 1989-01-07 8345
## 2 Connecticut 1989-01-08 0
## 3 Connecticut 1989-01-09 0
## 4 Connecticut 1989-01-10 0
## 5 Connecticut 1989-01-11 0
## 6 Connecticut 1989-01-12 0
## 7 Connecticut 1989-01-13 0
## 8 Connecticut 1989-01-14 6503
## 9 Connecticut 1989-01-15 0
## 10 Connecticut 1989-01-16 0
## # ℹ 79,460 more rows
# Take the first 10 rows and add a right-aligned rolling mean of claims
# over a period of 2. With `.partial = TRUE` the first row still gets a
# value (its own claims) instead of being left missing.
new_claims_tbl %>%
  head(10) %>%
  mutate(
    rolling_avg_2 = slidify_vec(
      .x = claims,
      .f = mean,
      .period = 2,
      .align = "right",
      .partial = TRUE
    )
  )
## # A tibble: 10 × 4
## symbol date claims rolling_avg_2
## <fct> <date> <int> <dbl>
## 1 Connecticut 1989-01-07 8345 8345
## 2 Connecticut 1989-01-14 6503 7424
## 3 Connecticut 1989-01-21 3821 5162
## 4 Connecticut 1989-01-28 4663 4242
## 5 Connecticut 1989-02-04 4162 4412.
## 6 Connecticut 1989-02-11 4337 4250.
## 7 Connecticut 1989-02-18 4079 4208
## 8 Connecticut 1989-02-25 3556 3818.
## 9 Connecticut 1989-03-04 3826 3691
## 10 Connecticut 1989-03-11 3515 3670.