# for Core packages
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# for financial analysis
library(tidyquant)
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
## ── Attaching core tidyquant packages ──────────────────────── tidyquant 1.0.8 ──
## ✔ PerformanceAnalytics 2.0.4 ✔ TTR 0.24.4
## ✔ quantmod 0.4.26 ✔ xts 0.13.2
## ── Conflicts ────────────────────────────────────────── tidyquant_conflicts() ──
## ✖ zoo::as.Date() masks base::as.Date()
## ✖ zoo::as.Date.numeric() masks base::as.Date.numeric()
## ✖ dplyr::filter() masks stats::filter()
## ✖ xts::first() masks dplyr::first()
## ✖ dplyr::lag() masks stats::lag()
## ✖ xts::last() masks dplyr::last()
## ✖ PerformanceAnalytics::legend() masks graphics::legend()
## ✖ quantmod::summary() masks base::summary()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# for time series
library(timetk)
##
## Attaching package: 'timetk'
##
## The following object is masked from 'package:tidyquant':
##
## FANG
Goal: Apply Matt Dancho’s tutorial to the initial unemployment claims of the New England states.
The following replicates Matt Dancho’s tutorial on this page.
# Earliest observation date to request.
start_date <- "1989-01-01"

# Initial-claims series IDs, one per New England state.
symbols_txt <- c(
  "CTICLAIMS", # Connecticut
  "MEICLAIMS", # Maine
  "MAICLAIMS", # Massachusetts
  "NHICLAIMS", # New Hampshire
  "RIICLAIMS", # Rhode Island
  "VTICLAIMS"  # Vermont
)

# Download all six series as economic data, call the value column `claims`,
# and replace each series ID in `symbol` with the state's name.
claims_tbl <- tq_get(symbols_txt, get = "economic.data", from = start_date) %>%
  rename(claims = price) %>%
  mutate(
    symbol = fct_recode(
      symbol,
      "Connecticut" = "CTICLAIMS",
      "Maine" = "MEICLAIMS",
      "Massachusetts" = "MAICLAIMS",
      "New Hampshire" = "NHICLAIMS",
      "Rhode Island" = "RIICLAIMS",
      "Vermont" = "VTICLAIMS"
    )
  )
# Overview of every state's claims on a single panel.
# claims_tbl stacks six state series, so `.color_var = symbol` draws one
# coloured line per state instead of joining all observations into one line.
# Argument names are written in full rather than relying on partial matching.
claims_tbl %>%
  plot_time_series(
    .date_var = date,
    .value = claims,
    .color_var = symbol
  )
# Claims over time, grouped by state and laid out in two facet columns.
claims_tbl %>%
  group_by(symbol) %>%
  plot_time_series(date, claims, .facet_ncol = 2)
# Box plots of claims per state, using an end date of "1989" and one-year
# periods, arranged in two facet columns.
# NOTE(review): the data starts in 1989, so this presumably leaves a single
# yearly box per state -- confirm that "1989" is the intended cutoff.
claims_tbl %>%
  filter_by_time(date, .end_date = "1989") %>%
  group_by(symbol) %>%
  plot_time_series_boxplot(
    date,
    claims,
    .period = "1 year",
    .facet_ncol = 2
  )
# Per-state regression of log claims on a numeric time trend plus a
# month-of-year term; `.show_summary = TRUE` also prints each model summary.
claims_tbl %>%
  group_by(symbol) %>%
  plot_time_series_regression(
    date,
    .formula = log(claims) ~ as.numeric(date) + month(date, label = TRUE),
    .facet_ncol = 2,
    .show_summary = TRUE
  )
##
## Summary for Group: Connecticut---
## Call:
## stats::lm(formula = .formula, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.8406 -0.2157 -0.0311 0.1770 3.1917
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 8.590e+00 2.882e-02 298.027 < 2e-16 ***
## as.numeric(date) -1.705e-05 2.056e-06 -8.291 < 2e-16 ***
## month(date, label = TRUE).L -2.960e-01 2.705e-02 -10.941 < 2e-16 ***
## month(date, label = TRUE).Q 4.436e-01 2.693e-02 16.474 < 2e-16 ***
## month(date, label = TRUE).C 6.124e-02 2.687e-02 2.279 0.022777 *
## month(date, label = TRUE)^4 4.920e-01 2.694e-02 18.261 < 2e-16 ***
## month(date, label = TRUE)^5 -1.926e-02 2.712e-02 -0.710 0.477796
## month(date, label = TRUE)^6 -2.713e-02 2.719e-02 -0.998 0.318550
## month(date, label = TRUE)^7 -1.240e-01 2.697e-02 -4.596 4.61e-06 ***
## month(date, label = TRUE)^8 5.703e-02 2.687e-02 2.122 0.033934 *
## month(date, label = TRUE)^9 1.915e-01 2.691e-02 7.115 1.59e-12 ***
## month(date, label = TRUE)^10 -9.594e-02 2.685e-02 -3.573 0.000362 ***
## month(date, label = TRUE)^11 1.270e-02 2.682e-02 0.473 0.635964
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3367 on 1860 degrees of freedom
## Multiple R-squared: 0.3265, Adjusted R-squared: 0.3221
## F-statistic: 75.13 on 12 and 1860 DF, p-value: < 2.2e-16
##
## ----
##
## Summary for Group: Massachusetts---
## Call:
## stats::lm(formula = .formula, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.5926 -0.2213 -0.0507 0.1764 3.3829
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 9.196e+00 3.519e-02 261.337 < 2e-16 ***
## as.numeric(date) -2.107e-05 2.510e-06 -8.393 < 2e-16 ***
## month(date, label = TRUE).L -7.700e-02 3.303e-02 -2.331 0.019840 *
## month(date, label = TRUE).Q 5.935e-01 3.287e-02 18.054 < 2e-16 ***
## month(date, label = TRUE).C 1.882e-01 3.280e-02 5.738 1.12e-08 ***
## month(date, label = TRUE)^4 2.882e-01 3.290e-02 8.760 < 2e-16 ***
## month(date, label = TRUE)^5 -4.975e-02 3.312e-02 -1.502 0.133228
## month(date, label = TRUE)^6 -1.110e-01 3.320e-02 -3.344 0.000841 ***
## month(date, label = TRUE)^7 -8.522e-03 3.293e-02 -0.259 0.795830
## month(date, label = TRUE)^8 3.314e-02 3.281e-02 1.010 0.312538
## month(date, label = TRUE)^9 9.836e-02 3.286e-02 2.994 0.002793 **
## month(date, label = TRUE)^10 -9.020e-02 3.279e-02 -2.751 0.005997 **
## month(date, label = TRUE)^11 1.477e-02 3.274e-02 0.451 0.651918
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4111 on 1860 degrees of freedom
## Multiple R-squared: 0.2273, Adjusted R-squared: 0.2223
## F-statistic: 45.58 on 12 and 1860 DF, p-value: < 2.2e-16
##
## ----
##
## Summary for Group: Maine---
## Call:
## stats::lm(formula = .formula, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.8237 -0.2484 -0.0636 0.1920 3.4535
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 8.409e+00 3.323e-02 253.034 < 2e-16 ***
## as.numeric(date) -8.775e-05 2.371e-06 -37.016 < 2e-16 ***
## month(date, label = TRUE).L -2.185e-01 3.119e-02 -7.006 3.43e-12 ***
## month(date, label = TRUE).Q 8.239e-01 3.105e-02 26.537 < 2e-16 ***
## month(date, label = TRUE).C 2.224e-01 3.098e-02 7.180 1.00e-12 ***
## month(date, label = TRUE)^4 2.186e-01 3.107e-02 7.038 2.74e-12 ***
## month(date, label = TRUE)^5 -2.066e-01 3.127e-02 -6.606 5.14e-11 ***
## month(date, label = TRUE)^6 -1.897e-02 3.135e-02 -0.605 0.5453
## month(date, label = TRUE)^7 -1.336e-01 3.110e-02 -4.296 1.83e-05 ***
## month(date, label = TRUE)^8 5.612e-02 3.098e-02 1.811 0.0703 .
## month(date, label = TRUE)^9 1.262e-01 3.103e-02 4.067 4.97e-05 ***
## month(date, label = TRUE)^10 -7.237e-02 3.096e-02 -2.337 0.0195 *
## month(date, label = TRUE)^11 -5.242e-02 3.092e-02 -1.695 0.0902 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3882 on 1860 degrees of freedom
## Multiple R-squared: 0.5577, Adjusted R-squared: 0.5549
## F-statistic: 195.5 on 12 and 1860 DF, p-value: < 2.2e-16
##
## ----
##
## Summary for Group: New Hampshire---
## Call:
## stats::lm(formula = .formula, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.3055 -0.3663 -0.0445 0.2834 3.7652
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 7.169e+00 4.698e-02 152.575 < 2e-16 ***
## as.numeric(date) -2.837e-05 3.352e-06 -8.464 < 2e-16 ***
## month(date, label = TRUE).L -2.213e-01 4.410e-02 -5.019 5.68e-07 ***
## month(date, label = TRUE).Q 4.583e-01 4.389e-02 10.441 < 2e-16 ***
## month(date, label = TRUE).C 2.330e-01 4.380e-02 5.320 1.16e-07 ***
## month(date, label = TRUE)^4 3.041e-01 4.392e-02 6.924 6.04e-12 ***
## month(date, label = TRUE)^5 -4.043e-02 4.422e-02 -0.914 0.36061
## month(date, label = TRUE)^6 2.923e-03 4.433e-02 0.066 0.94742
## month(date, label = TRUE)^7 -1.389e-01 4.397e-02 -3.158 0.00161 **
## month(date, label = TRUE)^8 8.020e-02 4.380e-02 1.831 0.06728 .
## month(date, label = TRUE)^9 1.846e-01 4.387e-02 4.207 2.71e-05 ***
## month(date, label = TRUE)^10 -1.310e-01 4.378e-02 -2.991 0.00281 **
## month(date, label = TRUE)^11 6.380e-03 4.372e-02 0.146 0.88399
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.5489 on 1860 degrees of freedom
## Multiple R-squared: 0.1497, Adjusted R-squared: 0.1442
## F-statistic: 27.28 on 12 and 1860 DF, p-value: < 2.2e-16
##
## ----
##
## Summary for Group: Rhode Island---
## Call:
## stats::lm(formula = .formula, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.0319 -0.2661 -0.0657 0.1798 3.4030
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 8.078e+00 3.748e-02 215.516 < 2e-16 ***
## as.numeric(date) -4.986e-05 2.674e-06 -18.647 < 2e-16 ***
## month(date, label = TRUE).L -3.444e-01 3.518e-02 -9.789 < 2e-16 ***
## month(date, label = TRUE).Q 5.732e-01 3.502e-02 16.370 < 2e-16 ***
## month(date, label = TRUE).C 1.853e-01 3.494e-02 5.303 1.28e-07 ***
## month(date, label = TRUE)^4 4.429e-01 3.504e-02 12.639 < 2e-16 ***
## month(date, label = TRUE)^5 6.028e-02 3.528e-02 1.709 0.087634 .
## month(date, label = TRUE)^6 -1.192e-01 3.536e-02 -3.371 0.000764 ***
## month(date, label = TRUE)^7 -2.815e-02 3.508e-02 -0.802 0.422403
## month(date, label = TRUE)^8 5.431e-02 3.495e-02 1.554 0.120327
## month(date, label = TRUE)^9 1.879e-01 3.500e-02 5.367 9.00e-08 ***
## month(date, label = TRUE)^10 -1.928e-01 3.493e-02 -5.521 3.84e-08 ***
## month(date, label = TRUE)^11 9.878e-03 3.488e-02 0.283 0.777052
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4379 on 1860 degrees of freedom
## Multiple R-squared: 0.3476, Adjusted R-squared: 0.3434
## F-statistic: 82.6 on 12 and 1860 DF, p-value: < 2.2e-16
##
## ----
##
## Summary for Group: Vermont---
## Call:
## stats::lm(formula = .formula, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.17306 -0.23512 -0.04133 0.20115 3.15548
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 7.079e+00 3.389e-02 208.862 < 2e-16 ***
## as.numeric(date) -3.827e-05 2.418e-06 -15.827 < 2e-16 ***
## month(date, label = TRUE).L -9.411e-02 3.181e-02 -2.958 0.00313 **
## month(date, label = TRUE).Q 7.314e-01 3.166e-02 23.100 < 2e-16 ***
## month(date, label = TRUE).C 5.388e-01 3.160e-02 17.051 < 2e-16 ***
## month(date, label = TRUE)^4 8.408e-02 3.169e-02 2.653 0.00803 **
## month(date, label = TRUE)^5 -3.267e-01 3.190e-02 -10.243 < 2e-16 ***
## month(date, label = TRUE)^6 8.257e-03 3.198e-02 0.258 0.79627
## month(date, label = TRUE)^7 -1.453e-01 3.172e-02 -4.582 4.91e-06 ***
## month(date, label = TRUE)^8 -5.897e-03 3.160e-02 -0.187 0.85198
## month(date, label = TRUE)^9 1.952e-01 3.165e-02 6.169 8.41e-10 ***
## month(date, label = TRUE)^10 -1.703e-01 3.158e-02 -5.393 7.80e-08 ***
## month(date, label = TRUE)^11 7.119e-02 3.154e-02 2.257 0.02410 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3959 on 1860 degrees of freedom
## Multiple R-squared: 0.411, Adjusted R-squared: 0.4072
## F-statistic: 108.2 on 12 and 1860 DF, p-value: < 2.2e-16
##
## ----
# Autocorrelation diagnostics per state, limited to lags of up to 20 weeks.
claims_tbl %>%
  group_by(symbol) %>%
  plot_acf_diagnostics(
    .date_var = date,
    .value = claims,
    .lags = "20 weeks"
  )
# Seasonal diagnostics per state.
# claims_tbl stacks six state series, so it is grouped by symbol first;
# without the grouping the seasonal box plots would pool every state's claims
# into the same distributions.
claims_tbl %>%
  group_by(symbol) %>%
  plot_seasonal_diagnostics(.date_var = date, .value = claims)
# STL decomposition per state, showing the observed series together with its
# seasonal, trend, and remainder components.
claims_tbl %>%
  group_by(symbol) %>%
  plot_stl_diagnostics(
    .date_var = date,
    .value = claims,
    .feature_set = c("observed", "season", "trend", "remainder")
  )
## frequency = 13 observations per 1 quarter
## trend = 53 observations per 1 year
## frequency = 13 observations per 1 quarter
## trend = 53 observations per 1 year
## frequency = 13 observations per 1 quarter
## trend = 53 observations per 1 year
## frequency = 13 observations per 1 quarter
## trend = 53 observations per 1 year
## frequency = 13 observations per 1 quarter
## trend = 53 observations per 1 year
## frequency = 13 observations per 1 quarter
## trend = 53 observations per 1 year
# Non-interactive version of the per-state plot, in two facet columns.
claims_tbl %>%
  group_by(symbol) %>%
  plot_time_series(
    .date_var = date,
    .value = claims,
    .interactive = FALSE,
    .facet_ncol = 2
  )
# Restrict each state's series to the 1989-01 through 2024-11 window, then
# plot the result in two facet columns.
claims_tbl %>%
  group_by(symbol) %>%
  filter_by_time(
    date,
    .start_date = "1989-01",
    .end_date = "2024-11"
  ) %>%
  plot_time_series(.date_var = date, .value = claims, .facet_ncol = 2)
# Expand each state's series to daily frequency, filling the inserted days
# with a claims value of 0.
claims_tbl %>%
  group_by(symbol) %>%
  pad_by_time(.date_var = date, .by = "day", .pad_value = 0)
## # A tibble: 78,630 × 3
## # Groups: symbol [6]
## symbol date claims
## <fct> <date> <int>
## 1 Connecticut 1989-01-07 8345
## 2 Connecticut 1989-01-08 0
## 3 Connecticut 1989-01-09 0
## 4 Connecticut 1989-01-10 0
## 5 Connecticut 1989-01-11 0
## 6 Connecticut 1989-01-12 0
## 7 Connecticut 1989-01-13 0
## 8 Connecticut 1989-01-14 6503
## 9 Connecticut 1989-01-15 0
## 10 Connecticut 1989-01-16 0
## # ℹ 78,620 more rows
# Two-observation rolling mean over the first ten rows. The window is
# right-aligned, and `.partial = TRUE` lets the first row be averaged on its
# own rather than returned as NA.
claims_tbl %>%
  head(n = 10) %>%
  mutate(
    rolling_avg_2 = slidify_vec(
      .x = claims,
      .f = mean,
      .period = 2,
      .align = "right",
      .partial = TRUE
    )
  )
## # A tibble: 10 × 4
## symbol date claims rolling_avg_2
## <fct> <date> <int> <dbl>
## 1 Connecticut 1989-01-07 8345 8345
## 2 Connecticut 1989-01-14 6503 7424
## 3 Connecticut 1989-01-21 3821 5162
## 4 Connecticut 1989-01-28 4663 4242
## 5 Connecticut 1989-02-04 4162 4412.
## 6 Connecticut 1989-02-11 4337 4250.
## 7 Connecticut 1989-02-18 4079 4208
## 8 Connecticut 1989-02-25 3556 3818.
## 9 Connecticut 1989-03-04 3826 3691
## 10 Connecticut 1989-03-11 3515 3670.
# Rolling regression helper: fits lm(first argument ~ second argument) over a
# right-aligned window of 90 observations. `.unlist = FALSE` keeps each fit as
# a model object so the results can live in a list column.
lm_roll <- slidify(
  ~ lm(..1 ~ ..2),
  .period = 90,
  .align = "right",
  .unlist = FALSE
)

# For each state, regress claims on the date (as a number) over the rolling
# window, then drop the leading rows whose window is not yet full (NA fits).
reg_results <- claims_tbl %>%
  select(symbol, date, claims) %>%
  group_by(symbol) %>%
  mutate(
    numeric_date = as.numeric(date),
    rolling_lm = lm_roll(claims, numeric_date)
  ) %>%
  filter(!is.na(rolling_lm))

reg_results
## # A tibble: 10,704 × 5
## # Groups: symbol [6]
## symbol date claims numeric_date rolling_lm
## <fct> <date> <int> <dbl> <list>
## 1 Connecticut 1990-09-22 3927 7569 <lm>
## 2 Connecticut 1990-09-29 4471 7576 <lm>
## 3 Connecticut 1990-10-06 4430 7583 <lm>
## 4 Connecticut 1990-10-13 4494 7590 <lm>
## 5 Connecticut 1990-10-20 4894 7597 <lm>
## 6 Connecticut 1990-10-27 4653 7604 <lm>
## 7 Connecticut 1990-11-03 4719 7611 <lm>
## 8 Connecticut 1990-11-10 5347 7618 <lm>
## 9 Connecticut 1990-11-17 4824 7625 <lm>
## 10 Connecticut 1990-11-24 5367 7632 <lm>
## # ℹ 10,694 more rows
# Coefficient table for the first rolling model.
broom::tidy(reg_results$rolling_lm[[1]])
## # A tibble: 2 × 5
## term estimate std.error statistic p.value
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 (Intercept) -11225. 6974. -1.61 0.111
## 2 ..2 2.19 0.961 2.28 0.0248
# Replace every rolling model with its coefficient table and expand those
# tables into rows (one row per model term).
reg_results %>%
  mutate(rolling_lm = map(rolling_lm, broom::tidy)) %>%
  unnest(rolling_lm)
## # A tibble: 21,408 × 9
## # Groups: symbol [6]
## symbol date claims numeric_date term estimate std.error statistic
## <fct> <date> <int> <dbl> <chr> <dbl> <dbl> <dbl>
## 1 Connecticut 1990-09-22 3927 7569 (Int… -1.12e4 6974. -1.61
## 2 Connecticut 1990-09-22 3927 7569 ..2 2.19e0 0.961 2.28
## 3 Connecticut 1990-09-29 4471 7576 (Int… -1.40e4 6705. -2.08
## 4 Connecticut 1990-09-29 4471 7576 ..2 2.56e0 0.923 2.78
## 5 Connecticut 1990-10-06 4430 7583 (Int… -1.53e4 6617. -2.31
## 6 Connecticut 1990-10-06 4430 7583 ..2 2.74e0 0.910 3.01
## 7 Connecticut 1990-10-13 4494 7590 (Int… -1.46e4 6638. -2.19
## 8 Connecticut 1990-10-13 4494 7590 ..2 2.64e0 0.912 2.89
## 9 Connecticut 1990-10-20 4894 7597 (Int… -1.48e4 6638. -2.23
## 10 Connecticut 1990-10-20 4894 7597 ..2 2.67e0 0.911 2.93
## # ℹ 21,398 more rows
## # ℹ 1 more variable: p.value <dbl>