# for Core packages
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.0.4
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# for financial analysis
library(tidyquant)
## Warning: package 'tidyquant' was built under R version 4.4.3
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
## Warning: package 'xts' was built under R version 4.4.3
## Warning: package 'zoo' was built under R version 4.4.3
## Warning: package 'quantmod' was built under R version 4.4.3
## Warning: package 'TTR' was built under R version 4.4.3
## Warning: package 'PerformanceAnalytics' was built under R version 4.4.3
## ── Attaching core tidyquant packages ─────────────────────── tidyquant 1.0.11 ──
## ✔ PerformanceAnalytics 2.0.8 ✔ TTR 0.24.4
## ✔ quantmod 0.4.27 ✔ xts 0.14.1
## ── Conflicts ────────────────────────────────────────── tidyquant_conflicts() ──
## ✖ zoo::as.Date() masks base::as.Date()
## ✖ zoo::as.Date.numeric() masks base::as.Date.numeric()
## ✖ dplyr::filter() masks stats::filter()
## ✖ xts::first() masks dplyr::first()
## ✖ dplyr::lag() masks stats::lag()
## ✖ xts::last() masks dplyr::last()
## ✖ PerformanceAnalytics::legend() masks graphics::legend()
## ✖ quantmod::summary() masks base::summary()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# for time series
library(timetk)
## Warning: package 'timetk' was built under R version 4.4.3
##
## Attaching package: 'timetk'
##
## The following object is masked from 'package:tidyquant':
##
## FANG
Goal: Apply Matt Dancho’s tutorial to initial unemployment claims for the New England states.
The following replicates Matt Dancho’s tutorial on this page.
# Earliest observation date to request for every series.
start_date <- "1989-01-01"

# Initial-claims series identifiers, one per New England state.
symbols_txt <- c(
  "CTICLAIMS", # Connecticut
  "MEICLAIMS", # Maine
  "MAICLAIMS", # Massachusetts
  "NHICLAIMS", # New Hampshire
  "RIICLAIMS", # Rhode Island
  "VTICLAIMS"  # Vermont
)
# Download every series listed in symbols_txt (from start_date onward), rename
# the value column to `claims`, and relabel each series ID with its state name.
claims_tbl <- tq_get(
  x    = symbols_txt,
  get  = "economic.data",
  from = start_date
) %>%
  rename(claims = price) %>%
  mutate(
    symbol = fct_recode(
      symbol,
      "Connecticut"   = "CTICLAIMS",
      "Maine"         = "MEICLAIMS",
      "Massachusetts" = "MAICLAIMS",
      "New Hampshire" = "NHICLAIMS",
      "Rhode Island"  = "RIICLAIMS",
      "Vermont"       = "VTICLAIMS"
    )
  )

# Print the combined long-format table (one row per state and date).
claims_tbl
## # A tibble: 11,358 × 3
## symbol date claims
## <fct> <date> <int>
## 1 Connecticut 1989-01-07 8345
## 2 Connecticut 1989-01-14 6503
## 3 Connecticut 1989-01-21 3821
## 4 Connecticut 1989-01-28 4663
## 5 Connecticut 1989-02-04 4162
## 6 Connecticut 1989-02-11 4337
## 7 Connecticut 1989-02-18 4079
## 8 Connecticut 1989-02-25 3556
## 9 Connecticut 1989-03-04 3826
## 10 Connecticut 1989-03-11 3515
## # ℹ 11,348 more rows
# Overlay all six states on a single chart.
# claims_tbl stacks the states in long format, so plotting it without a
# grouping or color variable would join every state's observations into one
# zig-zagging line. Mapping symbol to .color_var draws one line per state.
claims_tbl %>%
  plot_time_series(
    .date_var  = date,
    .value     = claims,
    .color_var = symbol
  )
# Show the table grouped by state (the grouping is reported in the header).
group_by(claims_tbl, symbol)
## # A tibble: 11,358 × 3
## # Groups: symbol [6]
## symbol date claims
## <fct> <date> <int>
## 1 Connecticut 1989-01-07 8345
## 2 Connecticut 1989-01-14 6503
## 3 Connecticut 1989-01-21 3821
## 4 Connecticut 1989-01-28 4663
## 5 Connecticut 1989-02-04 4162
## 6 Connecticut 1989-02-11 4337
## 7 Connecticut 1989-02-18 4079
## 8 Connecticut 1989-02-25 3556
## 9 Connecticut 1989-03-04 3826
## 10 Connecticut 1989-03-11 3515
## # ℹ 11,348 more rows
# One facet per state in a two-column grid, each facet with free axis scales,
# rendered with .interactive = FALSE.
claims_tbl %>%
  group_by(symbol) %>%
  plot_time_series(
    date, claims,
    .interactive  = FALSE,
    .facet_scales = "free",
    .facet_ncol   = 2
  )
# Number of observations available for each state.
count(claims_tbl, symbol)
## # A tibble: 6 × 2
## symbol n
## <fct> <int>
## 1 Connecticut 1893
## 2 Massachusetts 1893
## 3 Maine 1893
## 4 New Hampshire 1893
## 5 Rhode Island 1893
## 6 Vermont 1893
# Box plots of claims per state, aggregated over one-year periods, after
# restricting the data with an end-date bound of "2025".
claims_tbl %>%
  filter_by_time(date, .end_date = "2025") %>%
  group_by(symbol) %>%
  plot_time_series_boxplot(
    date, claims,
    .period     = "1 year",
    .facet_ncol = 2
  )
# Per-state linear regression of log claims on a numeric time trend plus a
# month-of-year factor; .show_summary = TRUE prints each group's lm summary.
claims_tbl %>%
  group_by(symbol) %>%
  plot_time_series_regression(
    date,
    .formula      = log(claims) ~ as.numeric(date) + month(date, label = TRUE),
    .show_summary = TRUE,
    .facet_ncol   = 2
  )
##
## Summary for Group: Connecticut---
## Call:
## stats::lm(formula = .formula, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.8278 -0.2151 -0.0328 0.1752 3.2009
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 8.601e+00 2.850e-02 301.745 < 2e-16 ***
## as.numeric(date) -1.799e-05 2.022e-06 -8.898 < 2e-16 ***
## month(date, label = TRUE).L -2.871e-01 2.678e-02 -10.721 < 2e-16 ***
## month(date, label = TRUE).Q 4.396e-01 2.671e-02 16.457 < 2e-16 ***
## month(date, label = TRUE).C 6.097e-02 2.665e-02 2.288 0.022255 *
## month(date, label = TRUE)^4 4.950e-01 2.673e-02 18.515 < 2e-16 ***
## month(date, label = TRUE)^5 -1.970e-02 2.694e-02 -0.731 0.464878
## month(date, label = TRUE)^6 -2.794e-02 2.702e-02 -1.034 0.301283
## month(date, label = TRUE)^7 -1.217e-01 2.681e-02 -4.537 6.05e-06 ***
## month(date, label = TRUE)^8 5.625e-02 2.672e-02 2.105 0.035448 *
## month(date, label = TRUE)^9 1.914e-01 2.680e-02 7.141 1.32e-12 ***
## month(date, label = TRUE)^10 -9.513e-02 2.679e-02 -3.551 0.000394 ***
## month(date, label = TRUE)^11 1.223e-02 2.679e-02 0.456 0.648166
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3365 on 1880 degrees of freedom
## Multiple R-squared: 0.3264, Adjusted R-squared: 0.3221
## F-statistic: 75.92 on 12 and 1880 DF, p-value: < 2.2e-16
##
## ----
##
## Summary for Group: Massachusetts---
## Call:
## stats::lm(formula = .formula, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.5900 -0.2204 -0.0512 0.1764 3.3906
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 9.201e+00 3.468e-02 265.286 < 2e-16 ***
## as.numeric(date) -2.153e-05 2.460e-06 -8.752 < 2e-16 ***
## month(date, label = TRUE).L -7.185e-02 3.259e-02 -2.205 0.027582 *
## month(date, label = TRUE).Q 5.933e-01 3.250e-02 18.253 < 2e-16 ***
## month(date, label = TRUE).C 1.869e-01 3.243e-02 5.764 9.59e-09 ***
## month(date, label = TRUE)^4 2.917e-01 3.253e-02 8.965 < 2e-16 ***
## month(date, label = TRUE)^5 -5.019e-02 3.279e-02 -1.531 0.126012
## month(date, label = TRUE)^6 -1.117e-01 3.288e-02 -3.398 0.000693 ***
## month(date, label = TRUE)^7 -6.546e-03 3.263e-02 -0.201 0.841016
## month(date, label = TRUE)^8 3.230e-02 3.252e-02 0.993 0.320800
## month(date, label = TRUE)^9 9.836e-02 3.261e-02 3.016 0.002593 **
## month(date, label = TRUE)^10 -8.964e-02 3.260e-02 -2.750 0.006023 **
## month(date, label = TRUE)^11 1.442e-02 3.260e-02 0.442 0.658340
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4094 on 1880 degrees of freedom
## Multiple R-squared: 0.2295, Adjusted R-squared: 0.2246
## F-statistic: 46.66 on 12 and 1880 DF, p-value: < 2.2e-16
##
## ----
##
## Summary for Group: Maine---
## Call:
## stats::lm(formula = .formula, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.8060 -0.2450 -0.0667 0.1949 3.4642
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 8.424e+00 3.283e-02 256.618 < 2e-16 ***
## as.numeric(date) -8.908e-05 2.329e-06 -38.255 < 2e-16 ***
## month(date, label = TRUE).L -2.096e-01 3.084e-02 -6.794 1.45e-11 ***
## month(date, label = TRUE).Q 8.159e-01 3.076e-02 26.521 < 2e-16 ***
## month(date, label = TRUE).C 2.211e-01 3.069e-02 7.204 8.40e-13 ***
## month(date, label = TRUE)^4 2.203e-01 3.079e-02 7.154 1.20e-12 ***
## month(date, label = TRUE)^5 -2.082e-01 3.103e-02 -6.709 2.58e-11 ***
## month(date, label = TRUE)^6 -1.985e-02 3.112e-02 -0.638 0.5236
## month(date, label = TRUE)^7 -1.313e-01 3.088e-02 -4.250 2.24e-05 ***
## month(date, label = TRUE)^8 5.486e-02 3.078e-02 1.782 0.0748 .
## month(date, label = TRUE)^9 1.266e-01 3.086e-02 4.103 4.25e-05 ***
## month(date, label = TRUE)^10 -7.191e-02 3.086e-02 -2.331 0.0199 *
## month(date, label = TRUE)^11 -5.272e-02 3.086e-02 -1.708 0.0877 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3875 on 1880 degrees of freedom
## Multiple R-squared: 0.5608, Adjusted R-squared: 0.558
## F-statistic: 200.1 on 12 and 1880 DF, p-value: < 2.2e-16
##
## ----
##
## Summary for Group: New Hampshire---
## Call:
## stats::lm(formula = .formula, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.2788 -0.3674 -0.0482 0.2821 3.7898
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 7.199e+00 4.657e-02 154.605 < 2e-16 ***
## as.numeric(date) -3.108e-05 3.303e-06 -9.409 < 2e-16 ***
## month(date, label = TRUE).L -2.085e-01 4.375e-02 -4.766 2.02e-06 ***
## month(date, label = TRUE).Q 4.407e-01 4.364e-02 10.099 < 2e-16 ***
## month(date, label = TRUE).C 2.273e-01 4.354e-02 5.220 1.98e-07 ***
## month(date, label = TRUE)^4 3.040e-01 4.367e-02 6.960 4.69e-12 ***
## month(date, label = TRUE)^5 -4.349e-02 4.402e-02 -0.988 0.32331
## month(date, label = TRUE)^6 -1.709e-04 4.414e-02 -0.004 0.99691
## month(date, label = TRUE)^7 -1.362e-01 4.380e-02 -3.109 0.00190 **
## month(date, label = TRUE)^8 8.065e-02 4.366e-02 1.847 0.06487 .
## month(date, label = TRUE)^9 1.821e-01 4.378e-02 4.159 3.34e-05 ***
## month(date, label = TRUE)^10 -1.278e-01 4.377e-02 -2.920 0.00355 **
## month(date, label = TRUE)^11 4.820e-03 4.377e-02 0.110 0.91233
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.5497 on 1880 degrees of freedom
## Multiple R-squared: 0.1489, Adjusted R-squared: 0.1435
## F-statistic: 27.41 on 12 and 1880 DF, p-value: < 2.2e-16
##
## ----
##
## Summary for Group: Rhode Island---
## Call:
## stats::lm(formula = .formula, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.0291 -0.2645 -0.0658 0.1784 3.4124
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 8.085e+00 3.702e-02 218.388 < 2e-16 ***
## as.numeric(date) -5.048e-05 2.626e-06 -19.219 < 2e-16 ***
## month(date, label = TRUE).L -3.382e-01 3.478e-02 -9.724 < 2e-16 ***
## month(date, label = TRUE).Q 5.731e-01 3.469e-02 16.518 < 2e-16 ***
## month(date, label = TRUE).C 1.823e-01 3.462e-02 5.267 1.54e-07 ***
## month(date, label = TRUE)^4 4.465e-01 3.473e-02 12.858 < 2e-16 ***
## month(date, label = TRUE)^5 5.934e-02 3.500e-02 1.696 0.090120 .
## month(date, label = TRUE)^6 -1.216e-01 3.510e-02 -3.464 0.000544 ***
## month(date, label = TRUE)^7 -2.601e-02 3.483e-02 -0.747 0.455283
## month(date, label = TRUE)^8 5.336e-02 3.471e-02 1.537 0.124408
## month(date, label = TRUE)^9 1.868e-01 3.481e-02 5.368 8.94e-08 ***
## month(date, label = TRUE)^10 -1.915e-01 3.480e-02 -5.503 4.25e-08 ***
## month(date, label = TRUE)^11 9.086e-03 3.480e-02 0.261 0.794072
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.437 on 1880 degrees of freedom
## Multiple R-squared: 0.3491, Adjusted R-squared: 0.345
## F-statistic: 84.04 on 12 and 1880 DF, p-value: < 2.2e-16
##
## ----
##
## Summary for Group: Vermont---
## Call:
## stats::lm(formula = .formula, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.1616 -0.2387 -0.0416 0.2009 3.1763
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 7.101e+00 3.359e-02 211.408 < 2e-16 ***
## as.numeric(date) -4.017e-05 2.383e-06 -16.859 < 2e-16 ***
## month(date, label = TRUE).L -8.441e-02 3.156e-02 -2.675 0.00754 **
## month(date, label = TRUE).Q 7.203e-01 3.148e-02 22.885 < 2e-16 ***
## month(date, label = TRUE).C 5.328e-01 3.141e-02 16.965 < 2e-16 ***
## month(date, label = TRUE)^4 8.634e-02 3.150e-02 2.741 0.00619 **
## month(date, label = TRUE)^5 -3.314e-01 3.175e-02 -10.439 < 2e-16 ***
## month(date, label = TRUE)^6 6.522e-03 3.184e-02 0.205 0.83773
## month(date, label = TRUE)^7 -1.434e-01 3.160e-02 -4.539 6.01e-06 ***
## month(date, label = TRUE)^8 -7.112e-03 3.149e-02 -0.226 0.82134
## month(date, label = TRUE)^9 1.947e-01 3.158e-02 6.166 8.58e-10 ***
## month(date, label = TRUE)^10 -1.690e-01 3.157e-02 -5.354 9.66e-08 ***
## month(date, label = TRUE)^11 7.044e-02 3.157e-02 2.231 0.02580 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3965 on 1880 degrees of freedom
## Multiple R-squared: 0.4093, Adjusted R-squared: 0.4055
## F-statistic: 108.5 on 12 and 1880 DF, p-value: < 2.2e-16
##
## ----
# ACF / PACF diagnostics for each state.
# The observations are spaced one week apart, so a "7 days" lag window covers
# only about one lag and leaves the plot nearly empty. A one-year window
# (roughly 52 weekly lags) exposes the annual seasonality instead.
claims_tbl %>%
  group_by(symbol) %>%
  plot_acf_diagnostics(
    date, claims,
    .lags = "1 year"
  )
# ACF / PACF diagnostics for each state over a three-month lag window.
# No .ccf_vars are supplied: claims_tbl holds only symbol, date, and claims,
# so there is no separate predictor to cross-correlate. Passing date and claims
# themselves would compare the series with its own time index and with itself.
claims_tbl %>%
  group_by(symbol) %>%
  plot_acf_diagnostics(
    date, claims,
    .lags = "3 months"
  )
# Seasonal diagnostics on the ungrouped table, i.e. with all states pooled.
plot_seasonal_diagnostics(claims_tbl, .date_var = date, .value = claims)
# Re-check the number of observations per state.
count(claims_tbl, symbol)
## # A tibble: 6 × 2
## symbol n
## <fct> <int>
## 1 Connecticut 1893
## 2 Massachusetts 1893
## 3 Maine 1893
## 4 New Hampshire 1893
## 5 Rhode Island 1893
## 6 Vermont 1893
# Seasonal diagnostics computed separately for each state.
claims_tbl %>%
  group_by(symbol) %>%
  plot_seasonal_diagnostics(.date_var = date, .value = claims)
# STL decomposition per state, showing the observed series together with its
# seasonal, trend, and remainder components.
claims_tbl %>%
  group_by(symbol) %>%
  plot_stl_diagnostics(
    .date_var    = date,
    .value       = claims,
    .feature_set = c("observed", "season", "trend", "remainder")
  )
## frequency = 13 observations per 1 quarter
## trend = 53 observations per 1 year
## frequency = 13 observations per 1 quarter
## trend = 53 observations per 1 year
## frequency = 13 observations per 1 quarter
## trend = 53 observations per 1 year
## frequency = 13 observations per 1 quarter
## trend = 53 observations per 1 year
## frequency = 13 observations per 1 quarter
## trend = 53 observations per 1 year
## frequency = 13 observations per 1 quarter
## trend = 53 observations per 1 year
# Total claims per state and calendar month (missing values ignored), then
# plot the monthly totals.
claims_tbl %>%
  group_by(symbol) %>%
  summarize_by_time(
    date,
    .by          = "month",
    claims_total = sum(claims, na.rm = TRUE)
  ) %>%
  plot_time_series(date, claims_total)
# Zoom in on the window bounded by "1989-09" and "1990" for each state and
# plot it in a two-column facet grid.
claims_tbl %>%
  group_by(symbol) %>%
  filter_by_time(date, .start_date = "1989-09", .end_date = "1990") %>%
  plot_time_series(.date_var = date, .value = claims, .facet_ncol = 2)
# Expand each state's series to daily frequency; the inserted days receive a
# claims value of 0.
claims_tbl %>%
  group_by(symbol) %>%
  pad_by_time(.date_var = date, .by = "day", .pad_value = 0)
## # A tibble: 79,470 × 3
## # Groups: symbol [6]
## symbol date claims
## <fct> <date> <int>
## 1 Connecticut 1989-01-07 8345
## 2 Connecticut 1989-01-08 0
## 3 Connecticut 1989-01-09 0
## 4 Connecticut 1989-01-10 0
## 5 Connecticut 1989-01-11 0
## 6 Connecticut 1989-01-12 0
## 7 Connecticut 1989-01-13 0
## 8 Connecticut 1989-01-14 6503
## 9 Connecticut 1989-01-15 0
## 10 Connecticut 1989-01-16 0
## # ℹ 79,460 more rows
# Two-observation trailing mean of claims, computed within each state.
# Rows are ordered by date only, so states are interleaved in the result.
# .partial = TRUE keeps the first row of each state, where the window is
# incomplete.
claims_tbl %>%
  arrange(date) %>%
  group_by(symbol) %>%
  mutate(
    claims_roll_avg = slidify_vec(
      claims,
      mean,
      .period  = 2,
      .align   = "right",
      .partial = TRUE
    )
  )
## # A tibble: 11,358 × 4
## # Groups: symbol [6]
## symbol date claims claims_roll_avg
## <fct> <date> <int> <dbl>
## 1 Connecticut 1989-01-07 8345 8345
## 2 Maine 1989-01-07 4550 4550
## 3 Massachusetts 1989-01-07 12677 12677
## 4 New Hampshire 1989-01-07 1288 1288
## 5 Rhode Island 1989-01-07 2731 2731
## 6 Vermont 1989-01-07 1023 1023
## 7 Connecticut 1989-01-14 6503 7424
## 8 Maine 1989-01-14 3859 4204.
## 9 Massachusetts 1989-01-14 9937 11307
## 10 New Hampshire 1989-01-14 1101 1194.
## # ℹ 11,348 more rows
# Rolling-regression helper: fits lm(first argument ~ second argument) over a
# right-aligned window of 90 observations and returns the fits as a list.
lm_roll <- slidify(
  ~ lm(..1 ~ ..2),
  .period = 90,
  .align  = "right",
  .unlist = FALSE
)

# Apply the rolling regression of claims on a numeric date within each state,
# then drop the leading rows that have no complete 90-observation window.
claims_tbl %>%
  select(symbol, date, claims) %>%
  group_by(symbol) %>%
  mutate(
    numeric_date = as.numeric(date),
    rolling_lm   = lm_roll(claims, numeric_date)
  ) %>%
  filter(!is.na(rolling_lm))
## # A tibble: 10,824 × 5
## # Groups: symbol [6]
## symbol date claims numeric_date rolling_lm
## <fct> <date> <int> <dbl> <list>
## 1 Connecticut 1990-09-22 3927 7569 <lm>
## 2 Connecticut 1990-09-29 4471 7576 <lm>
## 3 Connecticut 1990-10-06 4430 7583 <lm>
## 4 Connecticut 1990-10-13 4494 7590 <lm>
## 5 Connecticut 1990-10-20 4894 7597 <lm>
## 6 Connecticut 1990-10-27 4653 7604 <lm>
## 7 Connecticut 1990-11-03 4719 7611 <lm>
## 8 Connecticut 1990-11-10 5347 7618 <lm>
## 9 Connecticut 1990-11-17 4824 7625 <lm>
## 10 Connecticut 1990-11-24 5367 7632 <lm>
## # ℹ 10,814 more rows