# for Core packages
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# for financial analysis
library(tidyquant)
## Warning: package 'tidyquant' was built under R version 4.3.3
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
## ── Attaching core tidyquant packages ──────────────────────── tidyquant 1.0.9 ──
## ✔ PerformanceAnalytics 2.0.4 ✔ TTR 0.24.4
## ✔ quantmod 0.4.25 ✔ xts 0.13.2── Conflicts ────────────────────────────────────────── tidyquant_conflicts() ──
## ✖ zoo::as.Date() masks base::as.Date()
## ✖ zoo::as.Date.numeric() masks base::as.Date.numeric()
## ✖ dplyr::filter() masks stats::filter()
## ✖ xts::first() masks dplyr::first()
## ✖ dplyr::lag() masks stats::lag()
## ✖ xts::last() masks dplyr::last()
## ✖ PerformanceAnalytics::legend() masks graphics::legend()
## ✖ quantmod::summary() masks base::summary()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# for times series
library(timetk)
##
## Attaching package: 'timetk'
##
## The following object is masked from 'package:tidyquant':
##
## FANG
Goal: Apply Matt Dancho’s tutorial to state unemployment initial claims of New England states.
The following is the replication of Matt Dancho’s tutorial on this page
start_date <- "1989-01-01"
symbols_txt <- c("CTICLAIMS", # Connecticut
"MEICLAIMS", # Maine
"MAICLAIMS", # Massachusetts
"NHICLAIMS", # New Hampshire
"RIICLAIMS", # Rhode Island
"VTICLAIMS") # Vermont
claims_tbl <- tq_get(symbols_txt, get = "economic.data", from = start_date) %>%
mutate(symbol = fct_recode(symbol,
"Connecticut" = "CTICLAIMS",
"Maine" = "MEICLAIMS",
"Massachusetts" = "MAICLAIMS",
"New Hampshire" = "NHICLAIMS",
"Rhode Island" = "RIICLAIMS",
"Vermont" = "VTICLAIMS")) %>%
rename(claims = price)
claims_tbl
## # A tibble: 11,220 × 3
## symbol date claims
## <fct> <date> <int>
## 1 Connecticut 1989-01-07 8345
## 2 Connecticut 1989-01-14 6503
## 3 Connecticut 1989-01-21 3821
## 4 Connecticut 1989-01-28 4663
## 5 Connecticut 1989-02-04 4162
## 6 Connecticut 1989-02-11 4337
## 7 Connecticut 1989-02-18 4079
## 8 Connecticut 1989-02-25 3556
## 9 Connecticut 1989-03-04 3826
## 10 Connecticut 1989-03-11 3515
## # ℹ 11,210 more rows
claims_tbl %>%
plot_time_series(.date_var = date, .value = claims)
claims_tbl %>% count(symbol)
## # A tibble: 6 × 2
## symbol n
## <fct> <int>
## 1 Connecticut 1870
## 2 Massachusetts 1870
## 3 Maine 1870
## 4 New Hampshire 1870
## 5 Rhode Island 1870
## 6 Vermont 1870
claims_tbl %>%
group_by(symbol) %>%
plot_time_series(.date_var = date,
.value = claims,
.facet_ncol = 2,
.facet_scales = "free",
.interactive = FALSE)
Visualizing Transformations & Sub-Groups
claims_tbl %>% count(symbol)
## # A tibble: 6 × 2
## symbol n
## <fct> <int>
## 1 Connecticut 1870
## 2 Massachusetts 1870
## 3 Maine 1870
## 4 New Hampshire 1870
## 5 Rhode Island 1870
## 6 Vermont 1870
claims_tbl %>%
group_by(symbol) %>%
plot_time_series(.date_var = date,
.value = log(claims),
.facet_ncol = 2,
.facet_scales = "free",
.color_var = year(date),
.smooth = FALSE)
Static ggplot2 Visualizations & Customizations
claims_tbl %>%
plot_time_series(date, claims,
.color_var = month(date, label = TRUE),
# Returns static ggplot
.interactive = FALSE,
.smooth = FALSE,
# Customize
.title = "Monthly Initial Unemployment Claims in New England States",
.x_lab = "Date",
.y_lab = "Unemployment Claims",
.color_lab = "Month")
claims_tbl %>% count(symbol)
## # A tibble: 6 × 2
## symbol n
## <fct> <int>
## 1 Connecticut 1870
## 2 Massachusetts 1870
## 3 Maine 1870
## 4 New Hampshire 1870
## 5 Rhode Island 1870
## 6 Vermont 1870
claims_tbl %>%
filter_by_time(.date_var = date, .end_date = "1990") %>%
group_by(symbol) %>%
plot_time_series_boxplot(.date_var = date,
.value = claims,
.period = "1 year",
.facet_ncol = 2)
## Warning: There were 30 warnings in `dplyr::mutate()`.
## The first warning was:
## ℹ In argument: `.value_smooth = auto_smooth(...)`.
## ℹ In group 1: `symbol = Connecticut`.
## Caused by warning in `simpleLoess()`:
## ! span too small. fewer data values than degrees of freedom.
## ℹ Run `dplyr::last_dplyr_warnings()` to see the 29 remaining warnings.
claims_tbl %>%
group_by(symbol) %>%
plot_time_series_regression(.date_var = date,
.formula = log(claims) ~ as.numeric(date) +
month(date, label = TRUE),
.show_summary = TRUE)
##
## Summary for Group: Connecticut---
## Call:
## stats::lm(formula = .formula, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.8412 -0.2155 -0.0309 0.1774 3.1912
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 8.589e+00 2.888e-02 297.344 < 2e-16 ***
## as.numeric(date) -1.694e-05 2.062e-06 -8.212 4.02e-16 ***
## month(date, label = TRUE).L -2.949e-01 2.711e-02 -10.879 < 2e-16 ***
## month(date, label = TRUE).Q 4.443e-01 2.696e-02 16.481 < 2e-16 ***
## month(date, label = TRUE).C 6.112e-02 2.689e-02 2.273 0.023128 *
## month(date, label = TRUE)^4 4.911e-01 2.699e-02 18.199 < 2e-16 ***
## month(date, label = TRUE)^5 -2.063e-02 2.720e-02 -0.758 0.448327
## month(date, label = TRUE)^6 -2.853e-02 2.727e-02 -1.046 0.295512
## month(date, label = TRUE)^7 -1.251e-01 2.703e-02 -4.628 3.95e-06 ***
## month(date, label = TRUE)^8 5.630e-02 2.690e-02 2.093 0.036503 *
## month(date, label = TRUE)^9 1.911e-01 2.693e-02 7.095 1.83e-12 ***
## month(date, label = TRUE)^10 -9.609e-02 2.687e-02 -3.576 0.000358 ***
## month(date, label = TRUE)^11 1.266e-02 2.684e-02 0.472 0.637302
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3369 on 1857 degrees of freedom
## Multiple R-squared: 0.3257, Adjusted R-squared: 0.3213
## F-statistic: 74.75 on 12 and 1857 DF, p-value: < 2.2e-16
##
## ----
##
## Summary for Group: Massachusetts---
## Call:
## stats::lm(formula = .formula, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.5930 -0.2217 -0.0511 0.1769 3.3826
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 9.195e+00 3.527e-02 260.722 < 2e-16 ***
## as.numeric(date) -2.100e-05 2.518e-06 -8.341 < 2e-16 ***
## month(date, label = TRUE).L -7.636e-02 3.310e-02 -2.307 0.021160 *
## month(date, label = TRUE).Q 5.939e-01 3.292e-02 18.043 < 2e-16 ***
## month(date, label = TRUE).C 1.881e-01 3.283e-02 5.731 1.16e-08 ***
## month(date, label = TRUE)^4 2.877e-01 3.295e-02 8.730 < 2e-16 ***
## month(date, label = TRUE)^5 -5.053e-02 3.321e-02 -1.522 0.128257
## month(date, label = TRUE)^6 -1.118e-01 3.330e-02 -3.359 0.000798 ***
## month(date, label = TRUE)^7 -9.170e-03 3.300e-02 -0.278 0.781156
## month(date, label = TRUE)^8 3.272e-02 3.285e-02 0.996 0.319355
## month(date, label = TRUE)^9 9.814e-02 3.289e-02 2.984 0.002880 **
## month(date, label = TRUE)^10 -9.029e-02 3.281e-02 -2.752 0.005988 **
## month(date, label = TRUE)^11 1.475e-02 3.277e-02 0.450 0.652701
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4114 on 1857 degrees of freedom
## Multiple R-squared: 0.2272, Adjusted R-squared: 0.2222
## F-statistic: 45.49 on 12 and 1857 DF, p-value: < 2.2e-16
##
## ----
##
## Summary for Group: Maine---
## Call:
## stats::lm(formula = .formula, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.8236 -0.2487 -0.0647 0.1923 3.4536
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 8.409e+00 3.331e-02 252.451 < 2e-16 ***
## as.numeric(date) -8.776e-05 2.378e-06 -36.901 < 2e-16 ***
## month(date, label = TRUE).L -2.185e-01 3.126e-02 -6.992 3.78e-12 ***
## month(date, label = TRUE).Q 8.238e-01 3.109e-02 26.501 < 2e-16 ***
## month(date, label = TRUE).C 2.224e-01 3.101e-02 7.175 1.04e-12 ***
## month(date, label = TRUE)^4 2.187e-01 3.112e-02 7.027 2.96e-12 ***
## month(date, label = TRUE)^5 -2.066e-01 3.136e-02 -6.586 5.87e-11 ***
## month(date, label = TRUE)^6 -1.892e-02 3.145e-02 -0.602 0.5474
## month(date, label = TRUE)^7 -1.336e-01 3.117e-02 -4.285 1.92e-05 ***
## month(date, label = TRUE)^8 5.614e-02 3.103e-02 1.809 0.0705 .
## month(date, label = TRUE)^9 1.262e-01 3.106e-02 4.063 5.04e-05 ***
## month(date, label = TRUE)^10 -7.236e-02 3.099e-02 -2.335 0.0196 *
## month(date, label = TRUE)^11 -5.242e-02 3.095e-02 -1.694 0.0905 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3885 on 1857 degrees of freedom
## Multiple R-squared: 0.5574, Adjusted R-squared: 0.5545
## F-statistic: 194.9 on 12 and 1857 DF, p-value: < 2.2e-16
##
## ----
##
## Summary for Group: New Hampshire---
## Call:
## stats::lm(formula = .formula, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.3077 -0.3652 -0.0452 0.2823 3.7632
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 7.164e+00 4.705e-02 152.255 < 2e-16 ***
## as.numeric(date) -2.797e-05 3.359e-06 -8.326 < 2e-16 ***
## month(date, label = TRUE).L -2.173e-01 4.416e-02 -4.922 9.31e-07 ***
## month(date, label = TRUE).Q 4.608e-01 4.391e-02 10.493 < 2e-16 ***
## month(date, label = TRUE).C 2.326e-01 4.380e-02 5.311 1.22e-07 ***
## month(date, label = TRUE)^4 3.008e-01 4.396e-02 6.843 1.05e-11 ***
## month(date, label = TRUE)^5 -4.534e-02 4.431e-02 -1.023 0.30624
## month(date, label = TRUE)^6 -2.115e-03 4.442e-02 -0.048 0.96203
## month(date, label = TRUE)^7 -1.429e-01 4.403e-02 -3.245 0.00119 **
## month(date, label = TRUE)^8 7.758e-02 4.383e-02 1.770 0.07685 .
## month(date, label = TRUE)^9 1.832e-01 4.388e-02 4.176 3.11e-05 ***
## month(date, label = TRUE)^10 -1.315e-01 4.378e-02 -3.004 0.00270 **
## month(date, label = TRUE)^11 6.228e-03 4.372e-02 0.142 0.88673
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.5488 on 1857 degrees of freedom
## Multiple R-squared: 0.1487, Adjusted R-squared: 0.1432
## F-statistic: 27.03 on 12 and 1857 DF, p-value: < 2.2e-16
##
## ----
##
## Summary for Group: Rhode Island---
## Call:
## stats::lm(formula = .formula, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.0324 -0.2652 -0.0656 0.1801 3.4025
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 8.077e+00 3.756e-02 215.065 < 2e-16 ***
## as.numeric(date) -4.976e-05 2.682e-06 -18.558 < 2e-16 ***
## month(date, label = TRUE).L -3.434e-01 3.525e-02 -9.743 < 2e-16 ***
## month(date, label = TRUE).Q 5.739e-01 3.505e-02 16.372 < 2e-16 ***
## month(date, label = TRUE).C 1.852e-01 3.496e-02 5.297 1.31e-07 ***
## month(date, label = TRUE)^4 4.421e-01 3.509e-02 12.599 < 2e-16 ***
## month(date, label = TRUE)^5 5.909e-02 3.536e-02 1.671 0.094934 .
## month(date, label = TRUE)^6 -1.204e-01 3.546e-02 -3.397 0.000696 ***
## month(date, label = TRUE)^7 -2.913e-02 3.515e-02 -0.829 0.407237
## month(date, label = TRUE)^8 5.367e-02 3.498e-02 1.534 0.125128
## month(date, label = TRUE)^9 1.875e-01 3.502e-02 5.354 9.65e-08 ***
## month(date, label = TRUE)^10 -1.930e-01 3.494e-02 -5.522 3.82e-08 ***
## month(date, label = TRUE)^11 9.841e-03 3.490e-02 0.282 0.777962
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4381 on 1857 degrees of freedom
## Multiple R-squared: 0.3466, Adjusted R-squared: 0.3424
## F-statistic: 82.1 on 12 and 1857 DF, p-value: < 2.2e-16
##
## ----
##
## Summary for Group: Vermont---
## Call:
## stats::lm(formula = .formula, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.17595 -0.23458 -0.04234 0.20191 3.15309
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 7.074e+00 3.389e-02 208.704 < 2e-16 ***
## as.numeric(date) -3.778e-05 2.420e-06 -15.614 < 2e-16 ***
## month(date, label = TRUE).L -8.930e-02 3.181e-02 -2.808 0.00504 **
## month(date, label = TRUE).Q 7.344e-01 3.163e-02 23.218 < 2e-16 ***
## month(date, label = TRUE).C 5.382e-01 3.155e-02 17.061 < 2e-16 ***
## month(date, label = TRUE)^4 8.014e-02 3.167e-02 2.531 0.01146 *
## month(date, label = TRUE)^5 -3.326e-01 3.191e-02 -10.423 < 2e-16 ***
## month(date, label = TRUE)^6 2.187e-03 3.200e-02 0.068 0.94550
## month(date, label = TRUE)^7 -1.502e-01 3.172e-02 -4.736 2.35e-06 ***
## month(date, label = TRUE)^8 -9.043e-03 3.157e-02 -0.286 0.77456
## month(date, label = TRUE)^9 1.936e-01 3.160e-02 6.126 1.10e-09 ***
## month(date, label = TRUE)^10 -1.710e-01 3.153e-02 -5.422 6.67e-08 ***
## month(date, label = TRUE)^11 7.101e-02 3.149e-02 2.255 0.02425 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3953 on 1857 degrees of freedom
## Multiple R-squared: 0.4123, Adjusted R-squared: 0.4085
## F-statistic: 108.6 on 12 and 1857 DF, p-value: < 2.2e-16
##
## ----
claims_tbl %>%
group_by(symbol) %>%
plot_acf_diagnostics(date, claims,
.lags = "1 year")
claims_tbl %>%
plot_seasonal_diagnostics(date, claims)
claims_tbl %>% count(claims)
## # A tibble: 5,373 × 2
## claims n
## <int> <int>
## 1 152 1
## 2 154 1
## 3 181 1
## 4 184 2
## 5 187 1
## 6 188 1
## 7 189 1
## 8 200 3
## 9 201 1
## 10 203 1
## # ℹ 5,363 more rows
# I have split the states into two graphs for a better visualization.
claims_tbl %>%
filter(symbol == c("Connecticut", "Massachusetts", "Maine")) %>%
group_by(symbol) %>%
plot_seasonal_diagnostics(date, claims,
.feature_set = "quarter")
claims_tbl %>%
filter(symbol == c("New Hampshire", "Rhode Island", "Vermont")) %>%
group_by(symbol) %>%
plot_seasonal_diagnostics(date, claims,
.feature_set = "quarter")
# Split it again for better visualization.
claims_tbl %>%
filter(symbol == c("Connecticut", "Massachusetts", "Maine")) %>%
group_by(symbol) %>%
plot_stl_diagnostics(date, claims,
.feature_set = c("observed", "season", "trend", "seasadj"))
## frequency = 17 observations per 1 year
## trend = 87 observations per 5 years
## frequency = 17 observations per 1 year
## trend = 87 observations per 5 years
## frequency = 17 observations per 1 year
## trend = 87 observations per 5 years
claims_tbl %>%
filter(symbol == c("New Hampshire", "Rhode Island", "Vermont")) %>%
group_by(symbol) %>%
plot_stl_diagnostics(date, claims,
.feature_set = c("observed", "season", "trend", "seasadj"))
## frequency = 17 observations per 1 year
## trend = 87 observations per 5 years
## frequency = 17 observations per 1 year
## trend = 87 observations per 5 years
## frequency = 17 observations per 1 year
## trend = 87 observations per 5 years
claims_tbl %>%
group_by(symbol) %>%
summarise_by_time(.date_var = date, volume = mean(claims), .by = "quarter") %>%
plot_time_series(date, volume,
.facet_ncol = 2,
.interactive = FALSE)
claims_tbl %>%
group_by(symbol) %>%
filter_by_time(.date_var = date,
.start_date = "2000-01",
.end_date = "2003-12") %>%
plot_time_series(date, claims, .facet_ncol = 2)
claims_tbl %>%
group_by(symbol) %>%
pad_by_time(date, .by = "day", .fill_na_direction = "down")
## # A tibble: 78,504 × 3
## # Groups: symbol [6]
## symbol date claims
## <fct> <date> <int>
## 1 Connecticut 1989-01-07 8345
## 2 Connecticut 1989-01-08 8345
## 3 Connecticut 1989-01-09 8345
## 4 Connecticut 1989-01-10 8345
## 5 Connecticut 1989-01-11 8345
## 6 Connecticut 1989-01-12 8345
## 7 Connecticut 1989-01-13 8345
## 8 Connecticut 1989-01-14 6503
## 9 Connecticut 1989-01-15 6503
## 10 Connecticut 1989-01-16 6503
## # ℹ 78,494 more rows
claims_tbl %>%
head(10) %>%
mutate(rolling_avg_2 = slidify_vec(claims, mean,
.period = 5,
.align = "center",
.partial = TRUE))
## # A tibble: 10 × 4
## symbol date claims rolling_avg_2
## <fct> <date> <int> <dbl>
## 1 Connecticut 1989-01-07 8345 6223
## 2 Connecticut 1989-01-14 6503 5833
## 3 Connecticut 1989-01-21 3821 5499.
## 4 Connecticut 1989-01-28 4663 4697.
## 5 Connecticut 1989-02-04 4162 4212.
## 6 Connecticut 1989-02-11 4337 4159.
## 7 Connecticut 1989-02-18 4079 3992
## 8 Connecticut 1989-02-25 3556 3863.
## 9 Connecticut 1989-03-04 3826 3744
## 10 Connecticut 1989-03-11 3515 3632.
# Rolling regressions are easy to implement using `.unlist = FALSE`
lm_roll <- slidify(~ lm(..2 ~ ..1), .period = 3, .unlist = FALSE, .align = "right", .partial = TRUE)
# Apply the rolling regression
claims_tbl %>%
group_by(symbol) %>%
mutate(numeric_date = as.numeric(date)) %>% # Convert date to numeric
mutate(rolling_lm = lm_roll(numeric_date, claims)) %>% # Apply rolling regression
filter(!is.na(rolling_lm))
## # A tibble: 11,220 × 5
## # Groups: symbol [6]
## symbol date claims numeric_date rolling_lm
## <fct> <date> <int> <dbl> <list>
## 1 Connecticut 1989-01-07 8345 6946 <lm>
## 2 Connecticut 1989-01-14 6503 6953 <lm>
## 3 Connecticut 1989-01-21 3821 6960 <lm>
## 4 Connecticut 1989-01-28 4663 6967 <lm>
## 5 Connecticut 1989-02-04 4162 6974 <lm>
## 6 Connecticut 1989-02-11 4337 6981 <lm>
## 7 Connecticut 1989-02-18 4079 6988 <lm>
## 8 Connecticut 1989-02-25 3556 6995 <lm>
## 9 Connecticut 1989-03-04 3826 7002 <lm>
## 10 Connecticut 1989-03-11 3515 7009 <lm>
## # ℹ 11,210 more rows