# for Core packages
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.1 ✔ stringr 1.5.2
## ✔ ggplot2 4.0.0 ✔ tibble 3.3.0
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.1.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# for financial analysis
library(tidyquant)
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
## ── Attaching core tidyquant packages ─────────────────────── tidyquant 1.0.11 ──
## ✔ PerformanceAnalytics 2.0.8 ✔ TTR 0.24.4
## ✔ quantmod 0.4.28 ✔ xts 0.14.1── Conflicts ────────────────────────────────────────── tidyquant_conflicts() ──
## ✖ zoo::as.Date() masks base::as.Date()
## ✖ zoo::as.Date.numeric() masks base::as.Date.numeric()
## ✖ dplyr::filter() masks stats::filter()
## ✖ xts::first() masks dplyr::first()
## ✖ dplyr::lag() masks stats::lag()
## ✖ xts::last() masks dplyr::last()
## ✖ PerformanceAnalytics::legend() masks graphics::legend()
## ✖ quantmod::summary() masks base::summary()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# for times series
library(timetk)
##
## Attaching package: 'timetk'
##
## The following object is masked from 'package:tidyquant':
##
## FANG
Goal: Apply Matt Dancho’s tutorial to the initial unemployment claims of the New England states.
The following is a replication of Matt Dancho’s tutorial on this page.
# Earliest observation date to request for every series.
start_date <- "1989-01-01"

# Initial-claims series IDs: a two-letter state prefix plus "ICLAIMS".
# Order: Connecticut, Maine, Massachusetts, New Hampshire, Rhode Island,
# Vermont.
symbols_txt <- paste0(c("CT", "ME", "MA", "NH", "RI", "VT"), "ICLAIMS")
# Download every series in `symbols_txt` from `start_date` onward via
# tq_get(get = "economic.data"), rename the value column `price` to `claims`,
# and replace each series ID in `symbol` with its state name.
claims_tbl <- symbols_txt %>%
  tq_get(get = "economic.data", from = start_date) %>%
  rename(claims = price) %>%
  mutate(
    symbol = fct_recode(
      symbol,
      Connecticut     = "CTICLAIMS",
      Maine           = "MEICLAIMS",
      Massachusetts   = "MAICLAIMS",
      `New Hampshire` = "NHICLAIMS",
      `Rhode Island`  = "RIICLAIMS",
      Vermont         = "VTICLAIMS"
    )
  )
# Line chart of claims over time, drawn as one facet per state (two columns).
claims_tbl %>%
  group_by(symbol) %>%
  plot_time_series(
    date, claims,
    .facet_ncol   = 2,
    .facet_scales = "free",  # axis ranges are not shared across facets
    .interactive  = FALSE    # request the non-interactive plot
  )
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## ℹ The deprecated feature was likely used in the timetk package.
## Please report the issue at
## <https://github.com/business-science/timetk/issues>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Ignoring unknown labels:
## • colour : "Legend"
# Box plots of claims binned into 1-year periods, faceted by state
# (two columns).
claims_tbl %>%
  group_by(symbol) %>%
  plot_time_series_boxplot(
    date, claims,
    .period     = "1 year",
    .facet_ncol = 2
  )
## Ignoring unknown labels:
## • colour : "Legend"
# Per-state linear regression of log(claims) on a numeric time trend plus a
# month-of-year term; `.show_summary = TRUE` prints the lm() summary for each
# group.
# `month(date, label = TRUE)` enters the model as an ordered factor, which is
# why the printed coefficients are polynomial contrasts (.L, .Q, .C, ^4, ...).
claims_tbl %>%
  group_by(symbol) %>%
  plot_time_series_regression(
    date,
    .formula = log(claims) ~ as.numeric(date) + month(date, label = TRUE),
    .facet_ncol = 2,
    .show_summary = TRUE
  )
##
## Summary for Group: Connecticut---
## Call:
## stats::lm(formula = .formula, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.8118 -0.2142 -0.0363 0.1725 3.2068
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 8.614e+00 2.767e-02 311.269 < 2e-16 ***
## as.numeric(date) -1.913e-05 1.935e-06 -9.891 < 2e-16 ***
## month(date, label = TRUE).L -2.814e-01 2.634e-02 -10.685 < 2e-16 ***
## month(date, label = TRUE).Q 4.352e-01 2.627e-02 16.565 < 2e-16 ***
## month(date, label = TRUE).C 6.298e-02 2.621e-02 2.403 0.01637 *
## month(date, label = TRUE)^4 4.958e-01 2.631e-02 18.849 < 2e-16 ***
## month(date, label = TRUE)^5 -1.210e-02 2.649e-02 -0.457 0.64792
## month(date, label = TRUE)^6 -2.454e-02 2.657e-02 -0.924 0.35578
## month(date, label = TRUE)^7 -1.214e-01 2.637e-02 -4.604 4.41e-06 ***
## month(date, label = TRUE)^8 5.237e-02 2.630e-02 1.992 0.04656 *
## month(date, label = TRUE)^9 1.875e-01 2.637e-02 7.111 1.61e-12 ***
## month(date, label = TRUE)^10 -9.883e-02 2.633e-02 -3.753 0.00018 ***
## month(date, label = TRUE)^11 2.105e-02 2.636e-02 0.799 0.42459
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3355 on 1933 degrees of freedom
## Multiple R-squared: 0.3288, Adjusted R-squared: 0.3246
## F-statistic: 78.91 on 12 and 1933 DF, p-value: < 2.2e-16
##
## ----
##
## Summary for Group: Massachusetts---
## Call:
## stats::lm(formula = .formula, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.5744 -0.2182 -0.0520 0.1720 3.4061
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 9.225e+00 3.344e-02 275.905 < 2e-16 ***
## as.numeric(date) -2.361e-05 2.337e-06 -10.100 < 2e-16 ***
## month(date, label = TRUE).L -6.932e-02 3.182e-02 -2.178 0.029515 *
## month(date, label = TRUE).Q 5.922e-01 3.174e-02 18.658 < 2e-16 ***
## month(date, label = TRUE).C 1.857e-01 3.167e-02 5.862 5.37e-09 ***
## month(date, label = TRUE)^4 2.916e-01 3.179e-02 9.174 < 2e-16 ***
## month(date, label = TRUE)^5 -5.067e-02 3.200e-02 -1.584 0.113470
## month(date, label = TRUE)^6 -1.111e-01 3.211e-02 -3.459 0.000553 ***
## month(date, label = TRUE)^7 -3.922e-03 3.187e-02 -0.123 0.902055
## month(date, label = TRUE)^8 3.341e-02 3.177e-02 1.051 0.293223
## month(date, label = TRUE)^9 9.801e-02 3.186e-02 3.077 0.002124 **
## month(date, label = TRUE)^10 -8.764e-02 3.182e-02 -2.755 0.005933 **
## month(date, label = TRUE)^11 1.453e-02 3.185e-02 0.456 0.648223
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4054 on 1933 degrees of freedom
## Multiple R-squared: 0.2386, Adjusted R-squared: 0.2339
## F-statistic: 50.47 on 12 and 1933 DF, p-value: < 2.2e-16
##
## ----
##
## Summary for Group: Maine---
## Call:
## stats::lm(formula = .formula, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.7840 -0.2403 -0.0668 0.1902 3.4876
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 8.456e+00 3.177e-02 266.188 < 2e-16 ***
## as.numeric(date) -9.187e-05 2.221e-06 -41.371 < 2e-16 ***
## month(date, label = TRUE).L -2.002e-01 3.023e-02 -6.621 4.60e-11 ***
## month(date, label = TRUE).Q 8.158e-01 3.016e-02 27.052 < 2e-16 ***
## month(date, label = TRUE).C 2.228e-01 3.009e-02 7.403 1.98e-13 ***
## month(date, label = TRUE)^4 2.191e-01 3.020e-02 7.255 5.77e-13 ***
## month(date, label = TRUE)^5 -2.072e-01 3.040e-02 -6.815 1.26e-11 ***
## month(date, label = TRUE)^6 -2.378e-02 3.050e-02 -0.780 0.4358
## month(date, label = TRUE)^7 -1.317e-01 3.028e-02 -4.350 1.43e-05 ***
## month(date, label = TRUE)^8 5.578e-02 3.019e-02 1.848 0.0648 .
## month(date, label = TRUE)^9 1.229e-01 3.027e-02 4.059 5.12e-05 ***
## month(date, label = TRUE)^10 -7.245e-02 3.023e-02 -2.397 0.0166 *
## month(date, label = TRUE)^11 -4.985e-02 3.026e-02 -1.647 0.0997 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3852 on 1933 degrees of freedom
## Multiple R-squared: 0.5808, Adjusted R-squared: 0.5782
## F-statistic: 223.2 on 12 and 1933 DF, p-value: < 2.2e-16
##
## ----
##
## Summary for Group: New Hampshire---
## Call:
## stats::lm(formula = .formula, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.2343 -0.3688 -0.0618 0.2738 3.8308
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 7.253e+00 4.513e-02 160.707 < 2e-16 ***
## as.numeric(date) -3.579e-05 3.155e-06 -11.344 < 2e-16 ***
## month(date, label = TRUE).L -2.066e-01 4.296e-02 -4.810 1.63e-06 ***
## month(date, label = TRUE).Q 4.368e-01 4.285e-02 10.196 < 2e-16 ***
## month(date, label = TRUE).C 2.261e-01 4.275e-02 5.288 1.38e-07 ***
## month(date, label = TRUE)^4 3.041e-01 4.290e-02 7.089 1.89e-12 ***
## month(date, label = TRUE)^5 -3.490e-02 4.320e-02 -0.808 0.41925
## month(date, label = TRUE)^6 -3.508e-03 4.334e-02 -0.081 0.93550
## month(date, label = TRUE)^7 -1.280e-01 4.301e-02 -2.976 0.00296 **
## month(date, label = TRUE)^8 8.625e-02 4.289e-02 2.011 0.04446 *
## month(date, label = TRUE)^9 1.790e-01 4.300e-02 4.163 3.28e-05 ***
## month(date, label = TRUE)^10 -1.243e-01 4.295e-02 -2.894 0.00385 **
## month(date, label = TRUE)^11 7.915e-03 4.299e-02 0.184 0.85394
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.5472 on 1933 degrees of freedom
## Multiple R-squared: 0.1618, Adjusted R-squared: 0.1566
## F-statistic: 31.09 on 12 and 1933 DF, p-value: < 2.2e-16
##
## ----
##
## Summary for Group: Rhode Island---
## Call:
## stats::lm(formula = .formula, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.0239 -0.2602 -0.0668 0.1755 3.4221
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 8.100e+00 3.583e-02 226.100 < 2e-16 ***
## as.numeric(date) -5.177e-05 2.505e-06 -20.672 < 2e-16 ***
## month(date, label = TRUE).L -3.388e-01 3.410e-02 -9.935 < 2e-16 ***
## month(date, label = TRUE).Q 5.761e-01 3.401e-02 16.938 < 2e-16 ***
## month(date, label = TRUE).C 1.804e-01 3.394e-02 5.316 1.18e-07 ***
## month(date, label = TRUE)^4 4.491e-01 3.406e-02 13.187 < 2e-16 ***
## month(date, label = TRUE)^5 6.250e-02 3.429e-02 1.823 0.068498 .
## month(date, label = TRUE)^6 -1.254e-01 3.440e-02 -3.645 0.000274 ***
## month(date, label = TRUE)^7 -2.261e-02 3.414e-02 -0.662 0.507937
## month(date, label = TRUE)^8 5.412e-02 3.405e-02 1.590 0.112106
## month(date, label = TRUE)^9 1.822e-01 3.413e-02 5.337 1.06e-07 ***
## month(date, label = TRUE)^10 -1.906e-01 3.409e-02 -5.591 2.58e-08 ***
## month(date, label = TRUE)^11 1.037e-02 3.413e-02 0.304 0.761228
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4344 on 1933 degrees of freedom
## Multiple R-squared: 0.3619, Adjusted R-squared: 0.3579
## F-statistic: 91.35 on 12 and 1933 DF, p-value: < 2.2e-16
##
## ----
##
## Summary for Group: Vermont---
## Call:
## stats::lm(formula = .formula, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.1343 -0.2459 -0.0461 0.2005 3.2072
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 7.150e+00 3.276e-02 218.221 < 2e-16 ***
## as.numeric(date) -4.448e-05 2.290e-06 -19.418 < 2e-16 ***
## month(date, label = TRUE).L -8.413e-02 3.118e-02 -2.698 0.00704 **
## month(date, label = TRUE).Q 7.201e-01 3.110e-02 23.151 < 2e-16 ***
## month(date, label = TRUE).C 5.345e-01 3.104e-02 17.222 < 2e-16 ***
## month(date, label = TRUE)^4 8.826e-02 3.115e-02 2.834 0.00465 **
## month(date, label = TRUE)^5 -3.269e-01 3.136e-02 -10.424 < 2e-16 ***
## month(date, label = TRUE)^6 1.307e-03 3.146e-02 0.042 0.96687
## month(date, label = TRUE)^7 -1.371e-01 3.123e-02 -4.391 1.19e-05 ***
## month(date, label = TRUE)^8 -2.954e-03 3.114e-02 -0.095 0.92441
## month(date, label = TRUE)^9 1.933e-01 3.122e-02 6.193 7.19e-10 ***
## month(date, label = TRUE)^10 -1.706e-01 3.118e-02 -5.472 5.04e-08 ***
## month(date, label = TRUE)^11 7.481e-02 3.121e-02 2.397 0.01662 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3973 on 1933 degrees of freedom
## Multiple R-squared: 0.4227, Adjusted R-squared: 0.4191
## F-statistic: 117.9 on 12 and 1933 DF, p-value: < 2.2e-16
##
## ----
# Autocorrelation diagnostics of claims for each state, covering lags up to
# two years, with the white-noise bars switched on.
claims_tbl %>%
  group_by(symbol) %>%
  plot_acf_diagnostics(
    date, claims,
    .lags                  = "2 years",
    .show_white_noise_bars = TRUE
  )
# Seasonal diagnostics of claims for each state, using the function defaults.
claims_tbl %>%
  group_by(symbol) %>%
  plot_seasonal_diagnostics(.date_var = date, .value = claims)
# STL decomposition of claims for each state, showing the observed series and
# its season, trend and remainder components. Frequency and trend windows are
# left at their defaults; the chosen values are reported in the messages.
claims_tbl %>%
  group_by(symbol) %>%
  plot_stl_diagnostics(
    .date_var    = date,
    .value       = claims,
    .feature_set = c("observed", "season", "trend", "remainder")
  )
## frequency = 13 observations per 1 quarter
## trend = 53 observations per 1 year
## frequency = 13 observations per 1 quarter
## trend = 53 observations per 1 year
## frequency = 13 observations per 1 quarter
## trend = 53 observations per 1 year
## frequency = 13 observations per 1 quarter
## trend = 53 observations per 1 year
## frequency = 13 observations per 1 quarter
## trend = 53 observations per 1 year
## frequency = 13 observations per 1 quarter
## trend = 53 observations per 1 year
# Sum claims within each quarter for every state, then plot the quarterly
# totals as a static chart with one facet per state.
claims_tbl %>%
  group_by(symbol) %>%
  summarise_by_time(date, .by = "quarter", claims = sum(claims)) %>%
  plot_time_series(
    .date_var    = date,
    .value       = claims,
    .facet_ncol  = 2,
    .interactive = FALSE
  )
## Ignoring unknown labels:
## • colour : "Legend"
# Keep only the observations between "2008" and "2010" for each state, then
# plot that window as a static chart with one facet per state.
claims_tbl %>%
  group_by(symbol) %>%
  filter_by_time(date, .start_date = "2008", .end_date = "2010") %>%
  plot_time_series(
    .date_var    = date,
    .value       = claims,
    .facet_ncol  = 2,
    .interactive = FALSE
  )
## Ignoring unknown labels:
## • colour : "Legend"
# Pad each state's series to a regular weekly grid; any week that has to be
# inserted gets claims = 0. The result is printed, not stored.
claims_tbl %>%
  group_by(symbol) %>%
  pad_by_time(
    .date_var  = date,
    .by        = "week",
    .pad_value = 0
  )
## # A tibble: 11,676 × 3
## # Groups: symbol [6]
## symbol date claims
## <fct> <date> <int>
## 1 Connecticut 1989-01-07 8345
## 2 Connecticut 1989-01-14 6503
## 3 Connecticut 1989-01-21 3821
## 4 Connecticut 1989-01-28 4663
## 5 Connecticut 1989-02-04 4162
## 6 Connecticut 1989-02-11 4337
## 7 Connecticut 1989-02-18 4079
## 8 Connecticut 1989-02-25 3556
## 9 Connecticut 1989-03-04 3826
## 10 Connecticut 1989-03-11 3515
## # ℹ 11,666 more rows
# Add a trailing 4-observation rolling mean of claims within each state.
# With `.partial = TRUE` the first rows are averaged over the observations
# available so far, so the column starts with values rather than NA.
# The result is printed, not stored.
claims_tbl %>%
  group_by(symbol) %>%
  mutate(
    rolling_avg_4 = slidify_vec(
      .x       = claims,
      .f       = mean,
      .period  = 4,
      .align   = "right",
      .partial = TRUE
    )
  )
## # A tibble: 11,676 × 4
## # Groups: symbol [6]
## symbol date claims rolling_avg_4
## <fct> <date> <int> <dbl>
## 1 Connecticut 1989-01-07 8345 8345
## 2 Connecticut 1989-01-14 6503 7424
## 3 Connecticut 1989-01-21 3821 6223
## 4 Connecticut 1989-01-28 4663 5833
## 5 Connecticut 1989-02-04 4162 4787.
## 6 Connecticut 1989-02-11 4337 4246.
## 7 Connecticut 1989-02-18 4079 4310.
## 8 Connecticut 1989-02-25 3556 4034.
## 9 Connecticut 1989-03-04 3826 3950.
## 10 Connecticut 1989-03-11 3515 3744
## # ℹ 11,666 more rows