# for core packages
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.2
## ✔ ggplot2 4.0.0 ✔ tibble 3.3.0
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.1.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# for financial analysis
library(tidyquant)
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
## ── Attaching core tidyquant packages ─────────────────────── tidyquant 1.0.11 ──
## ✔ PerformanceAnalytics 2.0.8 ✔ TTR 0.24.4
## ✔ quantmod 0.4.28 ✔ xts 0.14.1
## ── Conflicts ────────────────────────────────────────── tidyquant_conflicts() ──
## ✖ zoo::as.Date() masks base::as.Date()
## ✖ zoo::as.Date.numeric() masks base::as.Date.numeric()
## ✖ dplyr::filter() masks stats::filter()
## ✖ xts::first() masks dplyr::first()
## ✖ dplyr::lag() masks stats::lag()
## ✖ xts::last() masks dplyr::last()
## ✖ PerformanceAnalytics::legend() masks graphics::legend()
## ✖ quantmod::summary() masks base::summary()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# for time series
library(timetk)
##
## Attaching package: 'timetk'
##
## The following object is masked from 'package:tidyquant':
##
## FANG
Goal: Apply Matt Dancho’s tutorial to the weekly initial unemployment claims of the six New England states.
The following is a replication of Matt Dancho’s tutorial on this page.
# Earliest observation to request; no end date is given.
start_date <- "1989-01-01"

# Series identifiers for initial unemployment claims, one per New England
# state (requested below through tq_get()'s "economic.data" source).
symbols_txt <- c(
  "CTICLAIMS", # Connecticut
  "MEICLAIMS", # Maine
  "MAICLAIMS", # Massachusetts
  "NHICLAIMS", # New Hampshire
  "RIICLAIMS", # Rhode Island
  "VTICLAIMS"  # Vermont
)

# Download all six series into one long table (symbol, date, price), give the
# value column a meaningful name, and replace the series IDs with state names.
# fct_recode() only relabels: the level order still follows the series IDs,
# which is why Massachusetts is listed before Maine in grouped output.
claims_tbl <- tq_get(
  x    = symbols_txt,
  get  = "economic.data",
  from = start_date
) %>%
  rename(claims = price) %>%
  mutate(
    symbol = fct_recode(
      symbol,
      "Connecticut"   = "CTICLAIMS",
      "Maine"         = "MEICLAIMS",
      "Massachusetts" = "MAICLAIMS",
      "New Hampshire" = "NHICLAIMS",
      "Rhode Island"  = "RIICLAIMS",
      "Vermont"       = "VTICLAIMS"
    )
  )
# Static time-series plot of claims, one facet per state in a two-column grid.
# "free" scales give every state its own axes so the smaller states are not
# flattened by the larger ones.
claims_tbl %>%
  group_by(symbol) %>%
  plot_time_series(
    date,
    claims,
    .facet_scales = "free",
    .facet_ncol   = 2,
    .interactive  = FALSE
  )
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## ℹ The deprecated feature was likely used in the timetk package.
## Please report the issue at
## <https://github.com/business-science/timetk/issues>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Ignoring unknown labels:
## • colour : "Legend"
# Box plots of claims per state, with observations binned into one-year
# periods so each box summarises a year.
claims_tbl %>%
  group_by(symbol) %>%
  plot_time_series_boxplot(
    date,
    claims,
    .period     = "1 year",
    .facet_ncol = 2
  )
## Ignoring unknown labels:
## • colour : "Legend"
# Per-state linear model of log(claims) on a linear time trend plus a
# month-of-year effect, plotted against the data; .show_summary = TRUE also
# prints the lm() summary for every group.
# month(date, label = TRUE) is an ordered factor, so the summary reports
# polynomial contrast terms (.L, .Q, .C, ^4, ...) rather than one coefficient
# per named month.
claims_tbl %>%
  group_by(symbol) %>%
  plot_time_series_regression(
    date,
    .formula      = log(claims) ~ as.numeric(date) + month(date, label = TRUE),
    .show_summary = TRUE,
    .facet_ncol   = 2
  )
##
## Summary for Group: Connecticut---
## Call:
## stats::lm(formula = .formula, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.8114 -0.2140 -0.0365 0.1725 3.2088
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 8.614e+00 2.769e-02 311.113 < 2e-16 ***
## as.numeric(date) -1.920e-05 1.936e-06 -9.915 < 2e-16 ***
## month(date, label = TRUE).L -2.811e-01 2.634e-02 -10.669 < 2e-16 ***
## month(date, label = TRUE).Q 4.355e-01 2.627e-02 16.573 < 2e-16 ***
## month(date, label = TRUE).C 6.238e-02 2.623e-02 2.379 0.017465 *
## month(date, label = TRUE)^4 4.961e-01 2.631e-02 18.855 < 2e-16 ***
## month(date, label = TRUE)^5 -1.170e-02 2.649e-02 -0.442 0.658730
## month(date, label = TRUE)^6 -2.518e-02 2.659e-02 -0.947 0.343649
## month(date, label = TRUE)^7 -1.212e-01 2.638e-02 -4.595 4.61e-06 ***
## month(date, label = TRUE)^8 5.281e-02 2.630e-02 2.008 0.044829 *
## month(date, label = TRUE)^9 1.867e-01 2.639e-02 7.075 2.08e-12 ***
## month(date, label = TRUE)^10 -9.813e-02 2.635e-02 -3.724 0.000201 ***
## month(date, label = TRUE)^11 2.072e-02 2.636e-02 0.786 0.432071
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3356 on 1932 degrees of freedom
## Multiple R-squared: 0.3289, Adjusted R-squared: 0.3248
## F-statistic: 78.92 on 12 and 1932 DF, p-value: < 2.2e-16
##
## ----
##
## Summary for Group: Massachusetts---
## Call:
## stats::lm(formula = .formula, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.5748 -0.2180 -0.0521 0.1722 3.4057
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 9.224e+00 3.345e-02 275.723 < 2e-16 ***
## as.numeric(date) -2.352e-05 2.339e-06 -10.056 < 2e-16 ***
## month(date, label = TRUE).L -6.979e-02 3.183e-02 -2.193 0.028446 *
## month(date, label = TRUE).Q 5.919e-01 3.175e-02 18.644 < 2e-16 ***
## month(date, label = TRUE).C 1.864e-01 3.169e-02 5.884 4.72e-09 ***
## month(date, label = TRUE)^4 2.913e-01 3.179e-02 9.163 < 2e-16 ***
## month(date, label = TRUE)^5 -5.119e-02 3.201e-02 -1.599 0.109937
## month(date, label = TRUE)^6 -1.102e-01 3.212e-02 -3.432 0.000612 ***
## month(date, label = TRUE)^7 -4.221e-03 3.187e-02 -0.132 0.894635
## month(date, label = TRUE)^8 3.284e-02 3.178e-02 1.033 0.301646
## month(date, label = TRUE)^9 9.907e-02 3.188e-02 3.107 0.001915 **
## month(date, label = TRUE)^10 -8.855e-02 3.183e-02 -2.782 0.005460 **
## month(date, label = TRUE)^11 1.497e-02 3.185e-02 0.470 0.638478
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4054 on 1932 degrees of freedom
## Multiple R-squared: 0.2384, Adjusted R-squared: 0.2337
## F-statistic: 50.39 on 12 and 1932 DF, p-value: < 2.2e-16
##
## ----
##
## Summary for Group: Maine---
## Call:
## stats::lm(formula = .formula, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.7835 -0.2404 -0.0666 0.1901 3.4851
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 8.455e+00 3.178e-02 266.013 < 2e-16 ***
## as.numeric(date) -9.179e-05 2.223e-06 -41.301 < 2e-16 ***
## month(date, label = TRUE).L -2.007e-01 3.024e-02 -6.635 4.20e-11 ***
## month(date, label = TRUE).Q 8.154e-01 3.016e-02 27.037 < 2e-16 ***
## month(date, label = TRUE).C 2.235e-01 3.010e-02 7.424 1.69e-13 ***
## month(date, label = TRUE)^4 2.188e-01 3.020e-02 7.244 6.24e-13 ***
## month(date, label = TRUE)^5 -2.077e-01 3.041e-02 -6.830 1.14e-11 ***
## month(date, label = TRUE)^6 -2.298e-02 3.052e-02 -0.753 0.4515
## month(date, label = TRUE)^7 -1.320e-01 3.028e-02 -4.359 1.38e-05 ***
## month(date, label = TRUE)^8 5.524e-02 3.020e-02 1.829 0.0675 .
## month(date, label = TRUE)^9 1.239e-01 3.029e-02 4.089 4.50e-05 ***
## month(date, label = TRUE)^10 -7.332e-02 3.025e-02 -2.424 0.0154 *
## month(date, label = TRUE)^11 -4.943e-02 3.026e-02 -1.633 0.1026
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3852 on 1932 degrees of freedom
## Multiple R-squared: 0.5805, Adjusted R-squared: 0.5779
## F-statistic: 222.8 on 12 and 1932 DF, p-value: < 2.2e-16
##
## ----
##
## Summary for Group: New Hampshire---
## Call:
## stats::lm(formula = .formula, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.2393 -0.3689 -0.0615 0.2744 3.8260
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 7.251e+00 4.515e-02 160.608 < 2e-16 ***
## as.numeric(date) -3.564e-05 3.157e-06 -11.289 < 2e-16 ***
## month(date, label = TRUE).L -2.075e-01 4.296e-02 -4.830 1.47e-06 ***
## month(date, label = TRUE).Q 4.362e-01 4.284e-02 10.181 < 2e-16 ***
## month(date, label = TRUE).C 2.275e-01 4.276e-02 5.320 1.16e-07 ***
## month(date, label = TRUE)^4 3.036e-01 4.290e-02 7.075 2.08e-12 ***
## month(date, label = TRUE)^5 -3.585e-02 4.320e-02 -0.830 0.40674
## month(date, label = TRUE)^6 -1.970e-03 4.335e-02 -0.045 0.96376
## month(date, label = TRUE)^7 -1.286e-01 4.301e-02 -2.989 0.00284 **
## month(date, label = TRUE)^8 8.520e-02 4.289e-02 1.986 0.04713 *
## month(date, label = TRUE)^9 1.810e-01 4.303e-02 4.206 2.72e-05 ***
## month(date, label = TRUE)^10 -1.260e-01 4.296e-02 -2.932 0.00341 **
## month(date, label = TRUE)^11 8.720e-03 4.299e-02 0.203 0.83929
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.5472 on 1932 degrees of freedom
## Multiple R-squared: 0.1617, Adjusted R-squared: 0.1565
## F-statistic: 31.06 on 12 and 1932 DF, p-value: < 2.2e-16
##
## ----
##
## Summary for Group: Rhode Island---
## Call:
## stats::lm(formula = .formula, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.0245 -0.2607 -0.0667 0.1756 3.4216
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 8.099e+00 3.584e-02 225.980 < 2e-16 ***
## as.numeric(date) -5.165e-05 2.506e-06 -20.610 < 2e-16 ***
## month(date, label = TRUE).L -3.395e-01 3.410e-02 -9.956 < 2e-16 ***
## month(date, label = TRUE).Q 5.755e-01 3.401e-02 16.924 < 2e-16 ***
## month(date, label = TRUE).C 1.816e-01 3.394e-02 5.349 9.88e-08 ***
## month(date, label = TRUE)^4 4.486e-01 3.405e-02 13.174 < 2e-16 ***
## month(date, label = TRUE)^5 6.173e-02 3.429e-02 1.800 0.071990 .
## month(date, label = TRUE)^6 -1.242e-01 3.441e-02 -3.608 0.000316 ***
## month(date, label = TRUE)^7 -2.306e-02 3.414e-02 -0.675 0.499492
## month(date, label = TRUE)^8 5.326e-02 3.405e-02 1.564 0.117914
## month(date, label = TRUE)^9 1.838e-01 3.415e-02 5.381 8.32e-08 ***
## month(date, label = TRUE)^10 -1.920e-01 3.410e-02 -5.629 2.08e-08 ***
## month(date, label = TRUE)^11 1.103e-02 3.412e-02 0.323 0.746639
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4343 on 1932 degrees of freedom
## Multiple R-squared: 0.3616, Adjusted R-squared: 0.3577
## F-statistic: 91.21 on 12 and 1932 DF, p-value: < 2.2e-16
##
## ----
##
## Summary for Group: Vermont---
## Call:
## stats::lm(formula = .formula, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.1350 -0.2467 -0.0452 0.2003 3.2066
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 7.148e+00 3.277e-02 218.126 < 2e-16 ***
## as.numeric(date) -4.435e-05 2.292e-06 -19.352 < 2e-16 ***
## month(date, label = TRUE).L -8.489e-02 3.118e-02 -2.723 0.00654 **
## month(date, label = TRUE).Q 7.195e-01 3.110e-02 23.137 < 2e-16 ***
## month(date, label = TRUE).C 5.357e-01 3.104e-02 17.259 < 2e-16 ***
## month(date, label = TRUE)^4 8.776e-02 3.114e-02 2.818 0.00488 **
## month(date, label = TRUE)^5 -3.277e-01 3.136e-02 -10.451 < 2e-16 ***
## month(date, label = TRUE)^6 2.623e-03 3.147e-02 0.083 0.93357
## month(date, label = TRUE)^7 -1.376e-01 3.122e-02 -4.407 1.10e-05 ***
## month(date, label = TRUE)^8 -3.854e-03 3.113e-02 -0.124 0.90150
## month(date, label = TRUE)^9 1.950e-01 3.123e-02 6.244 5.24e-10 ***
## month(date, label = TRUE)^10 -1.720e-01 3.119e-02 -5.517 3.92e-08 ***
## month(date, label = TRUE)^11 7.550e-02 3.120e-02 2.420 0.01563 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3972 on 1932 degrees of freedom
## Multiple R-squared: 0.423, Adjusted R-squared: 0.4194
## F-statistic: 118 on 12 and 1932 DF, p-value: < 2.2e-16
##
## ----
# Autocorrelation diagnostics for each state, looking back two years of lags
# and drawing the white-noise reference bars.
claims_tbl %>%
  group_by(symbol) %>%
  plot_acf_diagnostics(
    date,
    claims,
    .lags                  = "2 years",
    .show_white_noise_bars = TRUE
  )
# Seasonal diagnostics of claims for each state, using timetk's default
# feature set.
claims_tbl %>%
  group_by(symbol) %>%
  plot_seasonal_diagnostics(
    .date_var = date,
    .value    = claims
  )
# STL decomposition per state, showing the observed series alongside its
# seasonal, trend and remainder components. No frequency or trend window is
# passed, so timetk selects them itself and reports its choice as a message.
claims_tbl %>%
  group_by(symbol) %>%
  plot_stl_diagnostics(
    .date_var    = date,
    .value       = claims,
    .feature_set = c("observed", "season", "trend", "remainder")
  )
## frequency = 13 observations per 1 quarter
## trend = 53 observations per 1 year
## frequency = 13 observations per 1 quarter
## trend = 53 observations per 1 year
## frequency = 13 observations per 1 quarter
## trend = 53 observations per 1 year
## frequency = 13 observations per 1 quarter
## trend = 53 observations per 1 year
## frequency = 13 observations per 1 quarter
## trend = 53 observations per 1 year
## frequency = 13 observations per 1 quarter
## trend = 53 observations per 1 year
# Collapse the series to quarterly totals per state, then plot the
# aggregated series as static facets.
claims_tbl %>%
  group_by(symbol) %>%
  summarise_by_time(
    .date_var = date,
    .by       = "quarter",
    claims    = sum(claims)
  ) %>%
  plot_time_series(
    .date_var    = date,
    .value       = claims,
    .facet_ncol  = 2,
    .interactive = FALSE
  )
## Ignoring unknown labels:
## • colour : "Legend"
# Zoom in on the window bounded by "2008" and "2010" for each state and plot
# it. NOTE(review): the year-only strings rely on timetk's date shorthand;
# confirm against the filter_by_time() docs that "2010" covers the whole year.
claims_tbl %>%
  group_by(symbol) %>%
  filter_by_time(
    date,
    .start_date = "2008",
    .end_date   = "2010"
  ) %>%
  plot_time_series(
    .date_var    = date,
    .value       = claims,
    .facet_ncol  = 2,
    .interactive = FALSE
  )
## Ignoring unknown labels:
## • colour : "Legend"
# Make each state's series regular at a weekly step: any week missing from a
# group is inserted with claims set to 0. The result is printed, not stored.
claims_tbl %>%
  group_by(symbol) %>%
  pad_by_time(
    .date_var  = date,
    .by        = "week",
    .pad_value = 0
  )
## # A tibble: 11,670 × 3
## # Groups: symbol [6]
## symbol date claims
## <fct> <date> <int>
## 1 Connecticut 1989-01-07 8345
## 2 Connecticut 1989-01-14 6503
## 3 Connecticut 1989-01-21 3821
## 4 Connecticut 1989-01-28 4663
## 5 Connecticut 1989-02-04 4162
## 6 Connecticut 1989-02-11 4337
## 7 Connecticut 1989-02-18 4079
## 8 Connecticut 1989-02-25 3556
## 9 Connecticut 1989-03-04 3826
## 10 Connecticut 1989-03-11 3515
## # ℹ 11,660 more rows
# Trailing 4-observation moving average of claims within each state.
# .align = "right" puts each window's result on its last observation, and
# .partial = TRUE lets the first three rows of a group average the values
# available so far instead of being NA. The result is printed, not stored.
claims_tbl %>%
  group_by(symbol) %>%
  mutate(
    rolling_avg_4 = slidify_vec(
      .x       = claims,
      .f       = mean,
      .period  = 4,
      .align   = "right",
      .partial = TRUE
    )
  )
## # A tibble: 11,670 × 4
## # Groups: symbol [6]
## symbol date claims rolling_avg_4
## <fct> <date> <int> <dbl>
## 1 Connecticut 1989-01-07 8345 8345
## 2 Connecticut 1989-01-14 6503 7424
## 3 Connecticut 1989-01-21 3821 6223
## 4 Connecticut 1989-01-28 4663 5833
## 5 Connecticut 1989-02-04 4162 4787.
## 6 Connecticut 1989-02-11 4337 4246.
## 7 Connecticut 1989-02-18 4079 4310.
## 8 Connecticut 1989-02-25 3556 4034.
## 9 Connecticut 1989-03-04 3826 3950.
## 10 Connecticut 1989-03-11 3515 3744
## # ℹ 11,660 more rows