# for Core packages
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.6
## ✔ forcats 1.0.1 ✔ stringr 1.6.0
## ✔ ggplot2 4.0.2 ✔ tibble 3.3.1
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.2
## ✔ purrr 1.2.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# for financial analysis
library(tidyquant)
## Warning: package 'tidyquant' was built under R version 4.5.3
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
## Warning: package 'xts' was built under R version 4.5.3
## Warning: package 'zoo' was built under R version 4.5.3
## Warning: package 'quantmod' was built under R version 4.5.3
## Warning: package 'TTR' was built under R version 4.5.3
## Warning: package 'PerformanceAnalytics' was built under R version 4.5.3
## ── Attaching core tidyquant packages ─────────────────────── tidyquant 1.0.12 ──
## ✔ PerformanceAnalytics 2.1.0 ✔ TTR 0.24.4
## ✔ quantmod 0.4.28 ✔ xts 0.14.2
## ── Conflicts ────────────────────────────────────────── tidyquant_conflicts() ──
## ✖ zoo::as.Date() masks base::as.Date()
## ✖ zoo::as.Date.numeric() masks base::as.Date.numeric()
## ✖ dplyr::filter() masks stats::filter()
## ✖ xts::first() masks dplyr::first()
## ✖ dplyr::lag() masks stats::lag()
## ✖ xts::last() masks dplyr::last()
## ✖ PerformanceAnalytics::legend() masks graphics::legend()
## ✖ quantmod::summary() masks base::summary()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# for time series
library(timetk)
## Warning: package 'timetk' was built under R version 4.5.3
##
## Attaching package: 'timetk'
##
## The following object is masked from 'package:tidyquant':
##
## FANG
Goal: Apply Matt Dancho’s timetk tutorial to weekly initial unemployment claims for the six New England states.
The following replicates Matt Dancho’s tutorial on this page.
# Download initial-claims series for the six New England states, starting
# in 1989, via tq_get()'s "economic.data" source.
start_date <- "1989-01-01"

# One series ID per state.
symbols_txt <- c(
  "CTICLAIMS", # Connecticut
  "MEICLAIMS", # Maine
  "MAICLAIMS", # Massachusetts
  "NHICLAIMS", # New Hampshire
  "RIICLAIMS", # Rhode Island
  "VTICLAIMS"  # Vermont
)

# Rename the value column to `claims` and replace each series ID with the
# state name it stands for.
claims_tbl <- tq_get(symbols_txt, get = "economic.data", from = start_date) %>%
  rename(claims = price) %>%
  mutate(
    symbol = fct_recode(
      symbol,
      "Connecticut"   = "CTICLAIMS",
      "Maine"         = "MEICLAIMS",
      "Massachusetts" = "MAICLAIMS",
      "New Hampshire" = "NHICLAIMS",
      "Rhode Island"  = "RIICLAIMS",
      "Vermont"       = "VTICLAIMS"
    )
  )

claims_tbl
## # A tibble: 11,664 × 3
## symbol date claims
## <fct> <date> <int>
## 1 Connecticut 1989-01-07 8345
## 2 Connecticut 1989-01-14 6503
## 3 Connecticut 1989-01-21 3821
## 4 Connecticut 1989-01-28 4663
## 5 Connecticut 1989-02-04 4162
## 6 Connecticut 1989-02-11 4337
## 7 Connecticut 1989-02-18 4079
## 8 Connecticut 1989-02-25 3556
## 9 Connecticut 1989-03-04 3826
## 10 Connecticut 1989-03-11 3515
## # ℹ 11,654 more rows
# Plot claims over time, one facet per state.
# claims_tbl stacks all six states in long format; without group_by() they
# would be drawn as a single series, with the line jumping between states at
# every date and the smoother fitted to the pooled values.
claims_tbl %>%
  group_by(symbol) %>%
  plot_time_series(.date_var = date, .value = claims, .facet_ncol = 2)
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## ℹ The deprecated feature was likely used in the timetk package.
## Please report the issue at
## <https://github.com/business-science/timetk/issues>.
## This warning is displayed once per session.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Ignoring unknown labels:
## • colour : "Legend"
# Log-scale view, one facet per state (two columns, independent y axes),
# with lines coloured by the year of each observation.
claims_tbl %>%
  group_by(symbol) %>%
  plot_time_series(
    date, log(claims),
    .color_var    = year(date),
    .facet_ncol   = 2,
    .facet_scales = "free_y"
  )
# Static (ggplot) version with custom labels, coloured by month.
# Grouping by state keeps each state's series in its own facet; ungrouped,
# the lines are split only by month, so every line mixes all six states.
claims_tbl %>%
  group_by(symbol) %>%
  plot_time_series(
    .date_var = date,
    .value = claims,
    .color_var = month(date, label = TRUE),
    .facet_ncol = 2,
    # Returns static ggplot
    .interactive = FALSE,
    # Customize
    .title = "New England Initial Unemployment Claims",
    .x_lab = "Date",
    .y_lab = "Initial Claims",
    .color_lab = "Month"
  )
# Number of observations per state.
count(claims_tbl, symbol)
## # A tibble: 6 × 2
## symbol n
## <fct> <int>
## 1 Connecticut 1944
## 2 Massachusetts 1944
## 3 Maine 1944
## 4 New Hampshire 1944
## 5 Rhode Island 1944
## 6 Vermont 1944
# Yearly boxplots of claims per state, restricted to dates up to the end
# date "1995".
claims_tbl %>%
  filter_by_time(date, .end_date = "1995") %>%
  group_by(symbol) %>%
  plot_time_series_boxplot(
    date, claims,
    .period     = "1 year",
    .facet_ncol = 2
  )
## Ignoring unknown labels:
## • colour : "Legend"
# Per-state regression of log(claims) on a numeric time trend plus a
# month-of-year term; the model summary printout is switched off.
claims_tbl %>%
  group_by(symbol) %>%
  plot_time_series_regression(
    date,
    .formula      = log(claims) ~ as.numeric(date) + month(date, label = TRUE),
    .facet_ncol   = 2,
    .show_summary = FALSE
  )
# Autocorrelation diagnostics per state, with lags out to two years.
claims_tbl %>%
  group_by(symbol) %>%
  plot_acf_diagnostics(
    .date_var = date,
    .value    = claims,
    .lags     = "2 years"
  )
# Seasonal diagnostics on the ungrouped table: there is no group_by() here,
# so observations from all six states are pooled together.
claims_tbl %>%
  plot_seasonal_diagnostics(.date_var = date, .value = claims)
# Number of observations per state.
count(claims_tbl, symbol)
## # A tibble: 6 × 2
## symbol n
## <fct> <int>
## 1 Connecticut 1944
## 2 Massachusetts 1944
## 3 Maine 1944
## 4 New Hampshire 1944
## 5 Rhode Island 1944
## 6 Vermont 1944
# Seasonal diagnostics computed separately for each state.
claims_tbl %>%
  group_by(symbol) %>%
  plot_seasonal_diagnostics(.date_var = date, .value = claims)
# STL decomposition per state, showing the observed, season, trend and
# remainder components.
claims_tbl %>%
  group_by(symbol) %>%
  plot_stl_diagnostics(
    .date_var    = date,
    .value       = claims,
    .feature_set = c("observed", "season", "trend", "remainder")
  )
## frequency = 13 observations per 1 quarter
## trend = 53 observations per 1 year
## frequency = 13 observations per 1 quarter
## trend = 53 observations per 1 year
## frequency = 13 observations per 1 quarter
## trend = 53 observations per 1 year
## frequency = 13 observations per 1 quarter
## trend = 53 observations per 1 year
## frequency = 13 observations per 1 quarter
## trend = 53 observations per 1 year
## frequency = 13 observations per 1 quarter
## trend = 53 observations per 1 year
# Sum claims by quarter within each state, then draw a static faceted plot.
claims_tbl %>%
  group_by(symbol) %>%
  summarise_by_time(
    .date_var = date,
    .by       = "quarter",
    claims    = sum(claims)
  ) %>%
  plot_time_series(
    .date_var    = date,
    .value       = claims,
    .facet_ncol  = 2,
    .interactive = FALSE
  )
## Ignoring unknown labels:
## • colour : "Legend"
# Average claims by month within each state, then draw a static faceted
# plot.
claims_tbl %>%
  group_by(symbol) %>%
  summarise_by_time(
    .date_var = date,
    .by       = "month",
    claims    = mean(claims)
  ) %>%
  plot_time_series(
    .date_var    = date,
    .value       = claims,
    .facet_ncol  = 2,
    .interactive = FALSE
  )
## Ignoring unknown labels:
## • colour : "Legend"
# Restrict each state's series to the window from "2008-01" to "2010" and
# plot it.
claims_tbl %>%
  group_by(symbol) %>%
  filter_by_time(
    date,
    .start_date = "2008-01",
    .end_date   = "2010"
  ) %>%
  plot_time_series(.date_var = date, .value = claims, .facet_ncol = 2)
## Ignoring unknown labels:
## • colour : "Legend"
# Pad each state's series to a weekly grid, using 0 as the pad value.
claims_tbl %>%
  group_by(symbol) %>%
  pad_by_time(.date_var = date, .by = "week", .pad_value = 0)
## # A tibble: 11,664 × 3
## # Groups: symbol [6]
## symbol date claims
## <fct> <date> <int>
## 1 Connecticut 1989-01-07 8345
## 2 Connecticut 1989-01-14 6503
## 3 Connecticut 1989-01-21 3821
## 4 Connecticut 1989-01-28 4663
## 5 Connecticut 1989-02-04 4162
## 6 Connecticut 1989-02-11 4337
## 7 Connecticut 1989-02-18 4079
## 8 Connecticut 1989-02-25 3556
## 9 Connecticut 1989-03-04 3826
## 10 Connecticut 1989-03-11 3515
## # ℹ 11,654 more rows
# Right-aligned 4-period rolling mean over the first ten rows.
# `.partial = TRUE` lets the leading rows use a shorter window.
claims_tbl %>%
  head(n = 10) %>%
  mutate(
    rolling_avg_4 = slidify_vec(
      .x       = claims,
      .f       = mean,
      .period  = 4,
      .align   = "right",
      .partial = TRUE
    )
  )
## # A tibble: 10 × 4
## symbol date claims rolling_avg_4
## <fct> <date> <int> <dbl>
## 1 Connecticut 1989-01-07 8345 8345
## 2 Connecticut 1989-01-14 6503 7424
## 3 Connecticut 1989-01-21 3821 6223
## 4 Connecticut 1989-01-28 4663 5833
## 5 Connecticut 1989-02-04 4162 4787.
## 6 Connecticut 1989-02-11 4337 4246.
## 7 Connecticut 1989-02-18 4079 4310.
## 8 Connecticut 1989-02-25 3556 4034.
## 9 Connecticut 1989-03-04 3826 3950.
## 10 Connecticut 1989-03-11 3515 3744
# Rolling regression: with `.unlist = FALSE` the sliding function keeps one
# fitted `lm` per row in a list-column.
# Window: 26 periods, right-aligned; first argument is the response, the
# next two are the predictors.
lm_roll <- slidify(
  ~ lm(..1 ~ ..2 + ..3),
  .period = 26,
  .align  = "right",
  .unlist = FALSE
)

claims_tbl %>%
  select(symbol, date, claims) %>%
  group_by(symbol) %>%
  # Predictors: the previous observation within each state and the date as
  # a number.
  mutate(
    lag_claims   = lag(claims),
    numeric_date = as.numeric(date)
  ) %>%
  # Drop rows that have no lagged value.
  filter(!is.na(lag_claims)) %>%
  # Apply rolling regression
  mutate(rolling_lm = lm_roll(claims, lag_claims, numeric_date)) %>%
  # Drop rows where the rolling model is NA.
  filter(!is.na(rolling_lm))
## # A tibble: 11,508 × 6
## # Groups: symbol [6]
## symbol date claims lag_claims numeric_date rolling_lm
## <fct> <date> <int> <int> <dbl> <list>
## 1 Connecticut 1989-07-08 7010 5232 7128 <lm>
## 2 Connecticut 1989-07-15 5630 7010 7135 <lm>
## 3 Connecticut 1989-07-22 4590 5630 7142 <lm>
## 4 Connecticut 1989-07-29 4929 4590 7149 <lm>
## 5 Connecticut 1989-08-05 7029 4929 7156 <lm>
## 6 Connecticut 1989-08-12 3704 7029 7163 <lm>
## 7 Connecticut 1989-08-19 4082 3704 7170 <lm>
## 8 Connecticut 1989-08-26 3373 4082 7177 <lm>
## 9 Connecticut 1989-09-02 2902 3373 7184 <lm>
## 10 Connecticut 1989-09-09 2856 2902 7191 <lm>
## # ℹ 11,498 more rows