Goal: Apply Matt Dancho’s tutorial to the initial unemployment claims of the New England states.
The following is a replication of Matt Dancho’s tutorial on this page.
# Earliest observation date to request.
start_date <- "1989-01-01"

# Series IDs for state-level initial claims, one per New England state.
symbols_txt <- c(
  "CTICLAIMS", # Connecticut
  "MEICLAIMS", # Maine
  "MAICLAIMS", # Massachusetts
  "NHICLAIMS", # New Hampshire
  "RIICLAIMS", # Rhode Island
  "VTICLAIMS"  # Vermont
)

# Download every series, call the value column `claims`, and replace the
# series IDs in `symbol` with readable state names.
claims_tbl <- tq_get(symbols_txt, get = "economic.data", from = start_date) %>%
  rename(claims = price) %>%
  mutate(
    symbol = fct_recode(
      symbol,
      "Connecticut"   = "CTICLAIMS",
      "Maine"         = "MEICLAIMS",
      "Massachusetts" = "MAICLAIMS",
      "New Hampshire" = "NHICLAIMS",
      "Rhode Island"  = "RIICLAIMS",
      "Vermont"       = "VTICLAIMS"
    )
  )

claims_tbl
## # A tibble: 11,358 × 3
## symbol date claims
## <fct> <date> <int>
## 1 Connecticut 1989-01-07 8345
## 2 Connecticut 1989-01-14 6503
## 3 Connecticut 1989-01-21 3821
## 4 Connecticut 1989-01-28 4663
## 5 Connecticut 1989-02-04 4162
## 6 Connecticut 1989-02-11 4337
## 7 Connecticut 1989-02-18 4079
## 8 Connecticut 1989-02-25 3556
## 9 Connecticut 1989-03-04 3826
## 10 Connecticut 1989-03-11 3515
## # ℹ 11,348 more rows
# Log of claims against date, drawn from the ungrouped table.
plot_time_series(claims_tbl, .date_var = date, .value = log(claims))

# Number of rows available for each state.
count(claims_tbl, symbol)
## # A tibble: 6 × 2
## symbol n
## <fct> <int>
## 1 Connecticut 1893
## 2 Massachusetts 1893
## 3 Maine 1893
## 4 New Hampshire 1893
## 5 Rhode Island 1893
## 6 Vermont 1893
# One non-interactive panel per state, laid out in two columns with free
# axis scales.
claims_tbl %>%
  group_by(symbol) %>%
  plot_time_series(
    date, log(claims),
    .facet_scales = "free",
    .facet_ncol   = 2,
    .interactive  = FALSE
  )

count(claims_tbl, symbol)
## # A tibble: 6 × 2
## symbol n
## <fct> <int>
## 1 Connecticut 1893
## 2 Massachusetts 1893
## 3 Maine 1893
## 4 New Hampshire 1893
## 5 Rhode Island 1893
## 6 Vermont 1893
# Same faceted layout as a default (interactive) plot.
# Optional: add `.color_var = week(date)` to colour the lines by week.
claims_tbl %>%
  group_by(symbol) %>%
  plot_time_series(
    date, log(claims),
    .facet_scales = "free",
    .facet_ncol   = 2
  )
# Static ggplot of log claims, coloured by calendar month, with custom labels.
claims_tbl %>%
  plot_time_series(
    .date_var    = date,
    .value       = log(claims),
    .color_var   = month(date, label = TRUE),
    .interactive = FALSE,
    # Titles and axis / legend labels
    .title       = "Jobless Claims",
    .x_lab       = "Date (10-year intervals)",
    .y_lab       = "Number log-adjusted",
    .color_lab   = "month"
  )

count(claims_tbl, symbol)
## # A tibble: 6 × 2
## symbol n
## <fct> <int>
## 1 Connecticut 1893
## 2 Massachusetts 1893
## 3 Maine 1893
## 4 New Hampshire 1893
## 5 Rhode Island 1893
## 6 Vermont 1893
# Yearly box plots of log claims per state, using data up to the end of 2024.
claims_tbl %>%
  filter_by_time(date, .end_date = "2024") %>%
  group_by(symbol) %>%
  plot_time_series_boxplot(
    date, log(claims),
    .period     = "1 year",
    .facet_ncol = 2
  )
# Per-state regression plot: log claims modelled on a linear time trend
# plus a month-of-year factor. The model summary is not printed.
claims_tbl %>%
  group_by(symbol) %>%
  plot_time_series_regression(
    date,
    .formula      = log(claims) ~ as.numeric(date) + month(date, label = TRUE),
    .show_summary = FALSE,
    .facet_ncol   = 2
  )
# Autocorrelation diagnostics of log claims per state, out to three years
# of lags.
claims_tbl %>%
  group_by(symbol) %>%
  plot_acf_diagnostics(
    .date_var = date,
    .value    = log(claims),
    .lags     = "3 years"
  )
# Seasonal diagnostics of log claims computed on the ungrouped table.
plot_seasonal_diagnostics(claims_tbl, .date_var = date, .value = log(claims))

count(claims_tbl, symbol)
## # A tibble: 6 × 2
## symbol n
## <fct> <int>
## 1 Connecticut 1893
## 2 Massachusetts 1893
## 3 Maine 1893
## 4 New Hampshire 1893
## 5 Rhode Island 1893
## 6 Vermont 1893
# Seasonal diagnostics of log claims, one set of panels per state.
claims_tbl %>%
  group_by(symbol) %>%
  plot_seasonal_diagnostics(.date_var = date, .value = log(claims))
## STL Diagnostics
# STL decomposition of log claims per state, showing the observed series and
# its seasonal, trend and remainder components.
claims_tbl %>%
  group_by(symbol) %>%
  plot_stl_diagnostics(
    .date_var    = date,
    .value       = log(claims),
    .feature_set = c("observed", "season", "trend", "remainder")
  )
## frequency = 13 observations per 1 quarter
## trend = 53 observations per 1 year
## frequency = 13 observations per 1 quarter
## trend = 53 observations per 1 year
## frequency = 13 observations per 1 quarter
## trend = 53 observations per 1 year
## frequency = 13 observations per 1 quarter
## trend = 53 observations per 1 year
## frequency = 13 observations per 1 quarter
## trend = 53 observations per 1 year
## frequency = 13 observations per 1 quarter
## trend = 53 observations per 1 year
Weekly Data
# Log claims at the original sampling frequency, one static panel per state.
claims_tbl %>%
  group_by(symbol) %>%
  plot_time_series(
    .date_var    = date,
    .value       = log(claims),
    .facet_ncol  = 2,
    .interactive = FALSE
  )
Summarized by Quarter
# Quarterly totals per state. The raw claims are summed within each quarter
# and the log is taken afterwards: summing log(claims) would yield the log of
# the *product* of the counts in the quarter, not the log of their total.
claims_tbl %>%
  group_by(symbol) %>%
  summarise_by_time(
    .date_var    = date,
    .by          = "quarter",
    total_claims = sum(claims)
  ) %>%
  plot_time_series(
    date, log(total_claims),
    .facet_ncol  = 2,
    .interactive = FALSE
  )
# Monthly average of log claims per state, plotted as static panels.
claims_tbl %>%
  group_by(symbol) %>%
  summarise_by_time(
    date,
    .by             = "month",
    mean_log_claims = mean(log(claims))
  ) %>%
  plot_time_series(
    .date_var    = date,
    .value       = mean_log_claims,
    .facet_ncol  = 2,
    .interactive = FALSE
  )
## Filter By Time
# Keep April 2023 through the end of 2023 for each state, then plot.
claims_tbl %>%
  group_by(symbol) %>%
  filter_by_time(date, .start_date = "2023-04", .end_date = "2023") %>%
  plot_time_series(
    .date_var   = date,
    .value      = log(claims),
    .facet_ncol = 2
  )
# Insert any missing weeks within each state's series, giving padded rows a
# value of 0.
claims_tbl %>%
  group_by(symbol) %>%
  pad_by_time(
    .date_var  = date,
    .by        = "week",
    .pad_value = 0
  )
## # A tibble: 11,358 × 3
## # Groups: symbol [6]
## symbol date claims
## <fct> <date> <int>
## 1 Connecticut 1989-01-07 8345
## 2 Connecticut 1989-01-14 6503
## 3 Connecticut 1989-01-21 3821
## 4 Connecticut 1989-01-28 4663
## 5 Connecticut 1989-02-04 4162
## 6 Connecticut 1989-02-11 4337
## 7 Connecticut 1989-02-18 4079
## 8 Connecticut 1989-02-25 3556
## 9 Connecticut 1989-03-04 3826
## 10 Connecticut 1989-03-11 3515
## # ℹ 11,348 more rows
## Sliding (Rolling) Calculations
# Right-aligned rolling mean of log claims over two observations, computed on
# the first ten rows. `.partial = TRUE` lets the first row use a one-value
# window instead of returning NA.
claims_tbl %>%
  head(10) %>%
  mutate(
    rolling_avg_2 = slidify_vec(
      .x       = log(claims),
      .f       = mean,
      .period  = 2,
      .align   = "right",
      .partial = TRUE
    )
  )
## # A tibble: 10 × 4
## symbol date claims rolling_avg_2
## <fct> <date> <int> <dbl>
## 1 Connecticut 1989-01-07 8345 9.03
## 2 Connecticut 1989-01-14 6503 8.90
## 3 Connecticut 1989-01-21 3821 8.51
## 4 Connecticut 1989-01-28 4663 8.35
## 5 Connecticut 1989-02-04 4162 8.39
## 6 Connecticut 1989-02-11 4337 8.35
## 7 Connecticut 1989-02-18 4079 8.34
## 8 Connecticut 1989-02-25 3556 8.24
## 9 Connecticut 1989-03-04 3826 8.21
## 10 Connecticut 1989-03-11 3515 8.21
# Rolling regressions are easy to implement using `.unlist = FALSE`, which
# makes the rolling function return a list so that each window's fitted `lm`
# object can be stored in a list-column.
# Here: claims regressed on time over a right-aligned window of 90
# observations. `..1` is the response and `..2` the predictor.
lm_roll <- slidify(~ lm(..1 ~ ..2), .period = 90,
                   .unlist = FALSE, .align = "right")

claims_tbl %>%
  select(symbol, date, claims) %>%
  group_by(symbol) %>%
  mutate(numeric_date = as.numeric(date)) %>%
  # Apply rolling regression. Only numeric columns are passed in: supplying
  # the grouping factor `symbol` as the first argument would make it the
  # regression response and trigger a `model.response()` warning per window.
  mutate(rolling_lm = lm_roll(claims, numeric_date)) %>%
  # Rows without a complete 90-observation window hold NA and are dropped.
  filter(!is.na(rolling_lm))
## # A tibble: 10,824 × 5
## # Groups: symbol [6]
## symbol date claims numeric_date rolling_lm
## <fct> <date> <int> <dbl> <list>
## 1 Connecticut 1990-09-22 3927 7569 <lm>
## 2 Connecticut 1990-09-29 4471 7576 <lm>
## 3 Connecticut 1990-10-06 4430 7583 <lm>
## 4 Connecticut 1990-10-13 4494 7590 <lm>
## 5 Connecticut 1990-10-20 4894 7597 <lm>
## 6 Connecticut 1990-10-27 4653 7604 <lm>
## 7 Connecticut 1990-11-03 4719 7611 <lm>
## 8 Connecticut 1990-11-10 5347 7618 <lm>
## 9 Connecticut 1990-11-17 4824 7625 <lm>
## 10 Connecticut 1990-11-24 5367 7632 <lm>
## # ℹ 10,814 more rows