# for Core packages
library(tidyverse)
## Warning: package 'readr' was built under R version 4.2.3
## Warning: package 'dplyr' was built under R version 4.2.3
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ ggplot2 3.4.4 ✔ tibble 3.2.1
## ✔ lubridate 1.9.2 ✔ tidyr 1.3.0
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# for financial analysis
library(tidyquant)
## Loading required package: PerformanceAnalytics
## Loading required package: xts
## Loading required package: zoo
##
## Attaching package: 'zoo'
##
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
##
##
## Attaching package: 'xts'
##
## The following objects are masked from 'package:dplyr':
##
## first, last
##
##
## Attaching package: 'PerformanceAnalytics'
##
## The following object is masked from 'package:graphics':
##
## legend
##
## Loading required package: quantmod
## Loading required package: TTR
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
# for time series
library(timetk)
Goal: Apply Matt Dancho’s tutorial to weekly initial unemployment claims for the New England states.
The following is a replication of Matt Dancho’s tutorial on this page.
# Earliest observation date requested from the data source.
start_date <- "1989-01-01"

# Initial-claims series IDs, one per New England state.
symbols_txt <- c(
  "CTICLAIMS", # Connecticut
  "MEICLAIMS", # Maine
  "MAICLAIMS", # Massachusetts
  "NHICLAIMS", # New Hampshire
  "RIICLAIMS", # Rhode Island
  "VTICLAIMS"  # Vermont
)

# Download every series in long format, call the value column `claims`,
# and replace each series ID with the name of its state.
claims_tbl <- tq_get(symbols_txt, get = "economic.data", from = start_date) %>%
  rename(claims = price) %>%
  mutate(
    symbol = fct_recode(
      symbol,
      "Connecticut"   = "CTICLAIMS",
      "Maine"         = "MEICLAIMS",
      "Massachusetts" = "MAICLAIMS",
      "New Hampshire" = "NHICLAIMS",
      "Rhode Island"  = "RIICLAIMS",
      "Vermont"       = "VTICLAIMS"
    )
  )
# Overview of all states on one shared axis. `claims_tbl` stacks six series
# in long format, so each state is drawn as its own colored line; without
# this the six series would be joined into a single zig-zag line.
# The smoother is turned off to keep the overlay readable.
claims_tbl %>%
  plot_time_series(
    .date_var = date,
    .value = claims,
    .color_var = symbol,
    .smooth = FALSE
  )
# Number of observations available for each state.
count(claims_tbl, symbol)
## # A tibble: 6 × 2
## symbol n
## <fct> <int>
## 1 Connecticut 1840
## 2 Massachusetts 1840
## 3 Maine 1840
## 4 New Hampshire 1840
## 5 Rhode Island 1840
## 6 Vermont 1840
# Yearly boxplots of claims, one facet per state, using data through 2021.
claims_tbl %>%
  filter_by_time(date, .end_date = "2021") %>%
  group_by(symbol) %>%
  plot_time_series_boxplot(
    date,
    claims,
    .period = "1 year",
    .facet_ncol = 2
  )
# Per-state regression of log claims on a linear time trend plus
# month-of-year effects; the printed model summary is suppressed.
claims_tbl %>%
  group_by(symbol) %>%
  plot_time_series_regression(
    .date_var = date,
    .formula = log(claims) ~ as.numeric(date) + month(date, label = TRUE),
    .show_summary = FALSE,
    .facet_ncol = 2
  )
# Autocorrelation diagnostics for each state.
# The observations are weekly, so the lag window is set to one year to
# expose within-year (seasonal) dependence. A "7 days" window would cover
# only about one weekly lag and leave the plots nearly empty.
claims_tbl %>%
  group_by(symbol) %>%
  plot_acf_diagnostics(date, claims, .lags = "1 year")
# Seasonal diagnostics on the ungrouped table (all states pooled together).
plot_seasonal_diagnostics(claims_tbl, .date_var = date, .value = claims)
# Seasonal diagnostics computed separately for each state.
claims_tbl %>%
  group_by(symbol) %>%
  plot_seasonal_diagnostics(.date_var = date, .value = claims)
# STL decomposition per state, showing the observed series together with
# its seasonal, trend and remainder components.
claims_tbl %>%
  group_by(symbol) %>%
  plot_stl_diagnostics(
    .date_var = date,
    .value = claims,
    .feature_set = c("observed", "season", "trend", "remainder")
  )
## frequency = 13 observations per 1 quarter
## trend = 53 observations per 1 year
## frequency = 13 observations per 1 quarter
## trend = 53 observations per 1 year
## frequency = 13 observations per 1 quarter
## trend = 53 observations per 1 year
## frequency = 13 observations per 1 quarter
## trend = 53 observations per 1 year
## frequency = 13 observations per 1 quarter
## trend = 53 observations per 1 year
## frequency = 13 observations per 1 quarter
## trend = 53 observations per 1 year
# Static (non-interactive) plot of claims over time, one facet per state.
claims_tbl %>%
  group_by(symbol) %>%
  plot_time_series(
    .date_var = date,
    .value = claims,
    .interactive = FALSE,
    .facet_ncol = 2
  )
# Restrict each state's series to 1989-01-07 through 2024, then plot one
# facet per state.
claims_tbl %>%
  group_by(symbol) %>%
  filter_by_time(
    date,
    .start_date = "1989-01-07",
    .end_date = "2024"
  ) %>%
  plot_time_series(.date_var = date, .value = claims, .facet_ncol = 2)
# Expand each state's series to one row per day; the inserted days are
# filled downward from the preceding observation.
claims_tbl %>%
  group_by(symbol) %>%
  pad_by_time(
    .date_var = date,
    .by = "day",
    .fill_na_direction = "down"
  )
## # A tibble: 77,244 × 3
## # Groups: symbol [6]
## symbol date claims
## <fct> <date> <int>
## 1 Connecticut 1989-01-07 8345
## 2 Connecticut 1989-01-08 8345
## 3 Connecticut 1989-01-09 8345
## 4 Connecticut 1989-01-10 8345
## 5 Connecticut 1989-01-11 8345
## 6 Connecticut 1989-01-12 8345
## 7 Connecticut 1989-01-13 8345
## 8 Connecticut 1989-01-14 6503
## 9 Connecticut 1989-01-15 6503
## 10 Connecticut 1989-01-16 6503
## # ℹ 77,234 more rows
# Demonstrate a left-aligned, 2-observation rolling mean on the first ten
# rows; partial windows are allowed, so the last row is still computed.
claims_tbl %>%
  head(n = 10) %>%
  mutate(
    rolling_avg_2 = slidify_vec(
      .x = claims,
      .f = mean,
      .partial = TRUE,
      .align = "left",
      .period = 2
    )
  )
## # A tibble: 10 × 4
## symbol date claims rolling_avg_2
## <fct> <date> <int> <dbl>
## 1 Connecticut 1989-01-07 8345 7424
## 2 Connecticut 1989-01-14 6503 5162
## 3 Connecticut 1989-01-21 3821 4242
## 4 Connecticut 1989-01-28 4663 4412.
## 5 Connecticut 1989-02-04 4162 4250.
## 6 Connecticut 1989-02-11 4337 4208
## 7 Connecticut 1989-02-18 4079 3818.
## 8 Connecticut 1989-02-25 3556 3691
## 9 Connecticut 1989-03-04 3826 3670.
## 10 Connecticut 1989-03-11 3515 3515
# Build a reusable rolling-mean function: centered window of 30
# observations, with partial windows permitted at the edges.
roll_avg_30 <- slidify(
  mean,
  .partial = TRUE,
  .align = "center",
  .period = 30
)
# Apply the rolling function
# For each state, compute the rolling average with roll_avg_30(), stack the
# raw and smoothed series in long format, and plot them together.
claims_tbl %>%
  select(symbol, date, claims) %>%
  group_by(symbol) %>%
  # Apply Sliding Function
  mutate(rolling_avg_30 = roll_avg_30(claims)) %>%
  # pivot_longer() puts the measurements in `value` and the name of the
  # source column (claims / rolling_avg_30) in `name`.
  tidyr::pivot_longer(cols = c(claims, rolling_avg_30)) %>%
  # Plot the numeric `value` column (not the `symbol` factor), colored by
  # series type; the grouping by `symbol` provides the facets.
  plot_time_series(
    date,
    value,
    .color_var = name,
    .facet_ncol = 2,
    .smooth = FALSE,
    .interactive = FALSE
  )