library(tidyverse)
## Warning: package 'ggplot2' was built under R version 4.3.2
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.3 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ ggplot2 3.5.0 ✔ tibble 3.2.1
## ✔ lubridate 1.9.2 ✔ tidyr 1.3.0
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(tidyquant) # for financial analysis
## Loading required package: PerformanceAnalytics
## Loading required package: xts
## Loading required package: zoo
##
## Attaching package: 'zoo'
##
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
##
##
## ######################### Warning from 'xts' package ##########################
## # #
## # The dplyr lag() function breaks how base R's lag() function is supposed to #
## # work, which breaks lag(my_xts). Calls to lag(my_xts) that you type or #
## # source() into this session won't work correctly. #
## # #
## # Use stats::lag() to make sure you're not using dplyr::lag(), or you can add #
## # conflictRules('dplyr', exclude = 'lag') to your .Rprofile to stop #
## # dplyr from breaking base R's lag() function. #
## # #
## # Code in packages is not affected. It's protected by R's namespace mechanism #
## # Set `options(xts.warn_dplyr_breaks_lag = FALSE)` to suppress this warning. #
## # #
## ###############################################################################
##
## Attaching package: 'xts'
##
## The following objects are masked from 'package:dplyr':
##
## first, last
##
##
## Attaching package: 'PerformanceAnalytics'
##
## The following object is masked from 'package:graphics':
##
## legend
##
## Loading required package: quantmod
## Loading required package: TTR
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
library(broom) # for tidy model results
library(umap) # for dimension reduction
## Warning: package 'umap' was built under R version 4.3.3
library(plotly) # for interactive visualization
##
## Attaching package: 'plotly'
##
## The following object is masked from 'package:ggplot2':
##
## last_plot
##
## The following object is masked from 'package:stats':
##
## filter
##
## The following object is masked from 'package:graphics':
##
## layout
library(dplyr)
library(ggplot2)
library(lubridate)
library(timetk)
# Goal: Apply Matt Dancho’s tutorial to the initial unemployment claims of the New England states.
# Earliest date requested from the data source.
start_date <- "1989-01-01"

# Series identifiers for initial unemployment claims, one per New England
# state.
symbols_txt <- c(
  "CTICLAIMS", # Connecticut
  "MEICLAIMS", # Maine
  "MAICLAIMS", # Massachusetts
  "NHICLAIMS", # New Hampshire
  "RIICLAIMS", # Rhode Island
  "VTICLAIMS"  # Vermont
)

# Fetch all series as one long table, name the value column `claims`, and
# swap each series identifier for the readable state name.
claims_tbl <- tq_get(x = symbols_txt, get = "economic.data", from = start_date) %>%
  rename(claims = price) %>%
  mutate(
    symbol = fct_recode(
      symbol,
      "Connecticut"   = "CTICLAIMS",
      "Maine"         = "MEICLAIMS",
      "Massachusetts" = "MAICLAIMS",
      "New Hampshire" = "NHICLAIMS",
      "Rhode Island"  = "RIICLAIMS",
      "Vermont"       = "VTICLAIMS"
    )
  )
# Plotting time series
# Print the table first to inspect its layout: one row per state (`symbol`)
# and `date`, with the claim count in `claims`.
claims_tbl
## # A tibble: 11,052 × 3
## symbol date claims
## <fct> <date> <int>
## 1 Connecticut 1989-01-07 8345
## 2 Connecticut 1989-01-14 6503
## 3 Connecticut 1989-01-21 3821
## 4 Connecticut 1989-01-28 4663
## 5 Connecticut 1989-02-04 4162
## 6 Connecticut 1989-02-11 4337
## 7 Connecticut 1989-02-18 4079
## 8 Connecticut 1989-02-25 3556
## 9 Connecticut 1989-03-04 3826
## 10 Connecticut 1989-03-11 3515
## # ℹ 11,042 more rows
# Overview of every state on one chart.
# `claims_tbl` stacks one series per state, so plot_time_series() has to be
# told which rows belong together; otherwise all states are joined into a
# single zig-zagging line. Colouring by `symbol` draws one line per state.
claims_tbl %>%
  plot_time_series(
    .date_var = date,
    .value = claims,
    .color_var = symbol
  )

# Frequency table of the distinct values in `claims`.
claims_tbl %>% count(claims)
## # A tibble: 5,331 × 2
## claims n
## <int> <int>
## 1 152 1
## 2 154 1
## 3 184 2
## 4 189 1
## 5 200 1
## 6 201 1
## 7 203 1
## 8 205 1
## 9 206 1
## 10 211 2
## # ℹ 5,321 more rows
# One panel per state: grouping by `symbol` before plotting produces a facet
# for each group, laid out in two columns with free scales, as a static plot.
claims_tbl %>%
  group_by(symbol) %>%
  plot_time_series(
    date, claims,
    .facet_ncol = 2,
    .facet_scales = "free",
    .interactive = FALSE
  )

# Frequency table of the distinct values in `claims`.
count(claims_tbl, claims)
## # A tibble: 5,331 × 2
## claims n
## <int> <int>
## 1 152 1
## 2 154 1
## 3 184 2
## 4 189 1
## 5 200 1
## 6 201 1
## 7 203 1
## 8 205 1
## 9 206 1
## 10 211 2
## # ℹ 5,321 more rows
# Claims coloured by calendar month, one panel per state.
# The table holds one series per state; grouping by `symbol` keeps each
# month-coloured line within a single state instead of running it across all
# of them.
claims_tbl %>%
  group_by(symbol) %>%
  plot_time_series(
    date, claims,
    .color_var = month(date, label = TRUE),
    .facet_ncol = 2,
    # Returns static ggplot
    .interactive = FALSE,
    .title = "State Unemployment",
    .x_lab = "Timeline",
    .y_lab = "Claims",
    .color_lab = "Month"
  )
# Boxplots
# Frequency table of the distinct values in `claims`.
count(claims_tbl, claims)
## # A tibble: 5,331 × 2
## claims n
## <int> <int>
## 1 152 1
## 2 154 1
## 3 184 2
## 4 189 1
## 5 200 1
## 6 201 1
## 7 203 1
## 8 205 1
## 9 206 1
## 10 211 2
## # ℹ 5,321 more rows
# Boxplots of claims per one-year period, one panel per state.
# Grouping by `symbol` is what creates the panels: without it `.facet_ncol`
# has nothing to arrange and every box pools all states together.
claims_tbl %>%
  group_by(symbol) %>%
  plot_time_series_boxplot(
    .date_var = date,
    .value = claims,
    .period = "1 year",
    .facet_ncol = 2
  )
# Plotting Seasonality and Correlation
## Correlation plots
# Autocorrelation diagnostics for each state, with lags spanning one year.
claims_tbl %>%
  group_by(symbol) %>%
  plot_acf_diagnostics(.date_var = date, .value = claims, .lags = "1 year")
## Seasonality
# Seasonal diagnostics of claims, computed separately for each state.
claims_tbl %>%
  group_by(symbol) %>%
  plot_seasonal_diagnostics(.date_var = date, .value = claims)
# STL Diagnostics
# Decompose each state's series and show the observed, season, trend and
# remainder components interactively. Frequency and trend are left on "auto";
# the values chosen for each state are reported in the messages printed by
# the call.
claims_tbl %>%
  group_by(symbol) %>%
  plot_stl_diagnostics(
    .date_var = date,
    .value = claims,
    .feature_set = c("observed", "season", "trend", "remainder"),
    .frequency = "auto",
    .trend = "auto",
    .interactive = TRUE
  )
## frequency = 13 observations per 1 quarter
## trend = 53 observations per 1 year
## frequency = 13 observations per 1 quarter
## trend = 53 observations per 1 year
## frequency = 13 observations per 1 quarter
## trend = 53 observations per 1 year
## frequency = 13 observations per 1 quarter
## trend = 53 observations per 1 year
## frequency = 13 observations per 1 quarter
## trend = 53 observations per 1 year
## frequency = 13 observations per 1 quarter
## trend = 53 observations per 1 year
# Time Series Data Wrangling
## Summarize by Time
# Baseline: the raw series, one interactive panel per state.
claims_tbl %>%
  group_by(symbol) %>%
  plot_time_series(
    .date_var = date,
    .value = claims,
    .facet_ncol = 2,
    .interactive = TRUE
  )

# Same layout after collapsing each state to yearly periods; `adjusted` holds
# the mean of `claims` within each year.
claims_tbl %>%
  group_by(symbol) %>%
  summarise_by_time(date, .by = "year", adjusted = mean(claims)) %>%
  plot_time_series(
    .date_var = date,
    .value = adjusted,
    .facet_ncol = 2,
    .interactive = TRUE
  )
# Filter by time
# Restrict each state's series to the span from "2000" to "2022", then plot
# the result in two facet columns.
claims_tbl %>%
  group_by(symbol) %>%
  filter_by_time(date, .start_date = "2000", .end_date = "2022") %>%
  plot_time_series(.date_var = date, .value = claims, .facet_ncol = 2)
# Padding Data
# Expand each state's series to a daily grid; the rows inserted for the
# missing days get a claims value of 0.
claims_tbl %>%
  group_by(symbol) %>%
  pad_by_time(.date_var = date, .by = "day", .pad_value = 0)
## # A tibble: 77,328 × 3
## # Groups: symbol [6]
## symbol date claims
## <fct> <date> <int>
## 1 Connecticut 1989-01-07 8345
## 2 Connecticut 1989-01-08 0
## 3 Connecticut 1989-01-09 0
## 4 Connecticut 1989-01-10 0
## 5 Connecticut 1989-01-11 0
## 6 Connecticut 1989-01-12 0
## 7 Connecticut 1989-01-13 0
## 8 Connecticut 1989-01-14 6503
## 9 Connecticut 1989-01-15 0
## 10 Connecticut 1989-01-16 0
## # ℹ 77,318 more rows
# Sliding (rolling) calc
# Two-observation trailing mean over the first ten rows. With
# `.partial = TRUE` the first row, which has no predecessor, is averaged on
# its own rather than returned as NA.
claims_tbl %>%
  head(n = 10) %>%
  mutate(
    rolling_avg_2 = slidify_vec(
      .x = claims,
      .f = mean,
      .period = 2,
      .align = "right",
      .partial = TRUE
    )
  )
## # A tibble: 10 × 4
## symbol date claims rolling_avg_2
## <fct> <date> <int> <dbl>
## 1 Connecticut 1989-01-07 8345 8345
## 2 Connecticut 1989-01-14 6503 7424
## 3 Connecticut 1989-01-21 3821 5162
## 4 Connecticut 1989-01-28 4663 4242
## 5 Connecticut 1989-02-04 4162 4412.
## 6 Connecticut 1989-02-11 4337 4250.
## 7 Connecticut 1989-02-18 4079 4208
## 8 Connecticut 1989-02-25 3556 3818.
## 9 Connecticut 1989-03-04 3826 3691
## 10 Connecticut 1989-03-11 3515 3670.
# Rolling regression helper: over each trailing window of 90 observations,
# fit a linear model of the first input on the second.
# `.unlist = FALSE` keeps every fitted model whole so the results can be
# stored in a list-column; `.align = "right"` ties each fit to the last row
# of its window.
lm_roll <- slidify(~ lm(..1 ~ ..2),
  .period = 90,
  .unlist = FALSE,
  .align = "right"
)

claims_tbl %>%
  select(symbol, date, claims) %>%
  group_by(symbol) %>%
  # Numeric time index for the trend regression. It must be derived from
  # `date`: as.numeric(symbol) is only the factor level code, which is
  # constant within each state and therefore carries no time information.
  mutate(numeric_date = as.numeric(date)) %>%
  # Apply rolling regression (claims on the time index). `date` itself is not
  # added as a second predictor because it is perfectly collinear with
  # `numeric_date`.
  mutate(rolling_lm = lm_roll(claims, numeric_date)) %>%
  # Drop the leading rows of each state that have no full 90-observation
  # window behind them.
  filter(!is.na(rolling_lm))
## # A tibble: 10,518 × 5
## # Groups: symbol [6]
## symbol date claims numeric_date rolling_lm
## <fct> <date> <int> <dbl> <list>
## 1 Connecticut 1990-09-22 3927 7569 <lm>
## 2 Connecticut 1990-09-29 4471 7576 <lm>
## 3 Connecticut 1990-10-06 4430 7583 <lm>
## 4 Connecticut 1990-10-13 4494 7590 <lm>
## 5 Connecticut 1990-10-20 4894 7597 <lm>
## 6 Connecticut 1990-10-27 4653 7604 <lm>
## 7 Connecticut 1990-11-03 4719 7611 <lm>
## 8 Connecticut 1990-11-10 5347 7618 <lm>
## 9 Connecticut 1990-11-17 4824 7625 <lm>
## 10 Connecticut 1990-11-24 5367 7632 <lm>
## # ℹ 10,508 more rows