# for Core packages
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.4.4     ✔ purrr   1.0.2
## ✔ tibble  3.2.1     ✔ dplyr   1.1.4
## ✔ tidyr   1.3.1     ✔ stringr 1.5.1
## ✔ readr   2.1.2     ✔ forcats 1.0.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
# for financial analysis
library(tidyquant)
## Loading required package: lubridate
## 
## Attaching package: 'lubridate'
## 
## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union
## 
## Loading required package: PerformanceAnalytics
## Loading required package: xts
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## 
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
## 
## 
## Attaching package: 'xts'
## 
## The following objects are masked from 'package:dplyr':
## 
##     first, last
## 
## 
## Attaching package: 'PerformanceAnalytics'
## 
## The following object is masked from 'package:graphics':
## 
##     legend
## 
## Loading required package: quantmod
## Loading required package: TTR
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
# for times series
library(timetk)

Goal: Apply Matt Dancho’s tutorial to state unemployment initial claims of New England states.

The following is the replication of Matt Dancho’s tutorial on this page

start_date <- "1989-01-01"

symbols_txt <- c("CTICLAIMS", # Connecticut
                 "MEICLAIMS", # Maine
                 "MAICLAIMS", # Massachusetts
                 "NHICLAIMS", # New Hampshire
                 "RIICLAIMS", # Rhode Island
                 "VTICLAIMS") # Vermont

claims_tbl <- tq_get(symbols_txt, get = "economic.data", from = start_date) %>%
    mutate(symbol = fct_recode(symbol,
                               "Connecticut"   = "CTICLAIMS",
                               "Maine"         = "MEICLAIMS",
                               "Massachusetts" = "MAICLAIMS",
                               "New Hampshire" = "NHICLAIMS",
                               "Rhode Island"  = "RIICLAIMS",
                               "Vermont"       = "VTICLAIMS")) %>%
    rename(claims = price)

Plotting time series

claims_tbl
## # A tibble: 11,052 × 3
##    symbol      date       claims
##    <fct>       <date>      <int>
##  1 Connecticut 1989-01-07   8345
##  2 Connecticut 1989-01-14   6503
##  3 Connecticut 1989-01-21   3821
##  4 Connecticut 1989-01-28   4663
##  5 Connecticut 1989-02-04   4162
##  6 Connecticut 1989-02-11   4337
##  7 Connecticut 1989-02-18   4079
##  8 Connecticut 1989-02-25   3556
##  9 Connecticut 1989-03-04   3826
## 10 Connecticut 1989-03-11   3515
## # ℹ 11,042 more rows
claims_tbl %>%
    plot_time_series(.date_var = date, .value = claims)

Box plots

claims_tbl %>% count(symbol)
## # A tibble: 6 × 2
##   symbol            n
##   <fct>         <int>
## 1 Connecticut    1842
## 2 Massachusetts  1842
## 3 Maine          1842
## 4 New Hampshire  1842
## 5 Rhode Island   1842
## 6 Vermont        1842
claims_tbl %>%
  filter_by_time(.date_var = date, .end_date = "2021") %>%
  group_by(symbol) %>%
  plot_time_series_boxplot(.date_var = date,
                           .value    = claims,
                           .period   = "1 year",
                           .facet_ncol = 2)
## Warning: The following aesthetics were dropped during statistical transformation:
## y_plotlyDomain
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
##   the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
##   variable into a factor?
## The following aesthetics were dropped during statistical transformation:
## y_plotlyDomain
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
##   the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
##   variable into a factor?
## The following aesthetics were dropped during statistical transformation:
## y_plotlyDomain
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
##   the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
##   variable into a factor?
## The following aesthetics were dropped during statistical transformation:
## y_plotlyDomain
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
##   the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
##   variable into a factor?
## The following aesthetics were dropped during statistical transformation:
## y_plotlyDomain
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
##   the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
##   variable into a factor?
## The following aesthetics were dropped during statistical transformation:
## y_plotlyDomain
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
##   the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
##   variable into a factor?

Regression plots

claims_tbl %>%
    group_by(symbol) %>%
    plot_time_series_regression(.date_var = date, 
                                .facet_ncol = 2, 
                                .formula = log(claims) ~ as.numeric(date) + month(date, label = TRUE), 
                                .show_summary = FALSE)

Plotting Seasonality and Correlation

Correlation Plots

claims_tbl %>%
    group_by(symbol) %>%
    plot_acf_diagnostics(date, claims, .lags = "year")
## Warning: `gather_()` was deprecated in tidyr 1.2.0.
## ℹ Please use `gather()` instead.
## ℹ The deprecated feature was likely used in the plotly package.
##   Please report the issue at <https://github.com/plotly/plotly.R/issues>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

Seasonality

claims_tbl %>% 
    plot_seasonal_diagnostics(date, claims)
## Warning: The following aesthetics were dropped during statistical transformation:
## y_plotlyDomain
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
##   the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
##   variable into a factor?
## The following aesthetics were dropped during statistical transformation:
## y_plotlyDomain
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
##   the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
##   variable into a factor?
## The following aesthetics were dropped during statistical transformation:
## y_plotlyDomain
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
##   the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
##   variable into a factor?
## The following aesthetics were dropped during statistical transformation:
## y_plotlyDomain
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
##   the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
##   variable into a factor?

STL Diagnostics

claims_tbl %>%
    group_by(symbol) %>%
    plot_stl_diagnostics(
        date, claims,
        .frequency = "auto", .trend = "auto",
        .feature_set = c("observed", "season", "trend", "remainder"))
## frequency = 13 observations per 1 quarter
## trend = 53 observations per 1 year
## frequency = 13 observations per 1 quarter
## trend = 53 observations per 1 year
## frequency = 13 observations per 1 quarter
## trend = 53 observations per 1 year
## frequency = 13 observations per 1 quarter
## trend = 53 observations per 1 year
## frequency = 13 observations per 1 quarter
## trend = 53 observations per 1 year
## frequency = 13 observations per 1 quarter
## trend = 53 observations per 1 year

Time Series Data Wrangling

Summarize by Time

claims_tbl %>%
  group_by(symbol) %>%
  plot_time_series(date, claims, .facet_ncol = 2, .interactive = FALSE)

Filter By Time

claims_tbl %>%
    group_by(symbol) %>%
    filter_by_time(.date_var = date, .start_date = "2024-01-01", .end_date = "2024") %>%
    plot_time_series(date, claims, .facet_ncol = 2) 

Padding Data

claims_tbl %>% 
    group_by(symbol) %>%
    pad_by_time(date, .by = "day", .fill_na_direction = "down")
## # A tibble: 77,328 × 3
## # Groups:   symbol [6]
##    symbol      date       claims
##    <fct>       <date>      <int>
##  1 Connecticut 1989-01-07   8345
##  2 Connecticut 1989-01-08   8345
##  3 Connecticut 1989-01-09   8345
##  4 Connecticut 1989-01-10   8345
##  5 Connecticut 1989-01-11   8345
##  6 Connecticut 1989-01-12   8345
##  7 Connecticut 1989-01-13   8345
##  8 Connecticut 1989-01-14   6503
##  9 Connecticut 1989-01-15   6503
## 10 Connecticut 1989-01-16   6503
## # ℹ 77,318 more rows

Sliding (Rolling) Calculations

claims_tbl %>%
    head(8) %>%
    mutate(rolling_avg_2 = slidify_vec(claims, mean, 
                                       .period = 2, 
                                       .align = "left", 
                                       .partial = TRUE))
## # A tibble: 8 × 4
##   symbol      date       claims rolling_avg_2
##   <fct>       <date>      <int>         <dbl>
## 1 Connecticut 1989-01-07   8345         7424 
## 2 Connecticut 1989-01-14   6503         5162 
## 3 Connecticut 1989-01-21   3821         4242 
## 4 Connecticut 1989-01-28   4663         4412.
## 5 Connecticut 1989-02-04   4162         4250.
## 6 Connecticut 1989-02-11   4337         4208 
## 7 Connecticut 1989-02-18   4079         3818.
## 8 Connecticut 1989-02-25   3556         3556