# for Core packages
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.6
## ✔ forcats   1.0.1     ✔ stringr   1.6.0
## ✔ ggplot2   4.0.2     ✔ tibble    3.3.1
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.2
## ✔ purrr     1.2.1     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# for financial analysis
library(tidyquant)
## Warning: package 'tidyquant' was built under R version 4.5.3
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
## Warning: package 'xts' was built under R version 4.5.3
## Warning: package 'zoo' was built under R version 4.5.3
## Warning: package 'quantmod' was built under R version 4.5.3
## Warning: package 'TTR' was built under R version 4.5.3
## Warning: package 'PerformanceAnalytics' was built under R version 4.5.3
## ── Attaching core tidyquant packages ─────────────────────── tidyquant 1.0.12 ──
## ✔ PerformanceAnalytics 2.1.0      ✔ TTR                  0.24.4
## ✔ quantmod             0.4.28     ✔ xts                  0.14.2
## ── Conflicts ────────────────────────────────────────── tidyquant_conflicts() ──
## ✖ zoo::as.Date()                 masks base::as.Date()
## ✖ zoo::as.Date.numeric()         masks base::as.Date.numeric()
## ✖ dplyr::filter()                masks stats::filter()
## ✖ xts::first()                   masks dplyr::first()
## ✖ dplyr::lag()                   masks stats::lag()
## ✖ xts::last()                    masks dplyr::last()
## ✖ PerformanceAnalytics::legend() masks graphics::legend()
## ✖ quantmod::summary()            masks base::summary()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# for time series
library(timetk)
## Warning: package 'timetk' was built under R version 4.5.3
## 
## Attaching package: 'timetk'
## 
## The following object is masked from 'package:tidyquant':
## 
##     FANG

Goal: Apply Matt Dancho’s tutorial to initial unemployment claims for the six New England states.

The following is a replication of Matt Dancho’s tutorial on this page.

# Earliest observation date to request.
start_date <- "1989-01-01"

# Series IDs for initial unemployment claims, one per New England state.
symbols_txt <- c(
  "CTICLAIMS", # Connecticut
  "MEICLAIMS", # Maine
  "MAICLAIMS", # Massachusetts
  "NHICLAIMS", # New Hampshire
  "RIICLAIMS", # Rhode Island
  "VTICLAIMS"  # Vermont
)

# Download every series from `start_date` onward, expose the value column as
# `claims`, and relabel the series IDs with readable state names.
claims_tbl <- symbols_txt %>%
  tq_get(get = "economic.data", from = start_date) %>%
  rename(claims = price) %>%
  mutate(
    symbol = fct_recode(
      symbol,
      "Connecticut"   = "CTICLAIMS",
      "Maine"         = "MEICLAIMS",
      "Massachusetts" = "MAICLAIMS",
      "New Hampshire" = "NHICLAIMS",
      "Rhode Island"  = "RIICLAIMS",
      "Vermont"       = "VTICLAIMS"
    )
  )

Plotting time series

# Print the tibble to inspect its columns (symbol, date, claims).
claims_tbl
## # A tibble: 11,664 × 3
##    symbol      date       claims
##    <fct>       <date>      <int>
##  1 Connecticut 1989-01-07   8345
##  2 Connecticut 1989-01-14   6503
##  3 Connecticut 1989-01-21   3821
##  4 Connecticut 1989-01-28   4663
##  5 Connecticut 1989-02-04   4162
##  6 Connecticut 1989-02-11   4337
##  7 Connecticut 1989-02-18   4079
##  8 Connecticut 1989-02-25   3556
##  9 Connecticut 1989-03-04   3826
## 10 Connecticut 1989-03-11   3515
## # ℹ 11,654 more rows
# Plot raw claims over time, one facet per state.
# `claims_tbl` stacks the six state series in a single table, so the data is
# grouped by `symbol` first; without the grouping all states would be drawn
# (and smoothed) as if they were one series.
claims_tbl %>%
  group_by(symbol) %>%
  plot_time_series(
    .date_var   = date,
    .value      = claims,
    .facet_ncol = 2
  )
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## ℹ The deprecated feature was likely used in the timetk package.
##   Please report the issue at
##   <https://github.com/business-science/timetk/issues>.
## This warning is displayed once per session.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Ignoring unknown labels:
## • colour : "Legend"
# Log of claims per state in a two-column facet grid with free y-axes;
# the line colour is driven by the calendar year of each observation.
claims_tbl %>%
  group_by(symbol) %>%
  plot_time_series(
    date, log(claims),
    .color_var    = year(date),
    .facet_scales = "free_y",
    .facet_ncol   = 2
  )
# Static plot with custom labels, coloured by calendar month.
# The data is grouped by `symbol` so that each state gets its own facet;
# without the grouping, observations from all six states would be connected
# together within each month colour.
claims_tbl %>%
  group_by(symbol) %>%
  plot_time_series(
    date, claims,
    .color_var  = month(date, label = TRUE),
    .facet_ncol = 2,

    # Returns static ggplot
    .interactive = FALSE,

    # Customize
    .title     = "New England Initial Unemployment Claims",
    .x_lab     = "Date",
    .y_lab     = "Initial Claims",
    .color_lab = "Month"
  )

Box plots

# Number of observations available for each state.
count(claims_tbl, symbol)
## # A tibble: 6 × 2
##   symbol            n
##   <fct>         <int>
## 1 Connecticut    1944
## 2 Massachusetts  1944
## 3 Maine          1944
## 4 New Hampshire  1944
## 5 Rhode Island   1944
## 6 Vermont        1944
# Box plots of claims per state, binned into one-year periods.
# The data is first restricted to observations up to the `.end_date` of "1995".
claims_tbl %>%
  filter_by_time(date, .end_date = "1995") %>%
  group_by(symbol) %>%
  plot_time_series_boxplot(
    date, claims,
    .facet_ncol = 2,
    .period     = "1 year"
  )
## Ignoring unknown labels:
## • colour : "Legend"

Regression plots

# Per-state regression of log(claims) on a numeric time trend plus a
# month-of-year factor; the model summary printout is suppressed.
claims_tbl %>%
  group_by(symbol) %>%
  plot_time_series_regression(
    date,
    .formula      = log(claims) ~ as.numeric(date) + month(date, label = TRUE),
    .show_summary = FALSE,
    .facet_ncol   = 2
  )

Plotting Seasonality and Correlation

Correlation Plots

# Autocorrelation diagnostics for each state, using lags up to "2 years".
claims_tbl %>%
  group_by(symbol) %>%
  plot_acf_diagnostics(
    .date_var = date,
    .value    = claims,
    .lags     = "2 years"
  )

Seasonality

# Seasonal diagnostics for New England as a whole.
# `claims_tbl` holds six state series of different magnitude; feeding it in
# ungrouped would pool them into the same seasonal box plots. Summing the
# states per date first yields a single regional series to diagnose.
claims_tbl %>%
  summarise(claims = sum(claims), .by = date) %>%
  plot_seasonal_diagnostics(date, claims)
# Check how many observations each state contributes.
count(claims_tbl, symbol)
## # A tibble: 6 × 2
##   symbol            n
##   <fct>         <int>
## 1 Connecticut    1944
## 2 Massachusetts  1944
## 3 Maine          1944
## 4 New Hampshire  1944
## 5 Rhode Island   1944
## 6 Vermont        1944
# Seasonal diagnostics computed separately for each state.
claims_tbl %>%
  group_by(symbol) %>%
  plot_seasonal_diagnostics(
    .date_var = date,
    .value    = claims
  )

STL Diagnostics

# STL decomposition per state, showing the observed series together with its
# season, trend, and remainder components.
claims_tbl %>%
  group_by(symbol) %>%
  plot_stl_diagnostics(
    .date_var    = date,
    .value       = claims,
    .feature_set = c("observed", "season", "trend", "remainder")
  )
## frequency = 13 observations per 1 quarter
## trend = 53 observations per 1 year
## frequency = 13 observations per 1 quarter
## trend = 53 observations per 1 year
## frequency = 13 observations per 1 quarter
## trend = 53 observations per 1 year
## frequency = 13 observations per 1 quarter
## trend = 53 observations per 1 year
## frequency = 13 observations per 1 quarter
## trend = 53 observations per 1 year
## frequency = 13 observations per 1 quarter
## trend = 53 observations per 1 year

Time Series Data Wrangling

Summarize by Time

# Total claims per state and quarter, drawn as a static two-column facet plot.
claims_tbl %>%
  group_by(symbol) %>%
  summarise_by_time(date, .by = "quarter", claims = sum(claims)) %>%
  plot_time_series(
    .date_var    = date,
    .value       = claims,
    .interactive = FALSE,
    .facet_ncol  = 2
  )
## Ignoring unknown labels:
## • colour : "Legend"

# Average claims per state and month, drawn as a static two-column facet plot.
claims_tbl %>%
  group_by(symbol) %>%
  summarise_by_time(date, .by = "month", claims = mean(claims)) %>%
  plot_time_series(
    .date_var    = date,
    .value       = claims,
    .interactive = FALSE,
    .facet_ncol  = 2
  )
## Ignoring unknown labels:
## • colour : "Legend"

Filter By Time

# Keep only the window from "2008-01" through "2010" for each state, then plot
# the filtered series in a two-column facet grid.
claims_tbl %>%
  group_by(symbol) %>%
  filter_by_time(
    date,
    .start_date = "2008-01",
    .end_date   = "2010"
  ) %>%
  plot_time_series(
    .date_var   = date,
    .value      = claims,
    .facet_ncol = 2
  )
## Ignoring unknown labels:
## • colour : "Legend"

Padding Data

# Pad each state's series to a weekly frequency, using 0 as the value for any
# rows that padding inserts.
claims_tbl %>%
  group_by(symbol) %>%
  pad_by_time(
    .date_var  = date,
    .by        = "week",
    .pad_value = 0
  )
## # A tibble: 11,664 × 3
## # Groups:   symbol [6]
##    symbol      date       claims
##    <fct>       <date>      <int>
##  1 Connecticut 1989-01-07   8345
##  2 Connecticut 1989-01-14   6503
##  3 Connecticut 1989-01-21   3821
##  4 Connecticut 1989-01-28   4663
##  5 Connecticut 1989-02-04   4162
##  6 Connecticut 1989-02-11   4337
##  7 Connecticut 1989-02-18   4079
##  8 Connecticut 1989-02-25   3556
##  9 Connecticut 1989-03-04   3826
## 10 Connecticut 1989-03-11   3515
## # ℹ 11,654 more rows

Sliding (Rolling) Calculations

# Right-aligned rolling mean over 4 observations on the first ten rows.
# `.partial = TRUE` lets the leading rows be averaged over the shorter window
# that is available instead of returning NA.
claims_tbl %>%
  head(10) %>%
  mutate(
    rolling_avg_4 = slidify_vec(
      .x       = claims,
      .f       = mean,
      .period  = 4,
      .align   = "right",
      .partial = TRUE
    )
  )
## # A tibble: 10 × 4
##    symbol      date       claims rolling_avg_4
##    <fct>       <date>      <int>         <dbl>
##  1 Connecticut 1989-01-07   8345         8345 
##  2 Connecticut 1989-01-14   6503         7424 
##  3 Connecticut 1989-01-21   3821         6223 
##  4 Connecticut 1989-01-28   4663         5833 
##  5 Connecticut 1989-02-04   4162         4787.
##  6 Connecticut 1989-02-11   4337         4246.
##  7 Connecticut 1989-02-18   4079         4310.
##  8 Connecticut 1989-02-25   3556         4034.
##  9 Connecticut 1989-03-04   3826         3950.
## 10 Connecticut 1989-03-11   3515         3744
# Rolling regression helper: `.unlist = FALSE` keeps each fitted model as a
# list element. The first argument is regressed on the second and third over
# a right-aligned window of 26 observations.
lm_roll <- slidify(
  .f      = ~ lm(..1 ~ ..2 + ..3),
  .period = 26,
  .align  = "right",
  .unlist = FALSE
)

claims_tbl %>%
  select(symbol, date, claims) %>%
  group_by(symbol) %>%
  # Regressors: the previous observation's claims and the date as a number
  mutate(
    lag_claims   = lag(claims),
    numeric_date = as.numeric(date)
  ) %>%
  # The first row of each state has no lagged value
  filter(!is.na(lag_claims)) %>%
  # Fit the rolling model within each state
  mutate(rolling_lm = lm_roll(claims, lag_claims, numeric_date)) %>%
  # Discard rows for which no model was returned
  filter(!is.na(rolling_lm))
## # A tibble: 11,508 × 6
## # Groups:   symbol [6]
##    symbol      date       claims lag_claims numeric_date rolling_lm
##    <fct>       <date>      <int>      <int>        <dbl> <list>    
##  1 Connecticut 1989-07-08   7010       5232         7128 <lm>      
##  2 Connecticut 1989-07-15   5630       7010         7135 <lm>      
##  3 Connecticut 1989-07-22   4590       5630         7142 <lm>      
##  4 Connecticut 1989-07-29   4929       4590         7149 <lm>      
##  5 Connecticut 1989-08-05   7029       4929         7156 <lm>      
##  6 Connecticut 1989-08-12   3704       7029         7163 <lm>      
##  7 Connecticut 1989-08-19   4082       3704         7170 <lm>      
##  8 Connecticut 1989-08-26   3373       4082         7177 <lm>      
##  9 Connecticut 1989-09-02   2902       3373         7184 <lm>      
## 10 Connecticut 1989-09-09   2856       2902         7191 <lm>      
## # ℹ 11,498 more rows