Apply 11

# for Core packages
library(tidyverse)

## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ✔ purrr     1.0.4     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

# for financial analysis
library(tidyquant)

## Warning: package 'tidyquant' was built under R version 4.4.3

## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo

## Warning: package 'xts' was built under R version 4.4.3

## Warning: package 'zoo' was built under R version 4.4.3

## Warning: package 'quantmod' was built under R version 4.4.3

## Warning: package 'PerformanceAnalytics' was built under R version 4.4.3

## ── Attaching core tidyquant packages ─────────────────────── tidyquant 1.0.11 ──
## ✔ PerformanceAnalytics 2.0.8      ✔ TTR                  0.24.4
## ✔ quantmod             0.4.27     ✔ xts                  0.14.1
## ── Conflicts ────────────────────────────────────────── tidyquant_conflicts() ──
## ✖ zoo::as.Date()                 masks base::as.Date()
## ✖ zoo::as.Date.numeric()         masks base::as.Date.numeric()
## ✖ dplyr::filter()                masks stats::filter()
## ✖ xts::first()                   masks dplyr::first()
## ✖ dplyr::lag()                   masks stats::lag()
## ✖ xts::last()                    masks dplyr::last()
## ✖ PerformanceAnalytics::legend() masks graphics::legend()
## ✖ quantmod::summary()            masks base::summary()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

# for time series
library(timetk)

## Warning: package 'timetk' was built under R version 4.4.3

## 
## Attaching package: 'timetk'
## 
## The following object is masked from 'package:tidyquant':
## 
##     FANG

Goal: Apply Matt Dancho’s tutorial to the initial unemployment claims of the six New England states.

The following is a replication of Matt Dancho’s tutorial on this page

# Earliest date requested from the economic-data source.
start_date <- "1989-01-01"

# Lookup table: readable state name -> initial-claims series ID.
# The order here is the download order.
state_series <- c(
  "Connecticut"   = "CTICLAIMS",
  "Maine"         = "MEICLAIMS",
  "Massachusetts" = "MAICLAIMS",
  "New Hampshire" = "NHICLAIMS",
  "Rhode Island"  = "RIICLAIMS",
  "Vermont"       = "VTICLAIMS"
)

# Bare series IDs (names stripped) passed to the downloader.
symbols_txt <- unname(state_series)

# Download every series, relabel the IDs with state names, and call the
# value column `claims`.
claims_tbl <- symbols_txt %>%
  tq_get(get = "economic.data", from = start_date) %>%
  mutate(symbol = fct_recode(symbol, !!!state_series)) %>%
  rename(claims = price)

Plotting time series

# Preview the long-format table: one row per state (`symbol`) and `date`.
claims_tbl

## # A tibble: 11,370 × 3
##    symbol      date       claims
##    <fct>       <date>      <int>
##  1 Connecticut 1989-01-07   8345
##  2 Connecticut 1989-01-14   6503
##  3 Connecticut 1989-01-21   3821
##  4 Connecticut 1989-01-28   4663
##  5 Connecticut 1989-02-04   4162
##  6 Connecticut 1989-02-11   4337
##  7 Connecticut 1989-02-18   4079
##  8 Connecticut 1989-02-25   3556
##  9 Connecticut 1989-03-04   3826
## 10 Connecticut 1989-03-11   3515
## # ℹ 11,360 more rows

# Overlay all six states in a single panel, one coloured line per state.
# `claims_tbl` stacks the states on top of each other, so without
# `.color_var` the rows would be drawn as one series that jumps back to the
# first date each time a new state starts. The smoother is turned off
# because a single trend fitted across all states pooled together has no
# useful interpretation.
claims_tbl %>%
    plot_time_series(
        .date_var  = date,
        .value     = claims,
        .color_var = symbol,
        .smooth    = FALSE)

# Print the table again for reference before the faceted plot.
claims_tbl

## # A tibble: 11,370 × 3
##    symbol      date       claims
##    <fct>       <date>      <int>
##  1 Connecticut 1989-01-07   8345
##  2 Connecticut 1989-01-14   6503
##  3 Connecticut 1989-01-21   3821
##  4 Connecticut 1989-01-28   4663
##  5 Connecticut 1989-02-04   4162
##  6 Connecticut 1989-02-11   4337
##  7 Connecticut 1989-02-18   4079
##  8 Connecticut 1989-02-25   3556
##  9 Connecticut 1989-03-04   3826
## 10 Connecticut 1989-03-11   3515
## # ℹ 11,360 more rows

# Static faceted plot: one panel per state, two panels per row, each panel
# with its own axis scales.
claims_tbl %>%
  group_by(symbol) %>%
  plot_time_series(
    date, claims,
    .interactive  = FALSE,
    .facet_scales = "free",
    .facet_ncol   = 2
  )

Box plots

# Print the table again for reference before the box plots.
claims_tbl

## # A tibble: 11,370 × 3
##    symbol      date       claims
##    <fct>       <date>      <int>
##  1 Connecticut 1989-01-07   8345
##  2 Connecticut 1989-01-14   6503
##  3 Connecticut 1989-01-21   3821
##  4 Connecticut 1989-01-28   4663
##  5 Connecticut 1989-02-04   4162
##  6 Connecticut 1989-02-11   4337
##  7 Connecticut 1989-02-18   4079
##  8 Connecticut 1989-02-25   3556
##  9 Connecticut 1989-03-04   3826
## 10 Connecticut 1989-03-11   3515
## # ℹ 11,360 more rows

# Yearly box plots of claims per state for observations up to 1991-08-17.
claims_tbl %>%
    # The end date is a clean ISO literal with no embedded line break.
    filter_by_time(.date_var = date, .end_date = "1991-08-17") %>%
    group_by(symbol) %>%
    plot_time_series_boxplot(
        .date_var = date,
        .value = claims,
        .period      = "1 year",
        .facet_ncol  = 2,
        # The filtered window covers only 1989-1991, i.e. three yearly boxes
        # per state. That is too few points for the default LOESS smoother,
        # which otherwise warns "span too small" for every group.
        .smooth      = FALSE)

## Warning: There were 30 warnings in `dplyr::mutate()`.
## The first warning was:
## ℹ In argument: `.value_smooth = auto_smooth(...)`.
## ℹ In group 1: `symbol = Connecticut`.
## Caused by warning in `simpleLoess()`:
## ! span too small.   fewer data values than degrees of freedom.
## ℹ Run `dplyr::last_dplyr_warnings()` to see the 29 remaining warnings.

Regression plots

# Per-state regression of log claims on a numeric time trend plus labelled
# month-of-year terms; the model summary printout is turned off.
claims_tbl %>%
  group_by(symbol) %>%
  plot_time_series_regression(
    date,
    .formula      = log(claims) ~ as.numeric(date) + month(date, label = TRUE),
    .show_summary = FALSE,
    .facet_ncol   = 2
  )

Plotting Seasonality and Correlation

Correlation Plots

# Autocorrelation diagnostics per state over a 7-day lag window.
# NOTE(review): the observations are a week apart, so a 7-day window
# presumably spans very few lags - confirm the intended lag range.
claims_tbl %>%
  group_by(symbol) %>%
  plot_acf_diagnostics(.date_var = date, .value = claims, .lags = "7 days")

# Autocorrelation plus cross-correlation diagnostics per state over a
# three-month lag window.
# NOTE(review): `.ccf_vars` lists the date column and the target itself;
# confirm that cross-correlating against these is intended.
claims_tbl %>%
  group_by(symbol) %>%
  plot_acf_diagnostics(
    .date_var = date,
    .value    = claims,
    .lags     = "3 months",
    .ccf_vars = c(date, claims)
  )

Seasonality

# Seasonal diagnostics on the whole table (no grouping, so all states are
# pooled into the same distributions).
plot_seasonal_diagnostics(claims_tbl, .date_var = date, .value = claims)

# Number of observations per state.
count(claims_tbl, symbol)

## # A tibble: 6 × 2
##   symbol            n
##   <fct>         <int>
## 1 Connecticut    1895
## 2 Massachusetts  1895
## 3 Maine          1895
## 4 New Hampshire  1895
## 5 Rhode Island   1895
## 6 Vermont        1895

# Seasonal diagnostics computed separately for each state.
claims_tbl %>%
  group_by(symbol) %>%
  plot_seasonal_diagnostics(.date_var = date, .value = claims)

STL Diagnostics

# STL decomposition per state, showing the observed series alongside its
# seasonal, trend and remainder components.
claims_tbl %>%
  group_by(symbol) %>%
  plot_stl_diagnostics(
    .date_var    = date,
    .value       = claims,
    .feature_set = c("observed", "season", "trend", "remainder")
  )

## frequency = 13 observations per 1 quarter

## trend = 53 observations per 1 year

## frequency = 13 observations per 1 quarter

## trend = 53 observations per 1 year

## frequency = 13 observations per 1 quarter

## trend = 53 observations per 1 year

## frequency = 13 observations per 1 quarter

## trend = 53 observations per 1 year

## frequency = 13 observations per 1 quarter

## trend = 53 observations per 1 year

## frequency = 13 observations per 1 quarter

## trend = 53 observations per 1 year

Time Series Data Wrangling

Summarize by Time

# Sum claims into quarterly totals per state, then plot the totals as
# static facets (two per row).
claims_tbl %>%
  group_by(symbol) %>%
  summarise_by_time(
    date,
    .by    = "quarter",
    claims = sum(claims)
  ) %>%
  plot_time_series(
    .date_var    = date,
    .value       = claims,
    .interactive = FALSE,
    .facet_ncol  = 2
  )

Filter By Time

# Keep only observations from 1989-01-07 through 1995-03-18 for each state
# and plot the window as facets (two per row).
# Both date bounds are clean ISO literals with no embedded line breaks.
claims_tbl %>%
    group_by(symbol) %>%
    filter_by_time(.date_var = date,
                   .start_date = "1989-01-07",
                   .end_date = "1995-03-18") %>%
    plot_time_series(date, claims, .facet_ncol = 2)

Padding Data

# Expand each state's series to a daily grid; the inserted days get a
# claims value of 0.
claims_tbl %>%
  group_by(symbol) %>%
  pad_by_time(.date_var = date, .pad_value = 0, .by = "day")

## # A tibble: 79,554 × 3
## # Groups:   symbol [6]
##    symbol      date       claims
##    <fct>       <date>      <int>
##  1 Connecticut 1989-01-07   8345
##  2 Connecticut 1989-01-08      0
##  3 Connecticut 1989-01-09      0
##  4 Connecticut 1989-01-10      0
##  5 Connecticut 1989-01-11      0
##  6 Connecticut 1989-01-12      0
##  7 Connecticut 1989-01-13      0
##  8 Connecticut 1989-01-14   6503
##  9 Connecticut 1989-01-15      0
## 10 Connecticut 1989-01-16      0
## # ℹ 79,544 more rows

Sliding (Rolling) Calculations

# Two-observation trailing mean on the first ten rows. Partial windows are
# allowed, so the first row is averaged on its own instead of becoming NA.
claims_tbl %>%
  head(10) %>%
  mutate(
    rolling_avg_2 = slidify_vec(
      .x       = claims,
      .f       = mean,
      .partial = TRUE,
      .align   = "right",
      .period  = 2
    )
  )

## # A tibble: 10 × 4
##    symbol      date       claims rolling_avg_2
##    <fct>       <date>      <int>         <dbl>
##  1 Connecticut 1989-01-07   8345         8345 
##  2 Connecticut 1989-01-14   6503         7424 
##  3 Connecticut 1989-01-21   3821         5162 
##  4 Connecticut 1989-01-28   4663         4242 
##  5 Connecticut 1989-02-04   4162         4412.
##  6 Connecticut 1989-02-11   4337         4250.
##  7 Connecticut 1989-02-18   4079         4208 
##  8 Connecticut 1989-02-25   3556         3818.
##  9 Connecticut 1989-03-04   3826         3691 
## 10 Connecticut 1989-03-11   3515         3670.

# Reusable centred 30-observation rolling mean; partial windows are allowed
# at the edges of the series.
roll_avg_30 <- slidify(
  .f       = mean,
  .partial = TRUE,
  .align   = "center",
  .period  = 30
)

# Compute the rolling mean per state, stack raw and smoothed values into
# long format, and draw both as coloured lines in each state's facet.
claims_tbl %>%
  select(symbol, date, claims) %>%
  group_by(symbol) %>%
  mutate(rolling_avg_30 = roll_avg_30(claims)) %>%
  tidyr::pivot_longer(
    cols      = c(claims, rolling_avg_30),
    values_to = "value",
    names_to  = "name"
  ) %>%
  plot_time_series(
    date, value,          # stacked values: raw claims and rolling mean
    .color_var   = name,  # "claims" vs "rolling_avg_30"
    .interactive = FALSE,
    .smooth      = FALSE,
    .facet_ncol  = 2
  )

Apply 11

Thomas Hall

2025-05-06

Plotting time series

Box plots

Regression plots

Plotting Seasonality and Correlation

Correlation Plots

Seasonality

STL Diagnostics

Time Series Data Wrangling

Summarize by Time

Filter By Time

Padding Data

Sliding (Rolling) Calculations