Apply Data 10

# for Core packages
library(tidyverse)

## Warning: package 'ggplot2' was built under R version 4.3.3

## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.0     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

# for financial analysis
library(tidyquant)

## Warning: package 'tidyquant' was built under R version 4.3.3

## Loading required package: PerformanceAnalytics

## Warning: package 'PerformanceAnalytics' was built under R version 4.3.3

## Loading required package: xts
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## 
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
## 
## 
## ######################### Warning from 'xts' package ##########################
## #                                                                             #
## # The dplyr lag() function breaks how base R's lag() function is supposed to  #
## # work, which breaks lag(my_xts). Calls to lag(my_xts) that you type or       #
## # source() into this session won't work correctly.                            #
## #                                                                             #
## # Use stats::lag() to make sure you're not using dplyr::lag(), or you can add #
## # conflictRules('dplyr', exclude = 'lag') to your .Rprofile to stop           #
## # dplyr from breaking base R's lag() function.                                #
## #                                                                             #
## # Code in packages is not affected. It's protected by R's namespace mechanism #
## # Set `options(xts.warn_dplyr_breaks_lag = FALSE)` to suppress this warning.  #
## #                                                                             #
## ###############################################################################
## 
## Attaching package: 'xts'
## 
## The following objects are masked from 'package:dplyr':
## 
##     first, last
## 
## 
## Attaching package: 'PerformanceAnalytics'
## 
## The following object is masked from 'package:graphics':
## 
##     legend
## 
## Loading required package: quantmod

## Warning: package 'quantmod' was built under R version 4.3.3

## Loading required package: TTR

## Warning: package 'TTR' was built under R version 4.3.3

## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo

# for time series
library(timetk)

## Warning: package 'timetk' was built under R version 4.3.3

Goal: Apply Matt Dancho’s tutorial to state unemployment initial claims of New England states.

The following is the replication of Matt Dancho’s tutorial on this page

# Earliest observation date to request.
start_date <- "1989-01-01"

# Initial-claims series id for each New England state, keyed by display name.
state_lookup <- c(
  "Connecticut"   = "CTICLAIMS",
  "Maine"         = "MEICLAIMS",
  "Massachusetts" = "MAICLAIMS",
  "New Hampshire" = "NHICLAIMS",
  "Rhode Island"  = "RIICLAIMS",
  "Vermont"       = "VTICLAIMS"
)
symbols_txt <- unname(state_lookup)

# Download every series into one long table, call the value column `claims`,
# and replace the series ids in `symbol` with readable state names.
claims_tbl <- tq_get(symbols_txt, get = "economic.data", from = start_date) %>%
  rename(claims = price) %>%
  mutate(symbol = fct_recode(symbol, !!!state_lookup))

Plotting time series

# Print the long-format claims table (one row per state and date).
claims_tbl

## # A tibble: 11,058 × 3
##    symbol      date       claims
##    <fct>       <date>      <int>
##  1 Connecticut 1989-01-07   8345
##  2 Connecticut 1989-01-14   6503
##  3 Connecticut 1989-01-21   3821
##  4 Connecticut 1989-01-28   4663
##  5 Connecticut 1989-02-04   4162
##  6 Connecticut 1989-02-11   4337
##  7 Connecticut 1989-02-18   4079
##  8 Connecticut 1989-02-25   3556
##  9 Connecticut 1989-03-04   3826
## 10 Connecticut 1989-03-11   3515
## # ℹ 11,048 more rows

# Plot claims over time with one panel per state.
# claims_tbl stacks six state series that share the same dates; without
# grouping they would be drawn as a single line that jumps between states.
claims_tbl %>%
  group_by(symbol) %>%
  plot_time_series(.date_var = date, .value = claims, .facet_ncol = 2)

Box plots

# Number of observations available for each state.
count(claims_tbl, symbol)

## # A tibble: 6 × 2
##   symbol            n
##   <fct>         <int>
## 1 Connecticut    1843
## 2 Massachusetts  1843
## 3 Maine          1843
## 4 New Hampshire  1843
## 5 Rhode Island   1843
## 6 Vermont        1843

# Yearly box plots of claims for each state, using data up to the end of 2024.
claims_tbl %>%
  filter_by_time(date, .end_date = "2024") %>%
  group_by(symbol) %>%
  plot_time_series_boxplot(
    date, claims,
    .period     = "1 year",
    .facet_ncol = 2
  )

Regression plots

# Per-state regression of log claims on a linear time trend plus month-of-year
# effects, plotted against the observed series.
claims_tbl %>%
  group_by(symbol) %>%
  plot_time_series_regression(
    date,
    .formula      = log(claims) ~ as.numeric(date) + month(date, label = TRUE),
    .show_summary = FALSE,
    .facet_ncol   = 2
  )

Plotting Seasonality and Correlation

Correlation Plots

# ACF/PACF diagnostics for each state.
# The observations are 7 days apart, so a "7 days" lag window (the value used
# in the tutorial's hourly example) spans roughly a single lag. One year of
# lags is requested instead so that annual seasonality is visible.
claims_tbl %>%
  group_by(symbol) %>%
  plot_acf_diagnostics(.date_var = date, .value = claims, .lags = "1 year")

Seasonality

# Seasonal diagnostics on the ungrouped table: all states are pooled together.
plot_seasonal_diagnostics(claims_tbl, .date_var = date, .value = claims)

# Frequency of each distinct claims value across all states.
count(claims_tbl, claims)

## # A tibble: 5,332 × 2
##    claims     n
##     <int> <int>
##  1    152     1
##  2    154     1
##  3    184     2
##  4    189     1
##  5    200     1
##  6    201     1
##  7    203     1
##  8    205     1
##  9    206     1
## 10    211     2
## # ℹ 5,322 more rows

# Seasonal diagnostics computed separately for each state.
claims_tbl %>%
  group_by(symbol) %>%
  plot_seasonal_diagnostics(.date_var = date, .value = claims)

STL Diagnostics

# STL decomposition per state: observed series, seasonal component, trend,
# and remainder.
stl_features <- c("observed", "season", "trend", "remainder")

claims_tbl %>%
  group_by(symbol) %>%
  plot_stl_diagnostics(
    .date_var    = date,
    .value       = claims,
    .feature_set = stl_features
  )

## frequency = 13 observations per 1 quarter

## trend = 53 observations per 1 year

## frequency = 13 observations per 1 quarter

## trend = 53 observations per 1 year

## frequency = 13 observations per 1 quarter

## trend = 53 observations per 1 year

## frequency = 13 observations per 1 quarter

## trend = 53 observations per 1 year

## frequency = 13 observations per 1 quarter

## trend = 53 observations per 1 year

## frequency = 13 observations per 1 quarter

## trend = 53 observations per 1 year

Time Series Data Wrangling

Summarize by Time

weekly data

# Static (non-interactive) plot of the raw series, one panel per state.
claims_tbl %>%
  group_by(symbol) %>%
  plot_time_series(
    .date_var    = date,
    .value       = claims,
    .facet_ncol  = 2,
    .interactive = FALSE
  )

summarize it by month

# Collapse each state's series to monthly mean claims, then plot the result.
claims_tbl %>%
  group_by(symbol) %>%
  summarise_by_time(date, .by = "month", claims = mean(claims)) %>%
  plot_time_series(
    .date_var    = date,
    .value       = claims,
    .facet_ncol  = 2,
    .interactive = FALSE
  )

Filter By Time

# Keep September 2013 through the end of 2013 for each state and plot it.
claims_tbl %>%
  group_by(symbol) %>%
  filter_by_time(date, .start_date = "2013-09", .end_date = "2013") %>%
  plot_time_series(.date_var = date, .value = claims, .facet_ncol = 2)

Padding Data

# Expand each state's series to one row per day, filling the inserted rows
# with a claims value of 0.
claims_tbl %>%
  group_by(symbol) %>%
  pad_by_time(.date_var = date, .by = "day", .pad_value = 0)

## # A tibble: 77,370 × 3
## # Groups:   symbol [6]
##    symbol      date       claims
##    <fct>       <date>      <int>
##  1 Connecticut 1989-01-07   8345
##  2 Connecticut 1989-01-08      0
##  3 Connecticut 1989-01-09      0
##  4 Connecticut 1989-01-10      0
##  5 Connecticut 1989-01-11      0
##  6 Connecticut 1989-01-12      0
##  7 Connecticut 1989-01-13      0
##  8 Connecticut 1989-01-14   6503
##  9 Connecticut 1989-01-15      0
## 10 Connecticut 1989-01-16      0
## # ℹ 77,360 more rows

Sliding (Rolling) Calculations

# Two-observation trailing mean over the first ten rows; `.partial = TRUE`
# keeps the first row, where only one observation is available.
claims_tbl %>%
  head(10) %>%
  mutate(
    rolling_avg_2 = slidify_vec(
      .x       = claims,
      .f       = mean,
      .period  = 2,
      .align   = "right",
      .partial = TRUE
    )
  )

## # A tibble: 10 × 4
##    symbol      date       claims rolling_avg_2
##    <fct>       <date>      <int>         <dbl>
##  1 Connecticut 1989-01-07   8345         8345 
##  2 Connecticut 1989-01-14   6503         7424 
##  3 Connecticut 1989-01-21   3821         5162 
##  4 Connecticut 1989-01-28   4663         4242 
##  5 Connecticut 1989-02-04   4162         4412.
##  6 Connecticut 1989-02-11   4337         4250.
##  7 Connecticut 1989-02-18   4079         4208 
##  8 Connecticut 1989-02-25   3556         3818.
##  9 Connecticut 1989-03-04   3826         3691 
## 10 Connecticut 1989-03-11   3515         3670.

# Rolling regression helper: fits lm(first argument ~ second argument) over a
# trailing window of 90 observations. `.unlist = FALSE` keeps each fitted
# model as a list element instead of flattening it.
lm_roll <- slidify(
  ~ lm(..1 ~ ..2),
  .period = 90,
  .unlist = FALSE,
  .align  = "right"
)

# Fit the rolling regression of claims on (numeric) date within each state,
# then drop the leading rows where the window is not yet full.
claims_tbl %>%
  select(symbol, date, claims) %>%
  group_by(symbol) %>%
  mutate(
    date       = as.numeric(date),
    rolling_lm = lm_roll(claims, date)
  ) %>%
  filter(!is.na(rolling_lm))

## # A tibble: 10,524 × 4
## # Groups:   symbol [6]
##    symbol       date claims rolling_lm
##    <fct>       <dbl>  <int> <list>    
##  1 Connecticut  7569   3927 <lm>      
##  2 Connecticut  7576   4471 <lm>      
##  3 Connecticut  7583   4430 <lm>      
##  4 Connecticut  7590   4494 <lm>      
##  5 Connecticut  7597   4894 <lm>      
##  6 Connecticut  7604   4653 <lm>      
##  7 Connecticut  7611   4719 <lm>      
##  8 Connecticut  7618   5347 <lm>      
##  9 Connecticut  7625   4824 <lm>      
## 10 Connecticut  7632   5367 <lm>      
## # ℹ 10,514 more rows

Apply Data 10

John Tomlinson

2024-04-18

Plotting time series

Box plots

Regression plots

Plotting Seasonality and Correlation

Correlation Plots

Seasonality

STL Diagnostics

Time Series Data Wrangling

Summarize by Time

Filter By Time

Padding Data

Sliding (Rolling) Calculations