Apply 10

# for Core packages
library(tidyverse)

## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.4.4     ✔ purrr   1.0.2
## ✔ tibble  3.2.1     ✔ dplyr   1.1.4
## ✔ tidyr   1.3.0     ✔ stringr 1.5.0
## ✔ readr   2.1.3     ✔ forcats 1.0.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()

# for financial analysis
library(tidyquant)

## Loading required package: lubridate
## 
## Attaching package: 'lubridate'
## 
## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union
## 
## Loading required package: PerformanceAnalytics
## Loading required package: xts
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## 
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
## 
## 
## Attaching package: 'xts'
## 
## The following objects are masked from 'package:dplyr':
## 
##     first, last
## 
## 
## Attaching package: 'PerformanceAnalytics'
## 
## The following object is masked from 'package:graphics':
## 
##     legend
## 
## Loading required package: quantmod
## Loading required package: TTR
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo

# for time series
library(timetk)

Goal: Apply Matt Dancho’s tutorial to state unemployment initial claims of New England states.

The following is a replication of Matt Dancho’s tutorial on this page.

# Earliest observation date requested from the data source.
start_date <- "1989-01-01"

# Series identifiers for initial claims, one per New England state.
symbols_txt <- c(
  "CTICLAIMS", # Connecticut
  "MEICLAIMS", # Maine
  "MAICLAIMS", # Massachusetts
  "NHICLAIMS", # New Hampshire
  "RIICLAIMS", # Rhode Island
  "VTICLAIMS"  # Vermont
)

# Download every series in a single call, rename the value column to
# `claims`, and relabel the series identifiers with state names.
# `!!!` splices the named vector into fct_recode() as
# "new label" = "old level" pairs; the names are listed in the same order
# as `symbols_txt`.
claims_tbl <- symbols_txt %>%
  tq_get(get = "economic.data", from = start_date) %>%
  rename(claims = price) %>%
  mutate(
    symbol = fct_recode(
      symbol,
      !!!setNames(
        symbols_txt,
        c(
          "Connecticut",
          "Maine",
          "Massachusetts",
          "New Hampshire",
          "Rhode Island",
          "Vermont"
        )
      )
    )
  )

Plotting time series

# Print the tibble to inspect its columns (symbol, date, claims).
claims_tbl

## # A tibble: 11,058 × 3
##    symbol      date       claims
##    <fct>       <date>      <int>
##  1 Connecticut 1989-01-07   8345
##  2 Connecticut 1989-01-14   6503
##  3 Connecticut 1989-01-21   3821
##  4 Connecticut 1989-01-28   4663
##  5 Connecticut 1989-02-04   4162
##  6 Connecticut 1989-02-11   4337
##  7 Connecticut 1989-02-18   4079
##  8 Connecticut 1989-02-25   3556
##  9 Connecticut 1989-03-04   3826
## 10 Connecticut 1989-03-11   3515
## # ℹ 11,048 more rows

# Overlay all six states on one chart. The table stacks the six series in
# long format, so without `.color_var` the line would be drawn through every
# state's observations as if they belonged to a single series.
claims_tbl %>%
  plot_time_series(
    .date_var  = date,
    .value     = claims,
    .color_var = symbol
  )

# Tabulate how many rows share each distinct `claims` value.
# NOTE(review): if the aim is to list the series in the table, counting by
# `symbol` may have been intended instead -- confirm.
count(claims_tbl, claims)

## # A tibble: 5,332 × 2
##    claims     n
##     <int> <int>
##  1    152     1
##  2    154     1
##  3    184     2
##  4    189     1
##  5    200     1
##  6    201     1
##  7    203     1
##  8    205     1
##  9    206     1
## 10    211     2
## # ℹ 5,322 more rows

# One panel per state (grouping drives the facets), two panels per row,
# each with its own axis scales, rendered as a static plot.
claims_tbl %>%
  group_by(symbol) %>%
  plot_time_series(
    date, claims,
    .facet_scales = "free",
    .facet_ncol   = 2,
    .interactive  = FALSE
  )

Box plots

# Tabulate how many rows share each distinct `claims` value.
# NOTE(review): if the aim is to list the series in the table, counting by
# `symbol` may have been intended instead -- confirm.
count(claims_tbl, claims)

## # A tibble: 5,332 × 2
##    claims     n
##     <int> <int>
##  1    152     1
##  2    154     1
##  3    184     2
##  4    189     1
##  5    200     1
##  6    201     1
##  7    203     1
##  8    205     1
##  9    206     1
## 10    211     2
## # ℹ 5,322 more rows

# Yearly box plots of claims, one panel per state.
# The data must be grouped by `symbol`: otherwise every box pools the
# observations of all six states and `.facet_ncol` has nothing to lay out.
claims_tbl %>%
  group_by(symbol) %>%
  plot_time_series_boxplot(
    .date_var   = date,
    .value      = claims,
    .period     = "1 year",
    .facet_ncol = 2
  )

Regression plots

Plotting Seasonality and Correlation

Correlation Plots

# Autocorrelation diagnostics per state.
# Observations are spaced 7 days apart, so a "7 days" lag window would span
# roughly a single lag; a one-year window exposes the annual pattern.
claims_tbl %>%
  group_by(symbol) %>%
  plot_acf_diagnostics(
    date, claims,
    .lags = "1 year"
  )

Seasonality

# Seasonal diagnostics per state. Grouping by `symbol` keeps the six series
# separate instead of pooling them into one set of seasonal distributions.
claims_tbl %>%
  group_by(symbol) %>%
  plot_seasonal_diagnostics(date, claims)

STL Diagnostics

# STL decomposition per state, showing only the four requested components.
claims_tbl %>%
  group_by(symbol) %>%
  plot_stl_diagnostics(
    .date_var    = date,
    .value       = claims,
    .feature_set = c("observed", "season", "trend", "remainder")
  )

## frequency = 13 observations per 1 quarter

## trend = 53 observations per 1 year

## frequency = 13 observations per 1 quarter

## trend = 53 observations per 1 year

## frequency = 13 observations per 1 quarter

## trend = 53 observations per 1 year

## frequency = 13 observations per 1 quarter

## trend = 53 observations per 1 year

## frequency = 13 observations per 1 quarter

## trend = 53 observations per 1 year

## frequency = 13 observations per 1 quarter

## trend = 53 observations per 1 year

Time Series Data Wrangling

Summarize by Time

# Aggregate the claims to quarterly totals per state, then plot them.
# Without the summarise_by_time() step this section only repeated the
# earlier faceted plot of the raw series.
claims_tbl %>%
  group_by(symbol) %>%
  summarise_by_time(
    .date_var = date,
    .by       = "quarter",
    claims    = sum(claims)
  ) %>%
  plot_time_series(date, claims, .facet_ncol = 2, .interactive = FALSE)

Filter By Time

# Restrict each state's series to the window passed to filter_by_time()
# (start "2013", end "2018"), then plot one panel per state.
claims_tbl %>%
  group_by(symbol) %>%
  filter_by_time(date, "2013", "2018") %>%
  plot_time_series(.date_var = date, .value = claims, .facet_ncol = 2)

Padding Data

# Expand every state's series to a daily grid; the inserted days get a
# claims value of 0.
claims_tbl %>%
  group_by(symbol) %>%
  pad_by_time(
    .date_var  = date,
    .by        = "day",
    .pad_value = 0
  )

## # A tibble: 77,370 × 3
## # Groups:   symbol [6]
##    symbol      date       claims
##    <fct>       <date>      <int>
##  1 Connecticut 1989-01-07   8345
##  2 Connecticut 1989-01-08      0
##  3 Connecticut 1989-01-09      0
##  4 Connecticut 1989-01-10      0
##  5 Connecticut 1989-01-11      0
##  6 Connecticut 1989-01-12      0
##  7 Connecticut 1989-01-13      0
##  8 Connecticut 1989-01-14   6503
##  9 Connecticut 1989-01-15      0
## 10 Connecticut 1989-01-16      0
## # ℹ 77,360 more rows

Sliding (Rolling) Calculations

# Two-observation trailing mean on the first ten rows.
# `.align` uses the documented lower-case value "right"; `.partial = TRUE`
# lets the first row be averaged over the single value available to it.
claims_tbl %>%
  head(10) %>%
  mutate(
    rolling_avg_2 = slidify_vec(
      claims,
      mean,
      .period  = 2,
      .align   = "right",
      .partial = TRUE
    )
  )

## # A tibble: 10 × 4
##    symbol      date       claims rolling_avg_2
##    <fct>       <date>      <int>         <dbl>
##  1 Connecticut 1989-01-07   8345         8345 
##  2 Connecticut 1989-01-14   6503         7424 
##  3 Connecticut 1989-01-21   3821         5162 
##  4 Connecticut 1989-01-28   4663         4242 
##  5 Connecticut 1989-02-04   4162         4412.
##  6 Connecticut 1989-02-11   4337         4250.
##  7 Connecticut 1989-02-18   4079         4208 
##  8 Connecticut 1989-02-25   3556         3818.
##  9 Connecticut 1989-03-04   3826         3691 
## 10 Connecticut 1989-03-11   3515         3670.

# Rolling regressions are easy to implement using `.unlist = FALSE`, which
# keeps each fitted model as a list element.
# The window regresses the first argument on the second over the trailing
# 90 observations.
lm_roll <- slidify(
  ~ lm(..1 ~ ..2),
  .period = 90,
  .unlist = FALSE,
  .align  = "right"
)

# Fit a rolling linear trend of claims over time for each state.
# Only `numeric_date` is used as the predictor: regressing on both `date`
# and `as.numeric(date)` supplies the same information twice, so the two
# terms are perfectly collinear and one coefficient is always NA.
claims_tbl %>%
  select(symbol, date, claims) %>%
  group_by(symbol) %>%
  mutate(numeric_date = as.numeric(date)) %>%
  # Apply rolling regression
  mutate(rolling_lm = lm_roll(claims, numeric_date)) %>%
  # Drop the rows at the start of each group that have no full window yet.
  filter(!is.na(rolling_lm))

## # A tibble: 10,524 × 5
## # Groups:   symbol [6]
##    symbol      date       claims numeric_date rolling_lm
##    <fct>       <date>      <int>        <dbl> <list>    
##  1 Connecticut 1990-09-22   3927         7569 <lm>      
##  2 Connecticut 1990-09-29   4471         7576 <lm>      
##  3 Connecticut 1990-10-06   4430         7583 <lm>      
##  4 Connecticut 1990-10-13   4494         7590 <lm>      
##  5 Connecticut 1990-10-20   4894         7597 <lm>      
##  6 Connecticut 1990-10-27   4653         7604 <lm>      
##  7 Connecticut 1990-11-03   4719         7611 <lm>      
##  8 Connecticut 1990-11-10   5347         7618 <lm>      
##  9 Connecticut 1990-11-17   4824         7625 <lm>      
## 10 Connecticut 1990-11-24   5367         7632 <lm>      
## # ℹ 10,514 more rows

Apply 10

Stephen Morris

2024-05-07

Plotting time series

Box plots

Regression plots

Plotting Seasonality and Correlation

Correlation Plots

Seasonality

STL Diagnostics

Time Series Data Wrangling

Summarize by Time

Filter By Time

Padding Data

Sliding (Rolling) Calculations