library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.3     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ ggplot2   3.4.3     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.0
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(tidyquant)
## Loading required package: PerformanceAnalytics
## Loading required package: xts
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## 
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
## 
## 
## ######################### Warning from 'xts' package ##########################
## #                                                                             #
## # The dplyr lag() function breaks how base R's lag() function is supposed to  #
## # work, which breaks lag(my_xts). Calls to lag(my_xts) that you type or       #
## # source() into this session won't work correctly.                            #
## #                                                                             #
## # Use stats::lag() to make sure you're not using dplyr::lag(), or you can add #
## # conflictRules('dplyr', exclude = 'lag') to your .Rprofile to stop           #
## # dplyr from breaking base R's lag() function.                                #
## #                                                                             #
## # Code in packages is not affected. It's protected by R's namespace mechanism #
## # Set `options(xts.warn_dplyr_breaks_lag = FALSE)` to suppress this warning.  #
## #                                                                             #
## ###############################################################################
## 
## Attaching package: 'xts'
## 
## The following objects are masked from 'package:dplyr':
## 
##     first, last
## 
## 
## Attaching package: 'PerformanceAnalytics'
## 
## The following object is masked from 'package:graphics':
## 
##     legend
## 
## Loading required package: quantmod
## Loading required package: TTR
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
library(timetk)

Goal: Apply Matt Dancho’s tutorial to weekly initial unemployment claims for the New England states.

The following is a replication of Matt Dancho’s tutorial on this page.

# Earliest date requested from the data source.
start_date <- "1989-01-01"

# Single lookup: state display name -> initial-claims series id.
state_series <- c(
    "Connecticut"   = "CTICLAIMS",
    "Maine"         = "MEICLAIMS",
    "Massachusetts" = "MAICLAIMS",
    "New Hampshire" = "NHICLAIMS",
    "Rhode Island"  = "RIICLAIMS",
    "Vermont"       = "VTICLAIMS"
)

# Plain (unnamed) vector of series ids handed to tq_get().
symbols_txt <- unname(state_series)

# Download every series in long format, relabel the ids with state names
# (`!!!` splices the lookup in as new = old pairs) and give the value column
# a descriptive name.
claims_tbl <- tq_get(symbols_txt, get = "economic.data", from = start_date) %>%
    mutate(symbol = fct_recode(symbol, !!!state_series)) %>%
    rename(claims = price)
claims_tbl
## # A tibble: 10,920 × 3
##    symbol      date       claims
##    <fct>       <date>      <int>
##  1 Connecticut 1989-01-07   8345
##  2 Connecticut 1989-01-14   6503
##  3 Connecticut 1989-01-21   3821
##  4 Connecticut 1989-01-28   4663
##  5 Connecticut 1989-02-04   4162
##  6 Connecticut 1989-02-11   4337
##  7 Connecticut 1989-02-18   4079
##  8 Connecticut 1989-02-25   3556
##  9 Connecticut 1989-03-04   3826
## 10 Connecticut 1989-03-11   3515
## # ℹ 10,910 more rows
# claims_tbl stacks six state series on the same dates. Drawn as one ungrouped
# line, observations from different states get joined into a single zig-zag,
# so colour by state to draw each series as its own line.
claims_tbl %>%
    plot_time_series(.date_var = date, .value = claims, .color_var = symbol)

# Number of observations per state.
claims_tbl %>% count(symbol)
## # A tibble: 6 × 2
##   symbol            n
##   <fct>         <int>
## 1 Connecticut    1820
## 2 Massachusetts  1820
## 3 Maine          1820
## 4 New Hampshire  1820
## 5 Rhode Island   1820
## 6 Vermont        1820
# Static plot with one panel per state, laid out in two columns; every panel
# gets its own axis scales.
claims_tbl %>%
    group_by(symbol) %>%
    plot_time_series(date, claims,
                     .facet_ncol   = 2,
                     .facet_scales = "free",
                     .interactive  = FALSE)

Visualizing Transformations & Sub-Groups

count(claims_tbl, symbol)
## # A tibble: 6 × 2
##   symbol            n
##   <fct>         <int>
## 1 Connecticut    1820
## 2 Massachusetts  1820
## 3 Maine          1820
## 4 New Hampshire  1820
## 5 Rhode Island   1820
## 6 Vermont        1820
# One panel per state, with the line coloured by the week of the year.
claims_tbl %>%
    group_by(symbol) %>%
    plot_time_series(date, claims,
                     .color_var    = week(date),
                     .facet_ncol   = 2,
                     .facet_scales = "free")

Static ggplot2 Visualizations & Customizations

# Group by state before plotting: without it the month-coloured lines would
# connect observations from all six states that share the same dates.
claims_tbl %>%
    group_by(symbol) %>%
    plot_time_series(date, claims, 
                     .color_var = month(date, label = TRUE),
                     
                     # One panel per state, each with its own scales
                     .facet_ncol   = 2,
                     .facet_scales = "free",
                     
                     # Returns static ggplot
                     .interactive = FALSE, 
                     
                     # Customize
                     .title = "Unemployment Claims Data", 
                     .x_lab = "Year", 
                     .y_lab = "Unemployment Claims Filed", 
                     .color_lab = "Month")

Box Plots

count(claims_tbl, symbol)
## # A tibble: 6 × 2
##   symbol            n
##   <fct>         <int>
## 1 Connecticut    1820
## 2 Massachusetts  1820
## 3 Maine          1820
## 4 New Hampshire  1820
## 5 Rhode Island   1820
## 6 Vermont        1820
# Yearly box plots per state, using observations up to the end of 2009.
claims_tbl %>%
    filter_by_time(date, .end_date = "2009") %>%
    group_by(symbol) %>%
    plot_time_series_boxplot(date, claims,
                             .period     = "1 year",
                             .facet_ncol = 2)

Regression Plots

# Per-state regression of log claims on a linear time index plus a
# month-of-year term, shown as a static plot without the model summary.
claims_tbl %>%
    group_by(symbol) %>%
    plot_time_series_regression(
        date,
        .formula      = log(claims) ~ as.numeric(date) + month(date, label = TRUE),
        .show_summary = FALSE,
        .interactive  = FALSE,
        .facet_ncol   = 2
    )

Plotting Seasonality and Correlation

Correlation Plots

# ACF / PACF per state for lags covering one month.
claims_tbl %>%
    group_by(symbol) %>%
    plot_acf_diagnostics(.date_var = date, .value = claims, .lags = "1 month")
# Cross-correlation needs series other than the target itself, so spread the
# states into one column each and compare Connecticut with two other states
# (`.ccf_vars` must not contain the date index or the target column).
# NOTE(review): assumes every state reports on the same weekly dates, so the
# widened table has no missing values -- confirm.
claims_tbl %>%
    pivot_wider(names_from = symbol, values_from = claims) %>%
    plot_acf_diagnostics(date, Connecticut,
                         .ccf_vars = c(Massachusetts, `Rhode Island`),
                         .lags     = "3 months")

Seasonality

# Seasonal diagnostics on the ungrouped table: observations from all six
# states are pooled together here.
claims_tbl %>%
    plot_seasonal_diagnostics(.date_var = date, .value = claims)

# Number of observations per state.
count(claims_tbl, symbol)
## # A tibble: 6 × 2
##   symbol            n
##   <fct>         <int>
## 1 Connecticut    1820
## 2 Massachusetts  1820
## 3 Maine          1820
## 4 New Hampshire  1820
## 5 Rhode Island   1820
## 6 Vermont        1820
# Same seasonal diagnostics, computed separately for each state.
claims_tbl %>%
    group_by(symbol) %>%
    plot_seasonal_diagnostics(.date_var = date, .value = claims)

STL Diagnostics

# STL decomposition per state, showing only the four listed components.
stl_features <- c("observed", "season", "trend", "remainder")

claims_tbl %>%
    group_by(symbol) %>%
    plot_stl_diagnostics(date, claims, .feature_set = stl_features)
## frequency = 13 observations per 1 quarter
## trend = 53 observations per 1 year
## frequency = 13 observations per 1 quarter
## trend = 53 observations per 1 year
## frequency = 13 observations per 1 quarter
## trend = 53 observations per 1 year
## frequency = 13 observations per 1 quarter
## trend = 53 observations per 1 year
## frequency = 13 observations per 1 quarter
## trend = 53 observations per 1 year
## frequency = 13 observations per 1 quarter
## trend = 53 observations per 1 year

Time Series Data Wrangling

Summarize by Time #ISSUE

# Total claims filed per state and calendar year.
# NOTE(review): if the series ends mid-year, the last year's total covers only
# part of a year -- confirm before comparing it with earlier years.
claims_tbl %>%
    group_by(symbol) %>%
    summarise_by_time(date, .by = "year", total_claims = sum(claims)) %>%
    plot_time_series(date, total_claims,
                     .facet_ncol  = 2,
                     .interactive = FALSE)

# Average weekly claims per state and calendar year.
claims_tbl %>%
    group_by(symbol) %>%
    summarise_by_time(date, .by = "year", mean_claims = mean(claims)) %>%
    plot_time_series(date, mean_claims,
                     .facet_ncol  = 2,
                     .interactive = FALSE)

Filter by Time

# Keep 1989 through 2009 for each state, then plot one panel per state.
claims_tbl %>%
    group_by(symbol) %>%
    filter_by_time(date, .start_date = "1989", .end_date = "2009") %>%
    plot_time_series(.date_var = date, .value = claims, .facet_ncol = 2)

Padding Data

# Expand each state's weekly series to daily rows; the inserted days take the
# most recent preceding value (fill direction "down").
claims_tbl %>%
    group_by(symbol) %>%
    pad_by_time(.date_var = date,
                .by = "day",
                .fill_na_direction = "down")
## # A tibble: 76,404 × 3
## # Groups:   symbol [6]
##    symbol      date       claims
##    <fct>       <date>      <int>
##  1 Connecticut 1989-01-07   8345
##  2 Connecticut 1989-01-08   8345
##  3 Connecticut 1989-01-09   8345
##  4 Connecticut 1989-01-10   8345
##  5 Connecticut 1989-01-11   8345
##  6 Connecticut 1989-01-12   8345
##  7 Connecticut 1989-01-13   8345
##  8 Connecticut 1989-01-14   6503
##  9 Connecticut 1989-01-15   6503
## 10 Connecticut 1989-01-16   6503
## # ℹ 76,394 more rows

Sliding (Rolling) Calculations #ISSUE

# Two-observation trailing mean on the first ten rows; the first row has no
# complete window and is therefore NA.
claims_tbl %>%
    slice_head(n = 10) %>%
    mutate(
        rolling_avg_2 = slidify_vec(.x = claims, .f = mean,
                                    .period = 2, .align = "right")
    )
## # A tibble: 10 × 4
##    symbol      date       claims rolling_avg_2
##    <fct>       <date>      <int>         <dbl>
##  1 Connecticut 1989-01-07   8345           NA 
##  2 Connecticut 1989-01-14   6503         7424 
##  3 Connecticut 1989-01-21   3821         5162 
##  4 Connecticut 1989-01-28   4663         4242 
##  5 Connecticut 1989-02-04   4162         4412.
##  6 Connecticut 1989-02-11   4337         4250.
##  7 Connecticut 1989-02-18   4079         4208 
##  8 Connecticut 1989-02-25   3556         3818.
##  9 Connecticut 1989-03-04   3826         3691 
## 10 Connecticut 1989-03-11   3515         3670.
# Rolling regression helper: fits lm(first argument ~ second argument) over a
# trailing window of 90 rows and returns the fitted models as a list.
lm_roll <- slidify(
    .f      = ~ lm(..1 ~ ..2),
    .period = 90,
    .align  = "right",
    .unlist = FALSE
)

# Fit claims against a numeric time index within each state, then drop the
# leading rows whose window is not yet complete (their entry is NA).
claims_tbl %>%
    select(symbol, date, claims) %>%
    group_by(symbol) %>%
    mutate(
        numeric_date = as.numeric(date),
        rolling_lm   = lm_roll(claims, numeric_date)
    ) %>%
    filter(!is.na(rolling_lm))
## # A tibble: 10,386 × 5
## # Groups:   symbol [6]
##    symbol      date       claims numeric_date rolling_lm
##    <fct>       <date>      <int>        <dbl> <list>    
##  1 Connecticut 1990-09-22   3927         7569 <lm>      
##  2 Connecticut 1990-09-29   4471         7576 <lm>      
##  3 Connecticut 1990-10-06   4430         7583 <lm>      
##  4 Connecticut 1990-10-13   4494         7590 <lm>      
##  5 Connecticut 1990-10-20   4894         7597 <lm>      
##  6 Connecticut 1990-10-27   4653         7604 <lm>      
##  7 Connecticut 1990-11-03   4719         7611 <lm>      
##  8 Connecticut 1990-11-10   5347         7618 <lm>      
##  9 Connecticut 1990-11-17   4824         7625 <lm>      
## 10 Connecticut 1990-11-24   5367         7632 <lm>      
## # ℹ 10,376 more rows