Apply 10 Anil Eser

# for Core packages
library(tidyverse)

## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

# for financial analysis
library(tidyquant)

## Loading required package: PerformanceAnalytics
## Loading required package: xts
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## 
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
## 
## 
## ######################### Warning from 'xts' package ##########################
## #                                                                             #
## # The dplyr lag() function breaks how base R's lag() function is supposed to  #
## # work, which breaks lag(my_xts). Calls to lag(my_xts) that you type or       #
## # source() into this session won't work correctly.                            #
## #                                                                             #
## # Use stats::lag() to make sure you're not using dplyr::lag(), or you can add #
## # conflictRules('dplyr', exclude = 'lag') to your .Rprofile to stop           #
## # dplyr from breaking base R's lag() function.                                #
## #                                                                             #
## # Code in packages is not affected. It's protected by R's namespace mechanism #
## # Set `options(xts.warn_dplyr_breaks_lag = FALSE)` to suppress this warning.  #
## #                                                                             #
## ###############################################################################
## 
## Attaching package: 'xts'
## 
## The following objects are masked from 'package:dplyr':
## 
##     first, last
## 
## 
## Attaching package: 'PerformanceAnalytics'
## 
## The following object is masked from 'package:graphics':
## 
##     legend
## 
## Loading required package: quantmod
## Loading required package: TTR
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo

# for time series
library(timetk)

Goal: Apply Matt Dancho’s tutorial to the initial unemployment claims of the New England states.

The following is the replication of Matt Dancho’s tutorial on this page

# Earliest observation date requested from the data source
start_date <- "1989-01-01"

# Initial-claims series IDs, one per New England state
symbols_txt <- c(
  "CTICLAIMS", # Connecticut
  "MEICLAIMS", # Maine
  "MAICLAIMS", # Massachusetts
  "NHICLAIMS", # New Hampshire
  "RIICLAIMS", # Rhode Island
  "VTICLAIMS"  # Vermont
)

# Download every series, call the value column `claims`, and replace the
# series IDs in `symbol` with readable state names
claims_tbl <- symbols_txt %>%
  tq_get(get = "economic.data", from = start_date) %>%
  rename(claims = price) %>%
  mutate(
    symbol = fct_recode(
      symbol,
      "Connecticut"   = "CTICLAIMS",
      "Maine"         = "MEICLAIMS",
      "Massachusetts" = "MAICLAIMS",
      "New Hampshire" = "NHICLAIMS",
      "Rhode Island"  = "RIICLAIMS",
      "Vermont"       = "VTICLAIMS"
    )
  )

Plotting time series

# Quick structural check of the downloaded data: columns, types, first values
glimpse(claims_tbl)

## Rows: 11,244
## Columns: 3
## $ symbol <fct> Connecticut, Connecticut, Connecticut, Connecticut, Connecticut…
## $ date   <date> 1989-01-07, 1989-01-14, 1989-01-21, 1989-01-28, 1989-02-04, 19…
## $ claims <int> 8345, 6503, 3821, 4663, 4162, 4337, 4079, 3556, 3826, 3515, 288…

# Faceted line chart of claims over time, one panel per state
claims_tbl %>%
  group_by(symbol) %>%
  plot_time_series(
    .date_var   = date,
    .value      = claims,
    .smooth     = TRUE, # overlay a smoothed trend line on each panel
    .facet_ncol = 2,    # lay the panels out two per row
    .title      = "Unemployment Initial Claims in New England States"
  )

Box plots

# Keep only observations dated 2010-01-01 through 2020-12-31.
# The bounds are passed under their full argument names (`.start_date`,
# `.end_date`) instead of the abbreviations `.start` / `.end`, which only
# worked through R's partial argument matching.
filtered_claims_tbl <- claims_tbl %>%
  filter_by_time(
    .date_var   = date,
    .start_date = "2010-01-01",
    .end_date   = "2020-12-31"
  )

# Check the filtered data
glimpse(filtered_claims_tbl)

## Rows: 3,444
## Columns: 3
## $ symbol <fct> Connecticut, Connecticut, Connecticut, Connecticut, Connecticut…
## $ date   <date> 2010-01-02, 2010-01-09, 2010-01-16, 2010-01-23, 2010-01-30, 20…
## $ claims <int> 9247, 13760, 10518, 6970, 5688, 5707, 4762, 6780, 7614, 5532, 4…

# Box plots of the filtered claims, one panel per state
filtered_claims_tbl %>%
  group_by(symbol) %>%
  plot_time_series_boxplot(
    .date_var   = date,
    .value      = claims,
    .period     = "1 month", # one box per calendar month
    .facet_ncol = 2,         # two panels per row
    .title      = "Monthly Distribution of Unemployment Claims (2010-2020)"
  )

Regression plots

# Fit and plot a straight-line trend of claims against time for each state
claims_tbl %>%
  group_by(symbol) %>%
  plot_time_series_regression(
    .date_var   = date,
    # The date is converted to its numeric value so it can act as the
    # single linear predictor
    .formula    = claims ~ as.numeric(date),
    .facet_ncol = 2, # two panels per row
    .title      = "Linear Regression of Unemployment Claims Over Time"
  )

Plotting Seasonality and Correlation

Correlation Plots

# Autocorrelation diagnostics for each state's claims series
claims_tbl %>%
  group_by(symbol) %>%
  plot_acf_diagnostics(
    .date_var   = date,
    .value      = claims,
    .lags       = 24, # show lags up to 24 observations back
    .facet_ncol = 2,  # two panels per row
    .title      = "Autocorrelation Diagnostics for Unemployment Claims"
  )

Seasonality

# Seasonal diagnostics of claims, computed separately for each state
claims_tbl %>%
  group_by(symbol) %>%
  plot_seasonal_diagnostics(
    .date_var = date,
    .value    = claims,
    .title    = "Seasonality Diagnostics for Unemployment Claims"
  )

STL Diagnostics

# Restrict the STL decomposition to two states so the panels stay readable
filtered_claims_tbl <- claims_tbl %>%
  filter(symbol %in% c("Connecticut", "Maine"))

# STL decomposition per state, drawn as a static ggplot so that theme
# tweaks can be layered on with `+`
filtered_claims_tbl %>%
  group_by(symbol) %>%
  plot_stl_diagnostics(
    .date_var    = date,
    .value       = claims,
    # Select specific components to display
    .feature_set = c("observed", "trend", "season", "remainder"),
    .title       = "STL Diagnostics for Selected States (Specific Features)",
    .interactive = FALSE
  ) +
  theme(
    axis.text  = element_text(size = 7),
    # `linewidth` replaces the `size` argument of element_line(), which is
    # deprecated as of ggplot2 3.4.0
    axis.ticks = element_line(linewidth = 0.5),
    panel.grid = element_line(color = "grey80")
  )

## frequency = 13 observations per 1 quarter

## trend = 53 observations per 1 year

## frequency = 13 observations per 1 quarter

## trend = 53 observations per 1 year

## Warning: The `size` argument of `element_line()` is deprecated as of ggplot2 3.4.0.
## ℹ Please use the `linewidth` argument instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

Time Series Data Wrangling

Summarize by Time

# Collapse the claims to one row per state and year, holding the mean of
# that year's observations (missing values ignored)
yearly_claims_tbl <- claims_tbl %>%
  group_by(symbol) %>%
  summarise_by_time(
    .date_var = date,
    .by       = "year",
    claims    = mean(claims, na.rm = TRUE)
  )

# Static faceted chart of the yearly averages, one panel per state
yearly_claims_tbl %>%
  group_by(symbol) %>%
  plot_time_series(
    .date_var    = date,
    .value       = claims,
    .smooth      = TRUE,
    .facet_ncol  = 2,
    .interactive = FALSE,
    .title       = "Yearly Average Unemployment Claims by State"
  )

Filter By Time

# Rebuild the 2010-01-01 to 2020-12-31 subset for all states.
# `.start_date` / `.end_date` are written out in full; the shorter `.start` /
# `.end` only worked through R's partial argument matching.
filtered_claims_tbl <- claims_tbl %>%
  filter_by_time(
    .date_var   = date,
    .start_date = "2010-01-01",
    .end_date   = "2020-12-31"
  )

# Plot the filtered data, one static panel per state
filtered_claims_tbl %>%
  group_by(symbol) %>%
  plot_time_series(
    .date_var    = date,
    .value       = claims,
    .facet_ncol  = 2,
    .smooth      = TRUE,
    .title       = "Filtered Unemployment Claims (2010-2020)",
    .interactive = FALSE
  )

Padding Data

# Average the claims within each calendar month, separately for each state
monthly_claims_tbl <- claims_tbl %>%
  group_by(symbol) %>%
  summarise_by_time(
    .date_var = date,
    .by       = "month",
    claims    = mean(claims, na.rm = TRUE)
  )

# Pad each state's series to a regular monthly sequence so that any month
# absent from the summary gets its own row
padded_monthly_claims_tbl <- monthly_claims_tbl %>%
  group_by(symbol) %>%
  pad_by_time(.date_var = date, .by = "month")

# Inspect the padded result
padded_monthly_claims_tbl %>%
  glimpse()

## Rows: 2,586
## Columns: 3
## Groups: symbol [6]
## $ symbol <fct> Connecticut, Connecticut, Connecticut, Connecticut, Connecticut…
## $ date   <date> 1989-01-01, 1989-02-01, 1989-03-01, 1989-04-01, 1989-05-01, 19…
## $ claims <dbl> 5833.00, 4033.50, 3277.25, 3506.60, 2938.50, 3755.50, 5478.20, …

# Static faceted chart of the padded monthly series, one panel per state
padded_monthly_claims_tbl %>%
  group_by(symbol) %>%
  plot_time_series(
    .date_var    = date,
    .value       = claims,
    .smooth      = TRUE,
    .facet_ncol  = 2,
    .interactive = FALSE,
    .title       = "Padded Unemployment Claims Data (Monthly Aggregated)"
  )

Sliding (Rolling) Calculations

# Rolling linear trend of claims over time.
#
# `slidify()` wraps the lm() call into a function that is evaluated over a
# right-aligned window of 90 consecutive rows. `..1` is the response and `..2`
# the single predictor. The original formula `..1 ~ ..2 + ..3` was fed the
# same date column twice, which made the third term perfectly collinear and
# its coefficient always NA.
lm_roll <- slidify(
  ~ lm(..1 ~ ..2),
  .period = 90,
  .unlist = FALSE, # keep each fitted model as a list-column element
  .align  = "right"
)

rolling_claims_tbl <- claims_tbl %>%
  group_by(symbol) %>% # windows must not run across state boundaries
  mutate(
    numeric_date = as.numeric(date),
    rolling_lm   = lm_roll(claims, numeric_date)
  ) %>%
  # Rows before the first complete window hold NA instead of a model
  filter(!is.na(rolling_lm))


glimpse(rolling_claims_tbl)

## Rows: 10,710
## Columns: 5
## Groups: symbol [6]
## $ symbol       <fct> Connecticut, Connecticut, Connecticut, Connecticut, Conne…
## $ date         <date> 1990-09-22, 1990-09-29, 1990-10-06, 1990-10-13, 1990-10-…
## $ claims       <int> 3927, 4471, 4430, 4494, 4894, 4653, 4719, 5347, 4824, 536…
## $ numeric_date <dbl> 7569, 7576, 7583, 7590, 7597, 7604, 7611, 7618, 7625, 763…
## $ rolling_lm   <list> [-11224.937411, 2.192839, NA, 4338.48034, 2481.13047, -2…