library(readr)
library(tidyr)
library(dplyr)
library(purrr)
library(broom)
library(ggplot2)
# Load the raw semicolon-delimited CDS/SVI file; surrounding whitespace is
# trimmed from every field and doubled quotes are not treated as escapes.
CDS_0 <- read_delim(
  file = "~/Projects/cds-svi/data/CDS.csv",
  delim = ";",
  escape_double = FALSE,
  trim_ws = TRUE
)
## Parsed with column specification:
## cols(
## Date = col_character(),
## `log (CDS return)` = col_double(),
## SVI_Ticker = col_integer(),
## SVI_Name = col_integer(),
## Company = col_character(),
## Sector = col_character()
## )
# Give the raw columns short lower-case names, parse the date strings, and
# derive the log of the search-volume index.
CDS <- CDS_0 %>%
  rename(
    time = Date,
    logcds = `log (CDS return)`,
    svi_ticker = SVI_Ticker,
    svi = SVI_Name,
    name = Company,
    sector = Sector
  ) %>%
  mutate(
    time = as.Date(time, format = "%d.%m.%y"),
    # log(0) is -Inf, so zeros are swapped for a tiny positive value (1e-10)
    svi = ifelse(svi == 0, 1e-10, svi),
    logsvi = log(svi),
    name = as.factor(name),
    sector = as.factor(sector)
  )
# Keep only rows with no missing values, then drop any factor levels that
# no longer occur once those rows are gone.
CDS <- droplevels(CDS[complete.cases(CDS), ])
Write a function to compute the cross-correlations for all lags up to a given maximum lag. The lag-k value returned by `ccf(x, y)` estimates the correlation between x[t+k] and y[t].
# Cross-correlation between the `logsvi` and `logcds` columns of `df`.
#   df:  data frame providing the columns `logsvi` and `logcds`.
#   lag: maximum lag, handed to ccf() as `lag.max` (default 20).
# Returns the result of tidy() applied to the ccf() output; no plot is drawn.
cross_corr <- function(df, lag = 20) {
  ccf_result <- ccf(
    x = df$logsvi,
    y = df$logcds,
    lag.max = lag,
    plot = FALSE
  )
  tidy(ccf_result)
}
Now we group the data by sector and name, nest each group's observations into a list-column, and then apply the `cross_corr` function to every name.
# One row per (sector, name); each row's observations are nested in the
# list-column `data`, and `crosscorr` holds the cross_corr() result for it.
by_name <- CDS %>%
  group_by(sector, name) %>%
  nest() %>%
  # nest() can return a grouped data frame; ungroup so that mutate() sees the
  # whole `data` list-column at once instead of one single-row group at a time.
  ungroup() %>%
  # Refer to the `data` column through the data mask rather than via
  # `by_name$data`, which would hand the full-length list to every group.
  mutate(crosscorr = purrr::map(data, cross_corr))
Next, we can ‘unnest’ the resulting data frame and plot the cross-correlation (the `acf` column) against the lag for each name, with a separate panel for each sector.
# Expand the per-name cross-correlation tables into one long data frame.
ccfs <- unnest(by_name, crosscorr)
# Cross-correlation against lag: one panel per sector, one colour per name.
ccfs %>%
  ggplot(aes(lag, acf)) +
  # Shade lags -10 to 0. annotate() draws the band once per panel, whereas a
  # geom_rect() layer with constant coordinates inherits the plot data and
  # draws one rectangle per row, so a low alpha just stacks up to opaque.
  annotate("rect", xmin = -10, xmax = 0, ymin = -Inf, ymax = Inf,
           fill = "grey90") +
  # Dashed reference lines at 0 and +/- 0.1 correlation, and at lag 0.
  geom_hline(yintercept = c(-0.1, 0, 0.1), linetype = 2) +
  geom_vline(xintercept = 0, linetype = 2) +
  geom_point(aes(color = name), alpha = 2 / 3) +
  facet_wrap(~ sector) +
  theme_bw() +
  # The colour legend (one entry per name) is hidden.
  theme(legend.position = "none")