Loading libraries

library(tidyverse)
library(highcharter)
library(quantmod)
library(doParallel)
library(lubridate)

registerDoParallel(cores = 10)
options(scipen = 100)

Helper functions

For future summaries/transformations, implied correlation equation at the bottom of the markdown

# Helper Functions

divide <- function(x){
  x/100
}

# implied correlation equation: more about this at the end of the markdown

imp_corr_value <- function(x = x, comp_data = comp_data, factor = "iv10d"){
  
  adj_weight = x$weight/sum(x$weight)
  ticker = unique(x$composite_ticker)
  
  etf_data = filter(comp_data, composite_ticker == ticker)
  etf_var = etf_data[,factor]^2
  
  value = etf_var/(sum(x[,factor]*adj_weight)^2)
  return(value)
}

imp_corr_tbl <- function(x, comp_data){
  
  ticker = unique(x$composite_ticker)
  
  df = data.frame(
    ticker = ticker,
    dte10 = imp_corr_value(x = x, comp_data = comp_data),
    dte20 = imp_corr_value(x = x, comp_data = comp_data, factor = "iv20d"),
    dte30 = imp_corr_value(x = x, comp_data = comp_data, factor = "iv30d"),
    dte60 = imp_corr_value(x = x, comp_data = comp_data, factor = "iv60d"),
    dte90 = imp_corr_value(x = x, comp_data = comp_data, factor = "iv90d"),
    dte6m = imp_corr_value(x = x, comp_data = comp_data, factor = "iv6m"),
    dte1yr = imp_corr_value(x = x, comp_data = comp_data, factor = "iv1yr"),
    row.names = NULL
  )
  return(df)
}

Downloading Data

holdings_raw has all the holdings and corresponding weights for each etf of “2021-11-01”. quandl_data has all the vol data. vol_data is subsetting the implied vol dte’s i’m going to use for this table. the data comes with vol*100, so i transformed the vol data back to it’s percentage with the divide helper function

# Downloading Data

holdings_raw <- read_csv("holdings_data.csv")
quandl_data <- read_csv("quandl_data.csv")

vol_data <- quandl_data %>%
  select(ticker, tradedate, iv10d, iv20d, iv30d, iv60d, iv90d, iv6m, iv1yr) %>% 
  mutate_if(is.numeric, divide) %>%
  arrange(ticker)

holdings_raw data (first 100 rows)

vol_data data

# Transforms

# Filtering for tickers that quandl has vol data for
holdings_raw <- na.omit(holdings_raw[,2:5]) %>%
  mutate(as_of_date = ymd(as_of_date)) %>%
  filter(
    composite_ticker %in% vol_data$ticker,
    constituent_ticker %in% vol_data$ticker
  )

# Filtering out ETFs with summed weights of less than 50%
comp_tickers_weight_filtered <- holdings_raw %>%
  group_by(composite_ticker) %>%
  summarise(value = sum(weight)) %>%
  arrange(desc(value)) %>%
  filter(value >= 0.50) %>%
  pull(composite_ticker)

holdings_df <- holdings_raw %>%
  filter(
    composite_ticker %in% comp_tickers_weight_filtered
  ) %>%
  select(-c(as_of_date))


# Separating ETF data
comp_tickers = holdings_df %>%
  select(-c(constituent_ticker, weight)) %>%
  distinct()

holdings_df

const_data: merged dataframe of constituent tickers and it’s corresponding iv + weight values

implied correlation summaries

mapped heped function imp_cprr_tabl to the listed_data. then innerjoined the resulting dataframe with a new column called “sum_weights”, this tells us the sum weights of the holdings that i had vol data for (and applied the imp correlation with)

merging data

sourcing composite (etf) name, merging all dataframes


metrics <- yahooQF(c("Name", "Change in Percent"))

tickers_yf = getQuote(df$ticker, what = metrics) %>%
  mutate(ticker = rownames(.),
         Name = ifelse(is.na(Name), ticker, Name))

ticker_names <- tickers_yf %>%
  mutate(ticker = rownames(tickers_yf)) %>%
  select(ticker, everything())

final = df %>%
  left_join(ticker_names[,c("ticker", "Name")], by = c("ticker")) %>%
  mutate(date = "2021-11-01") %>%
  select(ticker, Name, date, pxatmiv, everything()) %>%
  rename("Price" = pxatmiv)

final

cleaning df

added ‘leveraged’ column, string detected for “2”, “3”, “ultra”, or “direxion” and labeled them TRUE to filter out leveraged etfs. changed all column name to lower-case, and renamed the imp corr column values to their corresponding ivol[dte]

imp corr equation used

x = constituent data. comp_data = composite (etf) data. factor = ivol[dte] used.

adj_weight = constituent weight divided by the sum of all constituent weights (some etfs vary in the amount of weight data available. so this is to adjusted it so that an initial sum of 70% weight = 100% weight)

ticker = composite (etf) tickers

etf_data = composite (etf) vol data

etf_var = etf variance for selected factor (ie. SPY iv[dte]^2)

value = imp corr equation Caption for the picture.

etf variance/sum(constituent vol * constituent adjusted_weight)^2

imp_corr_value <- function(x = x, comp_data = comp_data, factor = "iv10d"){
  
  adj_weight = x$weight/sum(x$weight)
  ticker = unique(x$composite_ticker)
  
  etf_data = filter(comp_data, composite_ticker == ticker)
  etf_var = etf_data[,factor]^2
  
  value = etf_var/(sum(x[,factor]*adj_weight)^2)
  return(value)
}
---
title: "Implied Correlation Table"
output: html_notebook
---

# Loading libraries

```{r}
library(tidyverse)
library(highcharter)
library(quantmod)
library(doParallel)
library(lubridate)

registerDoParallel(cores = 10)
options(scipen = 100)
```

# Helper functions
For future summaries/transformations, implied correlation equation at the bottom of the markdown

```{r, echo=TRUE}
# Helper Functions

divide <- function(x){
  x/100
}

# implied correlation equation: more about this at the end of the markdown

imp_corr_value <- function(x = x, comp_data = comp_data, factor = "iv10d"){
  
  adj_weight = x$weight/sum(x$weight)
  ticker = unique(x$composite_ticker)
  
  etf_data = filter(comp_data, composite_ticker == ticker)
  etf_var = etf_data[,factor]^2
  
  value = etf_var/(sum(x[,factor]*adj_weight)^2)
  return(value)
}

imp_corr_tbl <- function(x, comp_data){
  
  ticker = unique(x$composite_ticker)
  
  df = data.frame(
    ticker = ticker,
    dte10 = imp_corr_value(x = x, comp_data = comp_data),
    dte20 = imp_corr_value(x = x, comp_data = comp_data, factor = "iv20d"),
    dte30 = imp_corr_value(x = x, comp_data = comp_data, factor = "iv30d"),
    dte60 = imp_corr_value(x = x, comp_data = comp_data, factor = "iv60d"),
    dte90 = imp_corr_value(x = x, comp_data = comp_data, factor = "iv90d"),
    dte6m = imp_corr_value(x = x, comp_data = comp_data, factor = "iv6m"),
    dte1yr = imp_corr_value(x = x, comp_data = comp_data, factor = "iv1yr"),
    row.names = NULL
  )
  return(df)
}
```

# Downloading Data
holdings_raw has all the holdings and corresponding weights for each etf of "2021-11-01". quandl_data has all the vol data. vol_data is subsetting the implied vol dte's i'm going to use for this table. the data comes with vol*100, so i transformed the vol data back to it's percentage with the divide helper function

```{r echo=TRUE}
# Downloading Data

holdings_raw <- read_csv("holdings_data.csv")
quandl_data <- read_csv("quandl_data.csv")

vol_data <- quandl_data %>%
  select(ticker, tradedate, iv10d, iv20d, iv30d, iv60d, iv90d, iv6m, iv1yr) %>% 
  mutate_if(is.numeric, divide) %>%
  arrange(ticker)
```

# holdings_raw data (first 100 rows)

```{r}
holdings_raw[1:100,]
```

# vol_data data

```{r}
vol_data
```

```{r}
# Transforms

# Filtering for tickers that quandl has vol data for
holdings_raw <- na.omit(holdings_raw[,2:5]) %>%
  mutate(as_of_date = ymd(as_of_date)) %>%
  filter(
    composite_ticker %in% vol_data$ticker,
    constituent_ticker %in% vol_data$ticker
  )

# Filtering out ETFs with summed weights of less than 50%
comp_tickers_weight_filtered <- holdings_raw %>%
  group_by(composite_ticker) %>%
  summarise(value = sum(weight)) %>%
  arrange(desc(value)) %>%
  filter(value >= 0.50) %>%
  pull(composite_ticker)

holdings_df <- holdings_raw %>%
  filter(
    composite_ticker %in% comp_tickers_weight_filtered
  ) %>%
  select(-c(as_of_date))


# Separating ETF data
comp_tickers = holdings_df %>%
  select(-c(constituent_ticker, weight)) %>%
  distinct()

```

holdings_df

```{r}
holdings_df
```

```{r}

# Joining holdings data with vol data

comp_data <- comp_tickers %>%
  inner_join(vol_data, by = c("composite_ticker" = "ticker"))

const_data <- holdings_df %>%
  inner_join(vol_data, by = c("constituent_ticker" = "ticker")) 

# Putting data into list which is id'd by their parent etf
listed_data = split(const_data, f = const_data$composite_ticker)

```

const_data: merged dataframe of constituent tickers and it's corresponding iv + weight values

```{r}
const_data
```

# implied correlation summaries

mapped heped function imp_cprr_tabl to the listed_data. then innerjoined the resulting dataframe with a new column called "sum_weights", this tells us the sum weights of the holdings that i had vol data for (and applied the imp correlation with)

```{r}
# Implied Correlation Calculation

values = map_df(listed_data, function(x) imp_corr_tbl(x = x, comp_data = comp_data))


# Extra: Adding summed_weights as a column to the final calculations

summed_weights <- holdings_raw %>%
  group_by(composite_ticker) %>%
  summarise(sum_weights = sum(weight)) %>%
  filter(sum_weights >= 0.50) 

df = values %>%
  inner_join(summed_weights, by = c("ticker" = "composite_ticker")) %>%
  inner_join(quandl_data[,c("ticker", "pxatmiv")], by = c("ticker"))

```

```{r}
df
```

# merging data

sourcing composite (etf) name, merging all dataframes

```{r}

metrics <- yahooQF(c("Name", "Change in Percent"))

tickers_yf = getQuote(df$ticker, what = metrics) %>%
  mutate(ticker = rownames(.),
         Name = ifelse(is.na(Name), ticker, Name))

ticker_names <- tickers_yf %>%
  mutate(ticker = rownames(tickers_yf)) %>%
  select(ticker, everything())

final = df %>%
  left_join(ticker_names[,c("ticker", "Name")], by = c("ticker")) %>%
  mutate(date = "2021-11-01") %>%
  select(ticker, Name, date, pxatmiv, everything()) %>%
  rename("Price" = pxatmiv)

```

final

```{r}
final
```

# cleaning df

added 'leveraged' column, string detected for "2", "3", "ultra", or "direxion" and labeled them TRUE to filter out leveraged etfs.
changed all column name to lower-case, and renamed the imp corr column values to their corresponding ivol[dte]

```{r}
final  = final %>%
  mutate(
    leveraged = str_detect(final$Name, "3|2|Ultra|Direxion") 
  ) 

colnames(final) <- tolower(colnames(final))

final = final %>%
  mutate_if(is.numeric, round, 3) %>%
  rename(
    "dte10" = iv10d,
    "dte20" = iv20d,
    "dte30" = iv30d,
    "dte60" = iv60d,
    "dte90" = iv90d,
    "dte6m" = iv6m,
    "dte1yr" = iv1yr
  )
```

```{r}
final
```

# imp corr equation used

x = constituent data.
comp_data = composite (etf) data.
factor = ivol[dte] used.

adj_weight = constituent weight divided by the sum of all constituent weights (some etfs vary in the amount of weight data available. so this is to adjusted it so that an initial sum of 70% weight = 100% weight)

ticker = composite (etf) tickers

etf_data = composite (etf) vol data

etf_var = etf variance for selected factor (ie. SPY iv[dte]^2)

value = imp corr equation
![Caption for the picture.](imp_corr_image.png)

etf variance/sum(constituent vol * constituent adjusted_weight)^2

```{r}
imp_corr_value <- function(x = x, comp_data = comp_data, factor = "iv10d"){
  
  adj_weight = x$weight/sum(x$weight)
  ticker = unique(x$composite_ticker)
  
  etf_data = filter(comp_data, composite_ticker == ticker)
  etf_var = etf_data[,factor]^2
  
  value = etf_var/(sum(x[,factor]*adj_weight)^2)
  return(value)
}
```











