setup - libaries & custom functions

# libraries 
library(tidyverse) # data wrangling & plotting
library(httr) # api get/post requests

# cnbc api function for later - pulls sector etfs
# data from this table [https://www.cnbc.com/sector-etfs/]
cnbc_api <- function(){
  request <- GET("https://quote.cnbc.com/quote-html-webservice/restQuote/symbolType/symbol?symbols=XLE%7CXLF%7CXLU%7CXLI%7CGDX%7CXLK%7CXLV%7CXLY%7CXLP%7CXLB%7CXOP%7CIYR%7CXHB%7CITB%7CVNQ%7CGDXJ%7CIYE%7COIH%7CXME%7CXRT%7CSMH%7CIBB%7CKBE%7CKRE%7CXTL&requestMethod=itv&noform=1&partnerId=2&fund=1&exthrs=1&output=json&events=1")
  
  if(status_code(request) == 200){
    contents <- content(request)
    cnbc_list <- contents[["FormattedQuoteResult"]][["FormattedQuote"]]
    cnbc_data <- lapply(1:length(cnbc_list), function(x){
      as.data.frame(cnbc_list[x])}) %>% 
      bind_rows(.)
    
    return(cnbc_data)
    
  } else {
    message(sprintf("Status Code %s - No Data!", status_code(request)))
    return()
  }
}

setup - downloading cleaned data from other markdown

# cleaned option data from orats
option_data <- readRDS(file = "data/clean_orats_core.rds")
# cleaned price data from yahoo
price_data <- readRDS(file = "data/clean_price_data.rds")

# samples of both tables
head(option_data, 3)
head(price_data, 3)

data prep - pulling sector etf tbl from cnbc

# pulling cnbc sector etf table, then displaying a sample
cnbc_tbl <- cnbc_api()

#sample
head(cnbc_tbl, 3)

# filtering for etfs whose data we have in our cleaned tables
etf_tbl <- cnbc_tbl %>%
  select("ticker" = symbol,
         name) %>%
  filter(ticker %in% unique(price_data$ticker))

# sample of final etf table
head(etf_tbl, 3)

data prep - calculating price rate-of-change & preparing to calc relative value table with energy tickers

# calculating the log rate-of-change of adjclose prices
price_returns <- price_data %>%
  group_by(ticker) %>%
  arrange(ticker, date) %>%
  mutate(price_change = round(log(adj_close/lag(adj_close)), 4)) %>%
  ungroup()

# sample
head(price_returns, 3)

# going to filter for tickers that has sector = "Energy" and see how much data for all
# if one ticker has too few - it will tamper with the correlation range since the whole table will remove the data to be equal
price_returns %>%
  filter(ticker %in% c(option_data[option_data$sector == "Energy",]$ticker, "XLE")) %>%
  count(ticker) %>%
  arrange(n)

# calculating pearson correlation with xle & all selected tickers
corr_tbl <- price_returns %>%
  filter(ticker %in% c(option_data[option_data$sector == "Energy",]$ticker, "XLE")) %>%
  pivot_wider(id_cols = date, names_from = ticker, values_from = price_change) %>%
  drop_na() %>%
  select(-date) %>%
  as.matrix(.) %>%
  cor(.) %>%
  data.frame(
    etf = "XLE",
    ticker = row.names(.),
    row.names = NULL) %>%
  select(etf, ticker, "correlation" = XLE) %>%
  arrange(desc(correlation))

# sample of correlation table
# etf = benchmark etf (xle)
# ticker = energy ticker
# correlation = pearson correlation between price log rate-of-change
head(corr_tbl, 3)

data prep - run simple linear regressions for coefficients of energy tickers ~ xle

# preparing energy price tbl for the model calculation
energy_price_tbl <- price_returns %>%
  filter(ticker %in% c(option_data[option_data$sector == "Energy",]$ticker, "XLE")) %>%
  pivot_wider(id_cols = date, names_from = ticker, values_from = price_change) %>%
  drop_na() 

# sample
head(energy_price_tbl, 3)

# lapply loop to calculate the model of each ticker ~ xle 
# binding data frame with their coefficients (alpha & beta)
model_tbl <- lapply(2:length(energy_price_tbl), function(x){
  ticker <- names(energy_price_tbl)[x]
  coefs <- coefficients(lm(energy_price_tbl[,ticker][[1]] ~ energy_price_tbl[,"XLE"][[1]]))
  
  df <- data.frame(
    etf = "XLE",
    ticker = ticker,
    alpha = coefs[1],
    beta = coefs[2],
    row.names = NULL)
  
  return(df)}) %>%
  bind_rows() %>%
  mutate(across(alpha:beta, function(x) round(x, 4)))

# sample
head(model_tbl, 3)

data visualization - joined all tables into an “energy table”

# joining all tables
# calculating forecast iv & difference between iv-forecast
energy_tbl <- left_join(corr_tbl, model_tbl, by = c("etf", "ticker")) %>%
  left_join(option_data, by = "ticker") %>%
  rename_all(~str_replace_all(., "atmivm", "iv")) %>%
  mutate(across(starts_with("iv"), function(x) round(first(x) * beta, 2), .names = "{.col}_forecast"),
         first_month = iv1 - iv1_forecast,
         second_month = iv2 - iv2_forecast,
         third_month = iv3 - iv3_forecast,
         fourth_month = iv4 - iv4_forecast)

# sample
head(energy_tbl, 3)

# flipped column chart of the **absolute** difference between iv & forecast iv
# in descending order, only displaying the front 2 monthlies
energy_tbl %>%
  pivot_longer(first_month:second_month) %>%
  select(ticker, correlation, name, value) %>%
  mutate(value = abs(value)) %>%
  ggplot(aes(fct_reorder(ticker, value), value, fill = correlation)) +
  geom_col() +
  coord_flip() +
  facet_wrap(~name, scales = "free") +
  labs(title = "energy tickers: current implied vol & forecast implied vol absolute difference",
       subtitle = "absolute difference between atm monthly iv & forecast iv (with xle beta 1year)",
       x = "energy tickers", y = "absolute implied volatility difference",
       caption = "the greater the difference, the hypothetically larger the inefficiency. compare the difference with the correlation to xle. only showcasing the front two monthly expiries.")

final table - energy table

energy_tbl
---
title: "relative value scanner exercise"
output: html_notebook
---

### setup - libaries & custom functions

```{r warning=FALSE}
# libraries 
library(tidyverse) # data wrangling & plotting
library(httr) # api get/post requests

# cnbc api function for later - pulls sector etfs
# data from this table [https://www.cnbc.com/sector-etfs/]
cnbc_api <- function(){
  request <- GET("https://quote.cnbc.com/quote-html-webservice/restQuote/symbolType/symbol?symbols=XLE%7CXLF%7CXLU%7CXLI%7CGDX%7CXLK%7CXLV%7CXLY%7CXLP%7CXLB%7CXOP%7CIYR%7CXHB%7CITB%7CVNQ%7CGDXJ%7CIYE%7COIH%7CXME%7CXRT%7CSMH%7CIBB%7CKBE%7CKRE%7CXTL&requestMethod=itv&noform=1&partnerId=2&fund=1&exthrs=1&output=json&events=1")
  
  if(status_code(request) == 200){
    contents <- content(request)
    cnbc_list <- contents[["FormattedQuoteResult"]][["FormattedQuote"]]
    cnbc_data <- lapply(1:length(cnbc_list), function(x){
      as.data.frame(cnbc_list[x])}) %>% 
      bind_rows(.)
    
    return(cnbc_data)
    
  } else {
    message(sprintf("Status Code %s - No Data!", status_code(request)))
    return()
  }
}
```

### setup - downloading cleaned data from other markdown
- option data (from orats), intended to be bare-bones so this can be done with any api with option contract data
- price data (from yahoo)

```{r}
# cleaned option data from orats
option_data <- readRDS(file = "data/clean_orats_core.rds")
# cleaned price data from yahoo
price_data <- readRDS(file = "data/clean_price_data.rds")

# samples of both tables
head(option_data, 3)
head(price_data, 3)
```

### data prep - pulling sector etf tbl from cnbc
- will be using these etfs as benchmarks for the relative value table

```{r}
# pulling cnbc sector etf table, then displaying a sample
cnbc_tbl <- cnbc_api()

#sample
head(cnbc_tbl, 3)

# filtering for etfs whose data we have in our cleaned tables
etf_tbl <- cnbc_tbl %>%
  select("ticker" = symbol,
         name) %>%
  filter(ticker %in% unique(price_data$ticker))

# sample of final etf table
head(etf_tbl, 3)
```

### data prep - calculating price rate-of-change & preparing to calc relative value table with **energy** tickers
- going to calculate a pearson correlation table (with energy tickers & xle)

```{r}
# calculating the log rate-of-change of adjclose prices
price_returns <- price_data %>%
  group_by(ticker) %>%
  arrange(ticker, date) %>%
  mutate(price_change = round(log(adj_close/lag(adj_close)), 4)) %>%
  ungroup()

# sample
head(price_returns, 3)

# going to filter for tickers that has sector = "Energy" and see how much data for all
# if one ticker has too few - it will tamper with the correlation range since the whole table will remove the data to be equal
price_returns %>%
  filter(ticker %in% c(option_data[option_data$sector == "Energy",]$ticker, "XLE")) %>%
  count(ticker) %>%
  arrange(n)

# calculating pearson correlation with xle & all selected tickers
corr_tbl <- price_returns %>%
  filter(ticker %in% c(option_data[option_data$sector == "Energy",]$ticker, "XLE")) %>%
  pivot_wider(id_cols = date, names_from = ticker, values_from = price_change) %>%
  drop_na() %>%
  select(-date) %>%
  as.matrix(.) %>%
  cor(.) %>%
  data.frame(
    etf = "XLE",
    ticker = row.names(.),
    row.names = NULL) %>%
  select(etf, ticker, "correlation" = XLE) %>%
  arrange(desc(correlation))

# sample of correlation table
# etf = benchmark etf (xle)
# ticker = energy ticker
# correlation = pearson correlation between price log rate-of-change
head(corr_tbl, 3)
```

### data prep - run simple linear regressions for coefficients of energy tickers ~ xle
- creating "model table" with alpha & beta values

```{r}
# preparing energy price tbl for the model calculation
energy_price_tbl <- price_returns %>%
  filter(ticker %in% c(option_data[option_data$sector == "Energy",]$ticker, "XLE")) %>%
  pivot_wider(id_cols = date, names_from = ticker, values_from = price_change) %>%
  drop_na() 

# sample
head(energy_price_tbl, 3)

# lapply loop to calculate the model of each ticker ~ xle 
# binding data frame with their coefficients (alpha & beta)
model_tbl <- lapply(2:length(energy_price_tbl), function(x){
  ticker <- names(energy_price_tbl)[x]
  coefs <- coefficients(lm(energy_price_tbl[,ticker][[1]] ~ energy_price_tbl[,"XLE"][[1]]))
  
  df <- data.frame(
    etf = "XLE",
    ticker = ticker,
    alpha = coefs[1],
    beta = coefs[2],
    row.names = NULL)
  
  return(df)}) %>%
  bind_rows() %>%
  mutate(across(alpha:beta, function(x) round(x, 4)))

# sample
head(model_tbl, 3)
```

### data visualization - joined all tables into an "energy table"
- joined correlation, model & option data tables
- calculated the "iv forecast" with xle 1yr beta
- calculated the difference between the tickers current implied vol & forecast across each monthly expiry
- graphed a visualization of the **absolute** difference between implied vol & forecast vol
  - arranged differenced values in descending order
  - faceted the data into two plots, "front month" & "second month"
  - filled the column charts by correlation value to xle (the brighter blue the column, the greater the correlation to xle)
  - as expected, tickers least correlation to xle will have a greater spread to our forecast iv

```{r fig.height=12, fig.width=12}
# joining all tables
# calculating forecast iv & difference between iv-forecast
energy_tbl <- left_join(corr_tbl, model_tbl, by = c("etf", "ticker")) %>%
  left_join(option_data, by = "ticker") %>%
  rename_all(~str_replace_all(., "atmivm", "iv")) %>%
  mutate(across(starts_with("iv"), function(x) round(first(x) * beta, 2), .names = "{.col}_forecast"),
         first_month = iv1 - iv1_forecast,
         second_month = iv2 - iv2_forecast,
         third_month = iv3 - iv3_forecast,
         fourth_month = iv4 - iv4_forecast)

# sample
head(energy_tbl, 3)

# flipped column chart of the **absolute** difference between iv & forecast iv
# in descending order, only displaying the front 2 monthlies
energy_tbl %>%
  pivot_longer(first_month:second_month) %>%
  select(ticker, correlation, name, value) %>%
  mutate(value = abs(value)) %>%
  ggplot(aes(fct_reorder(ticker, value), value, fill = correlation)) +
  geom_col() +
  coord_flip() +
  facet_wrap(~name, scales = "free") +
  labs(title = "energy tickers: current implied vol & forecast implied vol absolute difference",
       subtitle = "absolute difference between atm monthly iv & forecast iv (with xle beta 1year)",
       x = "energy tickers", y = "absolute implied volatility difference",
       caption = "the greater the difference, the hypothetically larger the inefficiency. compare the difference with the correlation to xle. only showcasing the front two monthly expiries.")
```

### final table - energy table
- final table with all calculations from this exercise with tickers from the energy sector
- again, there were pre-filtering done when cleaning data in another markdown, refer to that for details

```{r}
energy_tbl
```





















