Purpose

  • The purpose of this script is to show the steps and decisions taken to complete the pre-assignment 1 for FINA 9210.

  • The data used are downloaded from Dropbox and saved locally in the same folder as this script.

library(dplyr)
library(tidyverse)
library(data.table)
library(ggplot2)
library(kableExtra)
library(xtable)
library(TTR)
library(zoo)
library(MASS)

Loading the data and merging

stock <- read.csv("raw_returns.csv")
ticker <- read.csv("ticker_permno.csv")

stock$permno <- as.character(stock$permno)
ticker$permno <- as.character(ticker$permno)
  • Noticed that SPY has multiple permnos, which were dated back to the 60s and 70s. We will now disregard these permnos.
# For each ticker, keep observations of the most recent permno

ticker_list <- ticker %>% 
  group_by(ticker) %>% 
  filter(date == max(date)) %>% 
  ungroup() %>%
  dplyr::select(ticker, permno)

stock <- stock %>% 
  left_join(ticker_list, by = "permno") %>% 
  dplyr::select(-permno) 

Problem 2

a) Summary Statistics

stock %>% 
  group_by(ticker) %>% 
  summarise(
    mean = mean(ret, na.rm = TRUE),
    sd = sd(ret, na.rm = TRUE),
    med = median(ret, na.rm = TRUE),
    min = min(ret, na.rm = TRUE),
    max = max(ret, na.rm = TRUE)
  ) %>%  
  kable(digits = 3, col.names = c("Ticker Symbol", "Mean", "SD", "Median", "Min", "Max")) |>
      kable_styling(bootstrap_options = c("striped", "hover")) |>
      kableExtra::scroll_box( height = "500px")
Ticker Symbol Mean SD Median Min Max
QQQ 0.012 0.053 0.018 -0.156 0.150
SHY 0.001 0.004 0.001 -0.014 0.018
SPY 0.009 0.043 0.013 -0.165 0.127
TLT 0.004 0.040 0.003 -0.131 0.143
VB 0.009 0.056 0.015 -0.219 0.187
VNQ 0.008 0.064 0.014 -0.317 0.307
VTV 0.008 0.043 0.014 -0.162 0.128

Generating LATEX code

sum_table <- stock %>% 
  group_by(ticker) %>% 
  summarise(
    mean = mean(ret, na.rm = TRUE),
    sd = sd(ret, na.rm = TRUE),
    med = median(ret, na.rm = TRUE),
    min = min(ret, na.rm = TRUE),
    max = max(ret, na.rm = TRUE)
  )

print(xtable(sum_table, digits = 3, caption = "Summary Statistics", label = "tab:sum_table"), 
      caption.placement = "top", include.rownames = FALSE, caption.width = "0.5\\textwidth")
## % latex table generated in R 4.3.3 by xtable 1.8-4 package
## % Sun Aug 18 16:39:09 2024
## \begin{table}[ht]
## \centering
## \parbox{0.5\textwidth}{\caption{Summary Statistics}} 
## \label{tab:sum_table}
## \begin{tabular}{lrrrrr}
##   \hline
## ticker & mean & sd & med & min & max \\ 
##   \hline
## QQQ & 0.012 & 0.053 & 0.018 & -0.156 & 0.150 \\ 
##   SHY & 0.001 & 0.004 & 0.001 & -0.014 & 0.018 \\ 
##   SPY & 0.009 & 0.043 & 0.013 & -0.165 & 0.127 \\ 
##   TLT & 0.004 & 0.040 & 0.003 & -0.131 & 0.143 \\ 
##   VB & 0.009 & 0.056 & 0.015 & -0.219 & 0.187 \\ 
##   VNQ & 0.008 & 0.064 & 0.014 & -0.317 & 0.307 \\ 
##   VTV & 0.008 & 0.043 & 0.014 & -0.162 & 0.128 \\ 
##    \hline
## \end{tabular}
## \end{table}

b) Plotting cummulative returns

stock$date <- as.Date(stock$date, format = "%Y-%m-%d")


cum_return <- stock %>% 
  group_by(ticker) %>% 
  arrange(date) %>% 
  na.omit() %>%  # Removes rows with NA values
  mutate(cum_ret = cumprod(1 + ret) - 1) %>% 
  ggplot(aes(x = date, y = cum_ret, color = ticker)) +
  geom_line() +
  labs(title = "Cumulative Returns", x = "Date", y = "Cumulative Returns") +
  theme_minimal() +
  theme(legend.position = "right")

print(cum_return)

# export 

ggsave("cumulative_returns.png", plot = cum_return, width = 10, height = 6, dpi = 300)
# Finding largest drawdown for each ticker 

draw_down <- stock %>% 
  group_by(ticker) %>% 
  arrange(date) %>% 
  na.omit() %>% 
  mutate(cum_ret = cumprod(1 + ret) - 1) %>% 
  mutate(drawdown = cum_ret - cummax(cum_ret)) %>% 
  summarise(max_drawdown = min(drawdown)) 


draw_down %>% 
  kable(digits = 3, col.names = c("Ticker Symbol", "Max Drawdown")) |>
      kable_styling(bootstrap_options = c("striped", "hover")) |>
      kableExtra::scroll_box( height = "500px")
Ticker Symbol Max Drawdown
QQQ -4.095
SHY -0.075
SPY -1.451
TLT -1.667
VB -1.467
VNQ -1.615
VTV -0.931
# export latex

print(xtable(draw_down, digits = 3, caption = "Max Drawdown", label = "tab:draw_down"), 
      caption.placement = "top", include.rownames = FALSE, caption.width = "0.5\\textwidth")
## % latex table generated in R 4.3.3 by xtable 1.8-4 package
## % Sun Aug 18 16:39:11 2024
## \begin{table}[ht]
## \centering
## \parbox{0.5\textwidth}{\caption{Max Drawdown}} 
## \label{tab:draw_down}
## \begin{tabular}{lr}
##   \hline
## ticker & max\_drawdown \\ 
##   \hline
## QQQ & -4.095 \\ 
##   SHY & -0.075 \\ 
##   SPY & -1.451 \\ 
##   TLT & -1.667 \\ 
##   VB & -1.467 \\ 
##   VNQ & -1.615 \\ 
##   VTV & -0.931 \\ 
##    \hline
## \end{tabular}
## \end{table}

Using Mean Reversion Strategy

Setting up

  • I absolutely don’t believe in technical analysis and trade stocks based on historical data. I am strong believer in semi-strong market efficiency. Or at the very least, I’ve never made money trading a stock based on charts or technical factors.

  • For this assignment, I will use Mean Reversion Strategy to trade SPY.

  • The strategy is as follows:

    1. Calculate the Z-score of the stock returns based on a rolling mean and standard deviation.

    2. Generate trading signals based on the Z-score. Go long when the Z-score is below a -0.5 and go short when the Z-score is above a 0.5. Stay neutral otherwise.

  • Z-score is calculated as:

    \[ Z_{score} = \frac{ret - \text{10-month rolling average}}{\text{10-month rolling standard deviation}} \]

  • Rolling mean and standard deviation are calculated based on a 10-month look-back period.

  • Assuming only one stock is in the portfolio, SPY.

SPY <- stock %>% 
  filter(ticker == "SPY") %>% 
  dplyr::select(date, ret) %>% 
  arrange(date) %>% 
  na.omit()
# Calculate the Z-score
lookback_period <- 10  # 10-month look-back period

spy_df <- SPY %>%
  mutate(
    rolling_mean = rollapply(ret, width = lookback_period, FUN = mean, align = 'right', fill = NA),
    rolling_sd = rollapply(ret, width = lookback_period, FUN = sd, align = 'right', fill = NA),
    Z_score = (ret - rolling_mean) / rolling_sd
  )

#Generate Trading Signals Based on Z-score
threshold <- 0.5  # Threshold for Z-score

spy_df <- spy_df %>%
  mutate(
    Signal = case_when(
      Z_score < -threshold ~ 1,  # Go Long when Z-score is below -threshold
      Z_score > threshold ~ -1,  # Go Short when Z-score is above threshold
      TRUE ~ 0  # Stay Neutral if within the threshold range
    )
  )

Cummulative Returns

# Backtest the Strategy
spy_df <- spy_df %>%
  mutate(
    Strategy_Return = Signal * ret,
    Cumulative_Strategy_Return = cumprod(1 + Strategy_Return),
    Cumulative_SPY_Return = cumprod(1 + ret)
  )

#Evaluate the Performance
# Plot cumulative returns
mean_strategy <- ggplot(spy_df, aes(x = date)) +
  geom_line(aes(y = Cumulative_Strategy_Return, color = "Strategy")) +
  geom_line(aes(y = Cumulative_SPY_Return, color = "SPY")) +
  labs(title = "Cumulative Returns: Mean Reversion Strategy vs SPY",
       y = "Cumulative Return",
       x = "Date") +
  theme_minimal()


print(mean_strategy)

## Export the plot

ggsave("mean_reversion.png", plot = mean_strategy, width = 10, height = 6, dpi = 300)

Table of summary statitiscs (max, min, median, SD, Sharpe ratio)

  • To calculate the Sharpe ratio, I used this formula

    \[ \text{Sharpe Ratio} = \frac{R_p - R_f}{\sigma_p} \]

    where:

    • \(R_p\) is the average return of the portfolio.

    • \(R_f\) is the risk-free rate. Currently assumed to be 3.89% (10 Y Treasury Rate as of 08/17/2024)

    • \(\sigma_p\) is the standard deviation of the portfolio returns.

summary_table <- spy_df %>% 
  summarise(
    max = max(Strategy_Return, na.rm = TRUE),
    min = min(Strategy_Return, na.rm = TRUE),
    median = median(Strategy_Return, na.rm = TRUE),
    sd = sd(Strategy_Return, na.rm = TRUE),
    sharpe = {
      # Calculate annualized return
      annualized_return <- function(returns) {
        n_months <- length(returns)
        (prod(1 + returns)^(12 / n_months)) - 1
      }
      
      # Calculate annualized volatility
      annualized_volatility <- function(returns) {
        sd(returns) * sqrt(12)
      }
      
      # Sharpe ratio calculation
      risk_free_rate <- 0.0398
      ann_return <- annualized_return(Strategy_Return)
      ann_volatility <- annualized_volatility(Strategy_Return)
      (ann_return - risk_free_rate) / ann_volatility
    }
  ) 

summary_table %>% 
  kable(digits = 3, col.names = c("Max", "Min", "Median", "SD", "Sharpe Ratio")) |>
      kable_styling(bootstrap_options = c("striped", "hover")) |>
      kableExtra::scroll_box( height = "500px")
Max Min Median SD Sharpe Ratio
0.019 -0.165 -0.014 0.032 -2.824
# export to LaTex
print(xtable(summary_table, digits = 3, caption = "Summary Statistics", label = "tab:summary_table"), 
      caption.placement = "top", include.rownames = FALSE, caption.width = "0.5\\textwidth")
## % latex table generated in R 4.3.3 by xtable 1.8-4 package
## % Sun Aug 18 16:39:12 2024
## \begin{table}[ht]
## \centering
## \parbox{0.5\textwidth}{\caption{Summary Statistics}} 
## \label{tab:summary_table}
## \begin{tabular}{rrrrr}
##   \hline
## max & min & median & sd & sharpe \\ 
##   \hline
## 0.019 & -0.165 & -0.014 & 0.032 & -2.824 \\ 
##    \hline
## \end{tabular}
## \end{table}

Maximum drawdown

spy_df %>% 
  mutate(
    cum_ret = cumprod(1 + Strategy_Return) - 1,
    drawdown = cum_ret - cummax(cum_ret)
  ) %>% 
  summarise(
    max_drawdown = min(drawdown)
  ) %>% 
  kable(digits = 3, col.names = c("Max Drawdown")) %>%
      kable_styling(bootstrap_options = c("striped", "hover"))
Max Drawdown
-0.998

Problem 5

# Set up
set.seed(123)  # For reproducibility
options(scipen = n)

# Number of years
T <- 25

# Initial investment
initial_investment <- 1000000

# Risk-free asset return
rf <- 0.01

# Mean and covariance matrix for risky assets a and b
mu <- c(1, 1)  # Expected dividends
Sigma <- matrix(c(1, 0, 0, 1), nrow = 2)  # Covariance matrix

# Number of units purchased based on portfolio allocation
units_risk_free <- 5000  # Number of risk-free units (at $100 each)
units_a <- 11351  # Number of units of asset a
units_b <- 11351  # Number of units of asset b

# Number of simulations
num_simulations <- 20

# Initialize a data frame to store all simulation results
all_simulations <- data.frame()


#Run multiple simulations
for (sim in 1:num_simulations) {
  # Simulate the dividends for t = 1 to T
  dividends <- mvrnorm(n = T, mu = mu, Sigma = Sigma)
  
  # Calculate wealth over time
  wealth <- numeric(T)
  wealth[1] <- initial_investment  # Initial wealth
  
  for (t in 2:T) {
    # Wealth from risk-free asset
    wealth_risk_free <- units_risk_free * (100 + rf * 100 * t)  # Principal + cumulative coupon payments
    
    # Wealth from risky assets (dividends received)
    wealth_a <- units_a * sum(dividends[1:t, 1])
    wealth_b <- units_b * sum(dividends[1:t, 2])
    
    # Total wealth at time t
    wealth[t] <- wealth_risk_free + wealth_a + wealth_b
  }
  
  # Store the results in a data frame
  sim_results <- data.frame(Time = 1:T, Wealth = wealth, Simulation = as.factor(sim))
  all_simulations <- rbind(all_simulations, sim_results)
}

# Plot all simulations on one graph
sim_plot <- ggplot(all_simulations, aes(x = Time, y = Wealth, group = Simulation, color = Simulation)) +
  geom_line(size = 1, alpha = 0.7) +
  labs(title = "Wealth Over Time Across 20 Simulations",
       x = "Time (Years)",
       y = "Wealth ($)") +
  scale_y_continuous() +  
  theme_minimal() +
  theme(legend.position = "none")

print(sim_plot)

# Export the plot
ggsave("wealth_simulation.png", plot = sim_plot, width = 10, height = 6, dpi = 300)