The purpose of this script is to show the steps and decisions taken to complete the pre-assignment 1 for FINA 9210.
The data used are downloaded from Dropbox and saved locally in the same folder as this script.
stock <- read.csv("raw_returns.csv")
ticker <- read.csv("ticker_permno.csv")
stock$permno <- as.character(stock$permno)
ticker$permno <- as.character(ticker$permno)
stock %>%
group_by(ticker) %>%
summarise(
mean = mean(ret, na.rm = TRUE),
sd = sd(ret, na.rm = TRUE),
med = median(ret, na.rm = TRUE),
min = min(ret, na.rm = TRUE),
max = max(ret, na.rm = TRUE)
) %>%
kable(digits = 3, col.names = c("Ticker Symbol", "Mean", "SD", "Median", "Min", "Max")) |>
kable_styling(bootstrap_options = c("striped", "hover")) |>
kableExtra::scroll_box( height = "500px")
Ticker Symbol | Mean | SD | Median | Min | Max |
---|---|---|---|---|---|
QQQ | 0.012 | 0.053 | 0.018 | -0.156 | 0.150 |
SHY | 0.001 | 0.004 | 0.001 | -0.014 | 0.018 |
SPY | 0.009 | 0.043 | 0.013 | -0.165 | 0.127 |
TLT | 0.004 | 0.040 | 0.003 | -0.131 | 0.143 |
VB | 0.009 | 0.056 | 0.015 | -0.219 | 0.187 |
VNQ | 0.008 | 0.064 | 0.014 | -0.317 | 0.307 |
VTV | 0.008 | 0.043 | 0.014 | -0.162 | 0.128 |
sum_table <- stock %>%
group_by(ticker) %>%
summarise(
mean = mean(ret, na.rm = TRUE),
sd = sd(ret, na.rm = TRUE),
med = median(ret, na.rm = TRUE),
min = min(ret, na.rm = TRUE),
max = max(ret, na.rm = TRUE)
)
print(xtable(sum_table, digits = 3, caption = "Summary Statistics", label = "tab:sum_table"),
caption.placement = "top", include.rownames = FALSE, caption.width = "0.5\\textwidth")
## % latex table generated in R 4.3.3 by xtable 1.8-4 package
## % Sun Aug 18 16:39:09 2024
## \begin{table}[ht]
## \centering
## \parbox{0.5\textwidth}{\caption{Summary Statistics}}
## \label{tab:sum_table}
## \begin{tabular}{lrrrrr}
## \hline
## ticker & mean & sd & med & min & max \\
## \hline
## QQQ & 0.012 & 0.053 & 0.018 & -0.156 & 0.150 \\
## SHY & 0.001 & 0.004 & 0.001 & -0.014 & 0.018 \\
## SPY & 0.009 & 0.043 & 0.013 & -0.165 & 0.127 \\
## TLT & 0.004 & 0.040 & 0.003 & -0.131 & 0.143 \\
## VB & 0.009 & 0.056 & 0.015 & -0.219 & 0.187 \\
## VNQ & 0.008 & 0.064 & 0.014 & -0.317 & 0.307 \\
## VTV & 0.008 & 0.043 & 0.014 & -0.162 & 0.128 \\
## \hline
## \end{tabular}
## \end{table}
stock$date <- as.Date(stock$date, format = "%Y-%m-%d")
cum_return <- stock %>%
group_by(ticker) %>%
arrange(date) %>%
na.omit() %>% # Removes rows with NA values
mutate(cum_ret = cumprod(1 + ret) - 1) %>%
ggplot(aes(x = date, y = cum_ret, color = ticker)) +
geom_line() +
labs(title = "Cumulative Returns", x = "Date", y = "Cumulative Returns") +
theme_minimal() +
theme(legend.position = "right")
print(cum_return)
# Finding largest drawdown for each ticker
draw_down <- stock %>%
group_by(ticker) %>%
arrange(date) %>%
na.omit() %>%
mutate(cum_ret = cumprod(1 + ret) - 1) %>%
mutate(drawdown = cum_ret - cummax(cum_ret)) %>%
summarise(max_drawdown = min(drawdown))
draw_down %>%
kable(digits = 3, col.names = c("Ticker Symbol", "Max Drawdown")) |>
kable_styling(bootstrap_options = c("striped", "hover")) |>
kableExtra::scroll_box( height = "500px")
Ticker Symbol | Max Drawdown |
---|---|
QQQ | -4.095 |
SHY | -0.075 |
SPY | -1.451 |
TLT | -1.667 |
VB | -1.467 |
VNQ | -1.615 |
VTV | -0.931 |
# export latex
print(xtable(draw_down, digits = 3, caption = "Max Drawdown", label = "tab:draw_down"),
caption.placement = "top", include.rownames = FALSE, caption.width = "0.5\\textwidth")
## % latex table generated in R 4.3.3 by xtable 1.8-4 package
## % Sun Aug 18 16:39:11 2024
## \begin{table}[ht]
## \centering
## \parbox{0.5\textwidth}{\caption{Max Drawdown}}
## \label{tab:draw_down}
## \begin{tabular}{lr}
## \hline
## ticker & max\_drawdown \\
## \hline
## QQQ & -4.095 \\
## SHY & -0.075 \\
## SPY & -1.451 \\
## TLT & -1.667 \\
## VB & -1.467 \\
## VNQ & -1.615 \\
## VTV & -0.931 \\
## \hline
## \end{tabular}
## \end{table}
I absolutely don’t believe in technical analysis and trade stocks based on historical data. I am strong believer in semi-strong market efficiency. Or at the very least, I’ve never made money trading a stock based on charts or technical factors.
For this assignment, I will use Mean Reversion Strategy to trade SPY.
The strategy is as follows:
Calculate the Z-score of the stock returns based on a rolling mean and standard deviation.
Generate trading signals based on the Z-score. Go long when the Z-score is below a -0.5 and go short when the Z-score is above a 0.5. Stay neutral otherwise.
Z-score is calculated as:
\[ Z_{score} = \frac{ret - \text{10-month rolling average}}{\text{10-month rolling standard deviation}} \]
Rolling mean and standard deviation are calculated based on a 10-month look-back period.
Assuming only one stock is in the portfolio, SPY.
SPY <- stock %>%
filter(ticker == "SPY") %>%
dplyr::select(date, ret) %>%
arrange(date) %>%
na.omit()
# Calculate the Z-score
lookback_period <- 10 # 10-month look-back period
spy_df <- SPY %>%
mutate(
rolling_mean = rollapply(ret, width = lookback_period, FUN = mean, align = 'right', fill = NA),
rolling_sd = rollapply(ret, width = lookback_period, FUN = sd, align = 'right', fill = NA),
Z_score = (ret - rolling_mean) / rolling_sd
)
#Generate Trading Signals Based on Z-score
threshold <- 0.5 # Threshold for Z-score
spy_df <- spy_df %>%
mutate(
Signal = case_when(
Z_score < -threshold ~ 1, # Go Long when Z-score is below -threshold
Z_score > threshold ~ -1, # Go Short when Z-score is above threshold
TRUE ~ 0 # Stay Neutral if within the threshold range
)
)
# Backtest the Strategy
spy_df <- spy_df %>%
mutate(
Strategy_Return = Signal * ret,
Cumulative_Strategy_Return = cumprod(1 + Strategy_Return),
Cumulative_SPY_Return = cumprod(1 + ret)
)
#Evaluate the Performance
# Plot cumulative returns
mean_strategy <- ggplot(spy_df, aes(x = date)) +
geom_line(aes(y = Cumulative_Strategy_Return, color = "Strategy")) +
geom_line(aes(y = Cumulative_SPY_Return, color = "SPY")) +
labs(title = "Cumulative Returns: Mean Reversion Strategy vs SPY",
y = "Cumulative Return",
x = "Date") +
theme_minimal()
print(mean_strategy)
To calculate the Sharpe ratio, I used this formula
\[ \text{Sharpe Ratio} = \frac{R_p - R_f}{\sigma_p} \]
where:
\(R_p\) is the average return of the portfolio.
\(R_f\) is the risk-free rate. Currently assumed to be 3.89% (10 Y Treasury Rate as of 08/17/2024)
\(\sigma_p\) is the standard deviation of the portfolio returns.
summary_table <- spy_df %>%
summarise(
max = max(Strategy_Return, na.rm = TRUE),
min = min(Strategy_Return, na.rm = TRUE),
median = median(Strategy_Return, na.rm = TRUE),
sd = sd(Strategy_Return, na.rm = TRUE),
sharpe = {
# Calculate annualized return
annualized_return <- function(returns) {
n_months <- length(returns)
(prod(1 + returns)^(12 / n_months)) - 1
}
# Calculate annualized volatility
annualized_volatility <- function(returns) {
sd(returns) * sqrt(12)
}
# Sharpe ratio calculation
risk_free_rate <- 0.0398
ann_return <- annualized_return(Strategy_Return)
ann_volatility <- annualized_volatility(Strategy_Return)
(ann_return - risk_free_rate) / ann_volatility
}
)
summary_table %>%
kable(digits = 3, col.names = c("Max", "Min", "Median", "SD", "Sharpe Ratio")) |>
kable_styling(bootstrap_options = c("striped", "hover")) |>
kableExtra::scroll_box( height = "500px")
Max | Min | Median | SD | Sharpe Ratio |
---|---|---|---|---|
0.019 | -0.165 | -0.014 | 0.032 | -2.824 |
# export to LaTex
print(xtable(summary_table, digits = 3, caption = "Summary Statistics", label = "tab:summary_table"),
caption.placement = "top", include.rownames = FALSE, caption.width = "0.5\\textwidth")
## % latex table generated in R 4.3.3 by xtable 1.8-4 package
## % Sun Aug 18 16:39:12 2024
## \begin{table}[ht]
## \centering
## \parbox{0.5\textwidth}{\caption{Summary Statistics}}
## \label{tab:summary_table}
## \begin{tabular}{rrrrr}
## \hline
## max & min & median & sd & sharpe \\
## \hline
## 0.019 & -0.165 & -0.014 & 0.032 & -2.824 \\
## \hline
## \end{tabular}
## \end{table}
# Set up
set.seed(123) # For reproducibility
options(scipen = n)
# Number of years
T <- 25
# Initial investment
initial_investment <- 1000000
# Risk-free asset return
rf <- 0.01
# Mean and covariance matrix for risky assets a and b
mu <- c(1, 1) # Expected dividends
Sigma <- matrix(c(1, 0, 0, 1), nrow = 2) # Covariance matrix
# Number of units purchased based on portfolio allocation
units_risk_free <- 5000 # Number of risk-free units (at $100 each)
units_a <- 11351 # Number of units of asset a
units_b <- 11351 # Number of units of asset b
# Number of simulations
num_simulations <- 20
# Initialize a data frame to store all simulation results
all_simulations <- data.frame()
#Run multiple simulations
for (sim in 1:num_simulations) {
# Simulate the dividends for t = 1 to T
dividends <- mvrnorm(n = T, mu = mu, Sigma = Sigma)
# Calculate wealth over time
wealth <- numeric(T)
wealth[1] <- initial_investment # Initial wealth
for (t in 2:T) {
# Wealth from risk-free asset
wealth_risk_free <- units_risk_free * (100 + rf * 100 * t) # Principal + cumulative coupon payments
# Wealth from risky assets (dividends received)
wealth_a <- units_a * sum(dividends[1:t, 1])
wealth_b <- units_b * sum(dividends[1:t, 2])
# Total wealth at time t
wealth[t] <- wealth_risk_free + wealth_a + wealth_b
}
# Store the results in a data frame
sim_results <- data.frame(Time = 1:T, Wealth = wealth, Simulation = as.factor(sim))
all_simulations <- rbind(all_simulations, sim_results)
}
# Plot all simulations on one graph
sim_plot <- ggplot(all_simulations, aes(x = Time, y = Wealth, group = Simulation, color = Simulation)) +
geom_line(size = 1, alpha = 0.7) +
labs(title = "Wealth Over Time Across 20 Simulations",
x = "Time (Years)",
y = "Wealth ($)") +
scale_y_continuous() +
theme_minimal() +
theme(legend.position = "none")
print(sim_plot)