The purpose of this script is to show the steps and decisions taken to complete the pre-assignment 2 for FINA 9210.
The data used are downloaded from Dropbox and saved locally in the same folder as this script.
The data is first downloaded from WRDS and stored locally, which will then used for analysis.
Data between 1990-01-01 and 2023-12-31.
wrds <- dbConnect(Postgres(),
host = 'wrds-pgdata.wharton.upenn.edu',
port = 9737,
user = 'ktl24537', # replace with your WRDS username
password = 'Letuankiet0101@', # replace with your WRDS password
sslmode = 'require',
dbname = 'wrds')
# SQL query to get ETF returns
query <- "
SELECT date, permno, ret, prc
FROM crsp_a_stock.msf
WHERE permno IN (SELECT permno FROM crsp.stocknames WHERE ticker IN ('SPY','QQQ','SHY','TLT','VB','VTV','VNQ'))
AND date BETWEEN '1990-01-01' AND '2023-12-31'
"
# Execute the query
etf_returns <- dbGetQuery(wrds, query)
# Close the connection
dbDisconnect(wrds)
# View the first few rows of the data
head(etf_returns)
# save as csv
write.csv(etf_returns, "returns.csv", row.names = FALSE)
stock <- read.csv("returns.csv")
ticker <- read.csv("ticker_permno.csv")
stock$permno <- as.character(stock$permno)
ticker$permno <- as.character(ticker$permno)
# For each ticker, keep observations of the most recent permno
ticker_list <- ticker %>%
group_by(ticker) %>%
filter(date == max(date)) %>%
ungroup() %>%
dplyr::select(ticker, permno)
stock <- stock %>%
left_join(ticker_list, by = "permno") %>%
dplyr::select(-permno)
stock$date <- as.Date(stock$date)
stock %>% count(ticker) %>% kable(digits = 3, col.names = c("Ticker Symbol", "N")) |>
kable_styling(bootstrap_options = c("striped", "hover")) |>
kableExtra::scroll_box( height = "500px")
Ticker Symbol | N |
---|---|
QQQ | 299 |
SHY | 259 |
SPY | 372 |
TLT | 259 |
VB | 240 |
VNQ | 233 |
VTV | 240 |
# filter ticker to SPY and filter date to be between 1994 and 2003
spy_setA <- stock %>%
filter(ticker == "SPY") %>%
filter(date >= "1994-01-01" & date <= "2003-12-31") %>%
dplyr::select(date, ret, prc) %>%
mutate(SMA_2mo = zoo::rollmean(prc, k = 2, fill = NA, align = "right"),
SMA_6mo = zoo::rollmean(prc, k = 6, fill = NA, align = "right")) %>%
mutate(signal = 0) %>%
mutate(signal = ifelse(SMA_2mo > SMA_6mo, 1, signal)) %>%
mutate(signal = ifelse(SMA_2mo < SMA_6mo, -1, signal)) %>%
mutate(position = lag(signal, 1),
strat_ret = position * ret) %>%
mutate(cum_strat_ret = cumprod(coalesce(1 + strat_ret, 1)) - 1,
cum_spy_ret = cumprod(coalesce(1 + ret, 1)) - 1) %>%
mutate(log_strat_ret = log(1 + cum_strat_ret),
log_spy_ret = log(1 + cum_spy_ret))
seta <- ggplot(data = spy_setA, aes(x = date)) +
geom_line(aes(y = log_strat_ret, color = "Strategy Returns"), size = 1) +
geom_line(aes(y = log_spy_ret, color = "SPY Returns"), size = 1) +
labs(title = "SMA Crossover Strategy vs SPY Returns",
x = "Date",
y = "Log Cumulative Return") +
scale_color_manual(values = c("Strategy Returns" = "blue", "SPY Returns" = "red")) +
theme_minimal() +
theme(legend.title = element_blank()) # Remove legend title
print(seta)
# filter ticker to SPY and filter date to be between 2004 and 2023
spy_setB <- stock %>%
filter(ticker == "SPY") %>%
filter(date >= "2004-01-01" & date <= "2023-12-31") %>%
dplyr::select(date, ret, prc) %>%
mutate(SMA_2mo = zoo::rollmean(prc, k = 2, fill = NA, align = "right"),
SMA_6mo = zoo::rollmean(prc, k = 6, fill = NA, align = "right")) %>%
mutate(signal = 0) %>%
mutate(signal = ifelse(SMA_2mo > SMA_6mo, 1, signal)) %>%
mutate(signal = ifelse(SMA_2mo < SMA_6mo, -1, signal)) %>%
mutate(position = lag(signal, 1),
strat_ret = position * ret) %>%
mutate(cum_strat_ret = cumprod(coalesce(1 + strat_ret, 1)) - 1,
cum_spy_ret = cumprod(coalesce(1 + ret, 1)) - 1) %>%
mutate(log_strat_ret = log(1 + cum_strat_ret),
log_spy_ret = log(1 + cum_spy_ret))
# Plot cumulative returns by month
setb <- ggplot(data = spy_setB, aes(x = date)) +
geom_line(aes(y = log_strat_ret, color = "Strategy Returns"), size = 1) +
geom_line(aes(y = log_spy_ret, color = "SPY Returns"), size = 1) +
labs(title = "SMA Crossover Strategy vs SPY Returns",
x = "Date",
y = "Log Cumulative Return") +
scale_color_manual(values = c("Strategy Returns" = "blue", "SPY Returns" = "red")) +
theme_minimal() +
theme(legend.title = element_blank()) # Remove legend title
print(setb)
# Looking at the whole period from 1994 to 2023
spy_setC <- stock %>%
filter(ticker == "SPY") %>%
filter(date >= "1994-01-01" & date <= "2023-12-31") %>%
dplyr::select(date, ret, prc) %>%
mutate(SMA_2mo = zoo::rollmean(prc, k = 2, fill = NA, align = "right"),
SMA_6mo = zoo::rollmean(prc, k = 6, fill = NA, align = "right")) %>%
mutate(signal = 0) %>%
mutate(signal = ifelse(SMA_2mo > SMA_6mo, 1, signal)) %>%
mutate(signal = ifelse(SMA_2mo < SMA_6mo, -1, signal)) %>%
mutate(position = lag(signal, 1),
strat_ret = position * ret) %>%
mutate(cum_strat_ret = cumprod(coalesce(1 + strat_ret, 1)) - 1,
cum_spy_ret = cumprod(coalesce(1 + ret, 1)) - 1) %>%
mutate(log_strat_ret = log(1 + cum_strat_ret),
log_spy_ret = log(1 + cum_spy_ret))
# Plot
set_c<- ggplot(data = spy_setC, aes(x = date)) +
geom_line(aes(y = log_strat_ret, color = "Strategy Returns"), size = 1) +
geom_line(aes(y = log_spy_ret, color = "SPY Returns"), size = 1) +
labs(title = "SMA Crossover Strategy vs SPY Returns",
x = "Date",
y = "Log Cumulative Return") +
scale_color_manual(values = c("Strategy Returns" = "blue", "SPY Returns" = "red")) +
theme_minimal() +
theme(legend.title = element_blank()) # Remove legend title
print(set_c)
sum_setA <- spy_setA %>%
dplyr::select(strat_ret) %>%
mutate(set = "1994_2003")
sum_setB <- spy_setB %>%
dplyr::select(strat_ret) %>%
mutate(set = "2004_2023")
combine <- rbind(sum_setA, sum_setB)
combine_sum <- combine %>%
group_by(set) %>%
summarise(
mean = mean(strat_ret, na.rm = TRUE),
max = max(strat_ret, na.rm = TRUE),
min = min(strat_ret, na.rm = TRUE),
median = median(strat_ret, na.rm = TRUE),
sd = sd(strat_ret, na.rm = TRUE))
combine_sum %>%
kable(digits = 3, col.names = c("Date Range", "Mean", "Max", "Min", "Median", "SD")) |>
kable_styling(bootstrap_options = c("striped", "hover")) |>
kableExtra::scroll_box( height = "500px")
Date Range | Mean | Max | Min | Median | SD |
---|---|---|---|---|---|
1994_2003 | 0.004 | 0.105 | -0.141 | 0.008 | 0.047 |
2004_2023 | 0.006 | 0.165 | -0.127 | 0.009 | 0.044 |
#print LaTex
print(xtable(combine_sum, digits = 3, caption = "Summary Statistics", label = "tab:summary"), type = "latex")
## % latex table generated in R 4.3.3 by xtable 1.8-4 package
## % Sat Aug 24 23:48:49 2024
## \begin{table}[ht]
## \centering
## \begin{tabular}{rlrrrrr}
## \hline
## & set & mean & max & min & median & sd \\
## \hline
## 1 & 1994\_2003 & 0.004 & 0.105 & -0.141 & 0.008 & 0.047 \\
## 2 & 2004\_2023 & 0.006 & 0.165 & -0.127 & 0.009 & 0.044 \\
## \hline
## \end{tabular}
## \caption{Summary Statistics}
## \label{tab:summary}
## \end{table}
max_drawdown_setA <- -maxDrawdown(spy_setA$strat_ret)
max_drawdown_setB <- -maxDrawdown(spy_setB$strat_ret)
# rounding
max_drawdown_setA <- round(max_drawdown_setA, 3)
max_drawdown_setB <- round(max_drawdown_setB, 3)
# set as character
max_drawdown_setA <- as.character(max_drawdown_setA)
max_drawdown_setB <- as.character(max_drawdown_setB)
During period 1994-2003, the maximum drawdown is -0.312. During period 2004-2023, the maximum drawdown is -0.255.
sum_setA <- spy_setA %>%
dplyr::select(date, cum_strat_ret, cum_spy_ret, log_strat_ret, log_spy_ret) %>%
mutate(set = "1994_2003")
sum_setB <- spy_setB %>%
dplyr::select(date, cum_strat_ret, cum_spy_ret, log_strat_ret, log_spy_ret) %>%
mutate(set = "2004_2023")
combine <- rbind(sum_setA, sum_setB)
combine <- combine %>%
mutate(diff_cum_ret = cum_strat_ret - cum_spy_ret,
diff_log_ret = log_strat_ret - log_spy_ret)
# Plot diff in log returns
log_diff <- ggplot(data = combine, aes(x = date, y = diff_log_ret)) +
geom_line(aes(color = set), size = 1) +
labs(title = "Difference in Log Cumulative Returns (Strategy - SPY)",
x = "Date",
y = "Difference in Log Cumulative Returns") +
scale_color_manual(values = c("1994_2003" = "brown", "2004_2023" = "violet")) +
theme_minimal() +
theme(legend.title = element_blank()) # Remove legend title
print(log_diff)
ggsave("log_diff.png", log_diff, width = 10, height = 6, dpi = 300)
# Plot diff in cumulative returns
cum_diff <- ggplot(data = combine, aes(x = date, y = diff_cum_ret)) +
geom_line(aes(color = set), size = 1) +
labs(title = "Difference in Cumulative Returns (Strategy - SPY)",
x = "Date",
y = "Difference in Cumulative Returns %") +
scale_color_manual(values = c("1994_2003" = "brown", "2004_2023" = "violet")) +
theme_minimal() +
theme(legend.title = element_blank()) # Remove legend title
print(cum_diff)
wrds <- dbConnect(Postgres(),
host = 'wrds-pgdata.wharton.upenn.edu',
port = 9737,
user = 'ktl24537', # replace with your WRDS username
password = 'Letuankiet0101@', # replace with your WRDS password
sslmode = 'require',
dbname = 'wrds')
# Query to get PERMNOs for U.S. common stocks (shrcd = 10, 11) with end-of-month prices < $5
permnos_query <- "
SELECT DISTINCT permno
FROM crsp.msf
WHERE shrcd IN (10, 11)
AND prc < 5
AND date BETWEEN '1999-01-01' AND '2003-12-31'
AND date = (SELECT MAX(date) FROM crsp.msf WHERE date BETWEEN '1999-01-01' AND '2003-12-31')
"
# Query to get PERMNOs for U.S. common stocks (shrcd = 10, 11) with end-of-month prices < $5
permnos_query <- "
SELECT DISTINCT msf.permno
FROM crsp.msf AS msf
JOIN crsp.stocknames AS sn ON msf.permno = sn.permno
WHERE sn.shrcd IN (10, 11)
AND msf.prc < 5
AND msf.date BETWEEN '1999-01-01' AND '2003-12-31'
AND msf.date = (SELECT MAX(date) FROM crsp.msf WHERE date BETWEEN '1999-01-01' AND '2003-12-31')
"
# Fetch the PERMNOs
permnos <- dbGetQuery(wrds, permnos_query)
# Convert permnos to a vector
permno_vector <- permnos$permno
# Query to get daily returns for the selected PERMNOs
daily_returns_query <- sprintf("
SELECT date, permno, ret, prc, shrout
FROM crsp.dsf
WHERE permno IN (%s)
AND date BETWEEN '1999-01-01' AND '2003-12-31'
", paste(permno_vector, collapse = ","))
# Fetch the daily returns data
daily_returns <- dbGetQuery(wrds, daily_returns_query)
# Close the WRDS connection
dbDisconnect(wrds)
# View the first few rows of the data
head(daily_returns)
# Save locally
write.csv(daily_returns, "permno_returns.csv", row.names = FALSE)
# value weighted
vw_returns <- permno_returns %>%
mutate(market_cap = prc * shrout) %>%
group_by(date) %>%
mutate(total_market_cap = sum(market_cap, na.rm = TRUE)) %>%
ungroup() %>%
mutate(weight = market_cap / total_market_cap) %>%
group_by(date) %>%
summarise(ret = sum(weight * ret, na.rm = TRUE)) %>%
ungroup() %>%
mutate(date = as.Date(date)) %>%
mutate(set = "Value Weighted")
# equal weighted
ew_returns <- permno_returns %>%
group_by(date) %>%
summarise(ret = mean(ret, na.rm = TRUE)) %>%
ungroup() %>%
mutate(date = as.Date(date)) %>%
mutate(set = "Equal Weighted")
# combine
combine_returns <- rbind(vw_returns, ew_returns)
# summary statistics
combine_sum <- combine_returns %>%
group_by(set) %>%
summarise(
mean = mean(ret, na.rm = TRUE),
max = max(ret, na.rm = TRUE),
min = min(ret, na.rm = TRUE),
median = median(ret, na.rm = TRUE),
sd = sd(ret, na.rm = TRUE),
annualized_mean = mean * 252,
annualized_sd = sd * sqrt(252))
combine_sum %>%
kable(digits = 3, col.names = c("Set", "Mean", "Max", "Min", "Median", "SD", "Annualized Daily Mean", "Annualized Daily SD")) |>
kable_styling(bootstrap_options = c("striped", "hover")) |>
kableExtra::scroll_box( height = "500px")
Set | Mean | Max | Min | Median | SD | Annualized Daily Mean | Annualized Daily SD |
---|---|---|---|---|---|---|---|
Equal Weighted | 0.001 | 0.078 | -0.091 | 0.002 | 0.012 | 0.373 | 0.192 |
Value Weighted | 0.001 | 0.180 | -0.088 | 0.002 | 0.027 | 0.328 | 0.436 |
# print LaTex
print(xtable(combine_sum, digits = 3, caption = "Summary Statistics", label = "tab:summary"), type = "latex")
## % latex table generated in R 4.3.3 by xtable 1.8-4 package
## % Sat Aug 24 23:49:09 2024
## \begin{table}[ht]
## \centering
## \begin{tabular}{rlrrrrrrr}
## \hline
## & set & mean & max & min & median & sd & annualized\_mean & annualized\_sd \\
## \hline
## 1 & Equal Weighted & 0.001 & 0.078 & -0.091 & 0.002 & 0.012 & 0.373 & 0.192 \\
## 2 & Value Weighted & 0.001 & 0.180 & -0.088 & 0.002 & 0.027 & 0.328 & 0.436 \\
## \hline
## \end{tabular}
## \caption{Summary Statistics}
## \label{tab:summary}
## \end{table}
# Getting monthly return
monthly_returns <- permno_returns %>%
mutate(date = as.Date(date)) %>%
mutate(year = year(date),
month = month(date)) %>%
group_by(year, month) %>%
summarise(ret = mean(ret, na.rm = TRUE)) %>%
ungroup() %>%
mutate(date = as.Date(paste(year, month, "01", sep = "-")))
fama_2 <- fama %>%
mutate(market = mktrf + rf) %>%
mutate(date = as.Date(dateff)) %>%
summarise(mean = mean(market, na.rm = TRUE),
max = max(market, na.rm = TRUE),
min = min(market, na.rm = TRUE),
median = median(market, na.rm = TRUE),
sd = sd(market, na.rm = TRUE),
annualized_mean = mean * 12,
annualized_sd = sd * sqrt(12))
fama_2 %>%
kable(digits = 3, col.names = c("Mean", "Max", "Min", "Median", "SD", "Annualized Mean", "Annualized SD")) |>
kable_styling(bootstrap_options = c("striped", "hover")) |>
kableExtra::scroll_box( height = "500px")
Mean | Max | Min | Median | SD | Annualized Mean | Annualized SD |
---|---|---|---|---|---|---|
0.009 | 0.136 | -0.172 | 0.014 | 0.045 | 0.109 | 0.157 |
# print LaTex
print(xtable(fama_2, digits = 3, caption = "Summary Statistics for Market Portfolio", label = "tab:summary"), type = "latex")
## % latex table generated in R 4.3.3 by xtable 1.8-4 package
## % Sat Aug 24 23:49:38 2024
## \begin{table}[ht]
## \centering
## \begin{tabular}{rrrrrrrr}
## \hline
## & mean & max & min & median & sd & annualized\_mean & annualized\_sd \\
## \hline
## 1 & 0.009 & 0.137 & -0.172 & 0.014 & 0.045 & 0.109 & 0.157 \\
## \hline
## \end{tabular}
## \caption{Summary Statistics for Market Portfolio}
## \label{tab:summary}
## \end{table}
# merge to vw return using date
vw_returns <- merge(vw_returns, daily_rf, by = "date")
ew_returns <- merge(ew_returns, daily_rf, by = "date")
vw_returns <- vw_returns %>%
mutate(rt_minus_rf = ret - rf)
ew_returns <- ew_returns %>%
mutate(rt_minus_rf = ret - rf)
# sharpe ratio
vw_sharpe <- mean(vw_returns$rt_minus_rf, na.rm = TRUE) / sd(vw_returns$rt_minus_rf, na.rm = TRUE) * sqrt(252)
ew_sharpe <- mean(ew_returns$rt_minus_rf, na.rm = TRUE) / sd(ew_returns$rt_minus_rf, na.rm = TRUE) * sqrt(252)
vw_sharpe <- round(vw_sharpe, 3)
ew_sharpe <- round(ew_sharpe, 3)
vw_sharpe <- as.character(vw_sharpe)
ew_sharpe <- as.character(ew_sharpe)
The Sharpe ratio for the value-weighted portfolio is 0.676, and the Sharpe ratio for the equal-weighted portfolio is 1.765.
# Extract year and month from the date
daily_returns <- permno_returns %>%
mutate(date = as.Date(date)) %>%
mutate(year = format(date, "%Y"),
month = format(date, "%m"))
# Calculate the standard deviation of daily returns for each stock in each month
monthly_sd <- daily_returns %>%
group_by(permno, year, month) %>%
summarise(sd = sd(ret, na.rm = TRUE)) %>%
ungroup()
# Calculate the cross-sectional median of the standard deviations for each month
monthly_sd <- monthly_sd %>%
group_by(year, month) %>%
mutate(median_sd = median(sd, na.rm = TRUE),
sd_group = ifelse(sd > median_sd, "high", "low")) %>%
filter (!is.na(sd_group)) %>%
ungroup()
# Merge the SD classification back with daily returns
daily_returns <- merge(daily_returns, monthly_sd, by = c("permno", "year", "month"))
# Calculate portfolio returns for each subsample
portfolio_returns <- daily_returns %>%
mutate(market_cap = prc * shrout) %>%
group_by(year, month, date, sd_group) %>%
summarise(
ew_return = mean(ret, na.rm = TRUE), # Equally-weighted
vw_return = sum(ret * market_cap, na.rm = TRUE) / sum(market_cap, na.rm = TRUE) # Value-weighted
) %>%
ungroup()
# Calculate summary statistics for each subsample
summary_stats <- portfolio_returns %>%
group_by(sd_group) %>%
summarise(
max_ew_return = max(ew_return, na.rm = TRUE),
min_ew_return = min(ew_return, na.rm = TRUE),
mean_ew_return = mean(ew_return, na.rm = TRUE),
median_ew_return = median(ew_return, na.rm = TRUE),
sd_ew_return = sd(ew_return, na.rm = TRUE),
annualized_mean_ew = mean(ew_return, na.rm = TRUE) * 252,
annualized_sd_ew = sd(ew_return, na.rm = TRUE) * sqrt(252),
sharpe_ratio_ew = (mean(ew_return, na.rm = TRUE) * 252) / (sd(ew_return, na.rm = TRUE) * sqrt(252)),
max_vw_return = max(vw_return, na.rm = TRUE),
min_vw_return = min(vw_return, na.rm = TRUE),
mean_vw_return = mean(vw_return, na.rm = TRUE),
median_vw_return = median(vw_return, na.rm = TRUE),
sd_vw_return = sd(vw_return, na.rm = TRUE),
annualized_mean_vw = mean(vw_return, na.rm = TRUE) * 252,
annualized_sd_vw = sd(vw_return, na.rm = TRUE) * sqrt(252),
sharpe_ratio_vw = (mean(vw_return, na.rm = TRUE) * 252) / (sd(vw_return, na.rm = TRUE) * sqrt(252))
)
# Calculate the difference between EW and VW portfolios for both high and low SD groups
summary_stats <- summary_stats %>%
mutate(difference_ew_vw = mean_ew_return - mean_vw_return)
# View the summary statistics
print(summary_stats)
## # A tibble: 2 × 18
## sd_group max_ew_return min_ew_return mean_ew_return median_ew_return
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 high 0.134 -0.138 0.00366 0.00440
## 2 low 0.0224 -0.0435 -0.000701 -0.000285
## # ℹ 13 more variables: sd_ew_return <dbl>, annualized_mean_ew <dbl>,
## # annualized_sd_ew <dbl>, sharpe_ratio_ew <dbl>, max_vw_return <dbl>,
## # min_vw_return <dbl>, mean_vw_return <dbl>, median_vw_return <dbl>,
## # sd_vw_return <dbl>, annualized_mean_vw <dbl>, annualized_sd_vw <dbl>,
## # sharpe_ratio_vw <dbl>, difference_ew_vw <dbl>
# print Latex
print(xtable(summary_stats, digits = 3, caption = "Summary Statistics for High and Low SD Groups", label = "tab:summary"), type = "latex")
## % latex table generated in R 4.3.3 by xtable 1.8-4 package
## % Sat Aug 24 23:51:01 2024
## \begin{table}[ht]
## \centering
## \begin{tabular}{rlrrrrrrrrrrrrrrrrr}
## \hline
## & sd\_group & max\_ew\_return & min\_ew\_return & mean\_ew\_return & median\_ew\_return & sd\_ew\_return & annualized\_mean\_ew & annualized\_sd\_ew & sharpe\_ratio\_ew & max\_vw\_return & min\_vw\_return & mean\_vw\_return & median\_vw\_return & sd\_vw\_return & annualized\_mean\_vw & annualized\_sd\_vw & sharpe\_ratio\_vw & difference\_ew\_vw \\
## \hline
## 1 & high & 0.134 & -0.138 & 0.004 & 0.004 & 0.019 & 0.923 & 0.294 & 3.136 & 0.253 & -0.149 & 0.004 & 0.004 & 0.039 & 1.030 & 0.618 & 1.667 & -0.000 \\
## 2 & low & 0.022 & -0.043 & -0.001 & -0.000 & 0.006 & -0.177 & 0.099 & -1.787 & 0.100 & -0.100 & 0.000 & 0.000 & 0.022 & 0.052 & 0.350 & 0.148 & -0.001 \\
## \hline
## \end{tabular}
## \caption{Summary Statistics for High and Low SD Groups}
## \label{tab:summary}
## \end{table}