knitr::opts_chunk$set(echo = TRUE)
Repeat the cross-sectional stock return predictability discussed in the class with the following
rm(list=ls())
rm(list = ls())
library(data.table)
source("C:/Users/user/Documents/OneDrive/桌面/機器學習與財務計量/Database/some_useful_functions.R")
ff5f <- fread("C:/Users/user/Documents/OneDrive/桌面/機器學習與財務計量/Database/ff5f_monthly.csv")
dat = setDT( readRDS("C:/Users/user/Documents/OneDrive/桌面/機器學習與財務計量/HW/HW2/DATA_for_Monthly_rebalancing.RDS") )
d = sort( unique(dat$eom) ) # take out the each value of eom and sort
result_dat <- dat[,.(mktcap_lag20 = quantile(mktcap_lag, 0.2)), keyby = eom] # calculate the 20th percentile for each market_lag
dat <- merge(dat, result_dat, by = "eom")
Exclude the stocks with mktcap_lag less than the 20% quantile
dat80 <- dat[dat$mktcap_lag >= dat$mktcap_lag20,]
rm(dat)
frank.dat80[,.N, by = "accrual_lag"] # Confirm the quantity for subsequent proofreading
pred_name <- names(dat80)[10:39]
col <- pred_name
dat80[, (paste0(col, "_rank")) := lapply(.SD, function(x) frank(x, ties.method = "min")),
by = "eom", .SDcols = pred_name]
head(dat80)
dat80[,.N, by = "accrual_lag"] # Confirm the quantity, it should not change
Just leave the sorted data, reducing the burden of running data backwards.
names(dat80)
## [1] "eom" "permno"
## [3] "ret_adj" "mktcap"
## [5] "mktcap_lag" "exchange"
## [7] "industry" "altprc"
## [9] "shrout" "accrual_lag"
## [11] "aftret_invcapx_lag" "at_turn_lag"
## [13] "bm_lag" "CAPEI_lag"
## [15] "cash_debt_lag" "cash_lt_lag"
## [17] "de_ratio_lag" "debt_at_lag"
## [19] "debt_ebitda_lag" "equity_invcap_lag"
## [21] "evm_lag" "gpm_lag"
## [23] "GProf_lag" "lt_debt_lag"
## [25] "lt_ppent_lag" "npm_lag"
## [27] "pay_turn_lag" "pcf_lag"
## [29] "pe_exi_lag" "ps_lag"
## [31] "ptb_lag" "quick_ratio_lag"
## [33] "rect_act_lag" "rect_turn_lag"
## [35] "roa_lag" "roe_lag"
## [37] "sale_equity_lag" "sale_nwc_lag"
## [39] "short_debt_lag" "mktcap_lag20"
## [41] "accrual_lag_rank" "aftret_invcapx_lag_rank"
## [43] "at_turn_lag_rank" "bm_lag_rank"
## [45] "CAPEI_lag_rank" "cash_debt_lag_rank"
## [47] "cash_lt_lag_rank" "de_ratio_lag_rank"
## [49] "debt_at_lag_rank" "debt_ebitda_lag_rank"
## [51] "equity_invcap_lag_rank" "evm_lag_rank"
## [53] "gpm_lag_rank" "GProf_lag_rank"
## [55] "lt_debt_lag_rank" "lt_ppent_lag_rank"
## [57] "npm_lag_rank" "pay_turn_lag_rank"
## [59] "pcf_lag_rank" "pe_exi_lag_rank"
## [61] "ps_lag_rank" "ptb_lag_rank"
## [63] "quick_ratio_lag_rank" "rect_act_lag_rank"
## [65] "rect_turn_lag_rank" "roa_lag_rank"
## [67] "roe_lag_rank" "sale_equity_lag_rank"
## [69] "sale_nwc_lag_rank" "short_debt_lag_rank"
dat80[, c("accrual_lag", "aftret_invcapx_lag", "at_turn_lag",
"bm_lag", "CAPEI_lag", "cash_debt_lag", "cash_lt_lag",
"de_ratio_lag", "debt_at_lag", "debt_ebitda_lag",
"equity_invcap_lag", "evm_lag", "gpm_lag", "GProf_lag",
"lt_debt_lag", "lt_ppent_lag", "npm_lag", "pay_turn_lag",
"pcf_lag", "pe_exi_lag", "ps_lag", "ptb_lag", "quick_ratio_lag",
"rect_act_lag", "rect_turn_lag", "roa_lag", "roe_lag",
"sale_equity_lag", "sale_nwc_lag", "short_debt_lag"):=NULL]
ret_adj, use
all of the 30 predictors(accrual_lag_rank, …… ,
short_debt_lag), and re-estimate the linear model after
12 months.#-------------------------------------------------------------------#
# Use 30 years of data to build linear model and
# predict the next 12 months ahead future cross-sectional return
#-------------------------------------------------------------------#
# Checking if the indexing is correct
for(i in seq(360,(length(d)-12),by=12) ) {
cat( paste0("Training data: ", d[i-359], " ~ ", d[i]), "\n" )
cat( paste0(" Prediction: ", d[i+1], " ~ ", d[i+12]), "\n" )
}
## Training data: 1971-07-31 ~ 2001-06-30
## Prediction: 2001-07-31 ~ 2002-06-30
## Training data: 1972-07-31 ~ 2002-06-30
## Prediction: 2002-07-31 ~ 2003-06-30
## Training data: 1973-07-31 ~ 2003-06-30
## Prediction: 2003-07-31 ~ 2004-06-30
## Training data: 1974-07-31 ~ 2004-06-30
## Prediction: 2004-07-31 ~ 2005-06-30
## Training data: 1975-07-31 ~ 2005-06-30
## Prediction: 2005-07-31 ~ 2006-06-30
## Training data: 1976-07-31 ~ 2006-06-30
## Prediction: 2006-07-31 ~ 2007-06-30
## Training data: 1977-07-31 ~ 2007-06-30
## Prediction: 2007-07-31 ~ 2008-06-30
## Training data: 1978-07-31 ~ 2008-06-30
## Prediction: 2008-07-31 ~ 2009-06-30
## Training data: 1979-07-31 ~ 2009-06-30
## Prediction: 2009-07-31 ~ 2010-06-30
## Training data: 1980-07-31 ~ 2010-06-30
## Prediction: 2010-07-31 ~ 2011-06-30
## Training data: 1981-07-31 ~ 2011-06-30
## Prediction: 2011-07-31 ~ 2012-06-30
## Training data: 1982-07-31 ~ 2012-06-30
## Prediction: 2012-07-31 ~ 2013-06-30
## Training data: 1983-07-31 ~ 2013-06-30
## Prediction: 2013-07-31 ~ 2014-06-30
## Training data: 1984-07-31 ~ 2014-06-30
## Prediction: 2014-07-31 ~ 2015-06-30
## Training data: 1985-07-31 ~ 2015-06-30
## Prediction: 2015-07-31 ~ 2016-06-30
## Training data: 1986-07-31 ~ 2016-06-30
## Prediction: 2016-07-31 ~ 2017-06-30
## Training data: 1987-07-31 ~ 2017-06-30
## Prediction: 2017-07-31 ~ 2018-06-30
## Training data: 1988-07-31 ~ 2018-06-30
## Prediction: 2018-07-31 ~ 2019-06-30
## Training data: 1989-07-31 ~ 2019-06-30
## Prediction: 2019-07-31 ~ 2020-06-30
## Training data: 1990-07-31 ~ 2020-06-30
## Prediction: 2020-07-31 ~ 2021-06-30
## Training data: 1991-07-31 ~ 2021-06-30
## Prediction: 2021-07-31 ~ 2022-06-30
names(dat80)
## [1] "eom" "permno"
## [3] "ret_adj" "mktcap"
## [5] "mktcap_lag" "exchange"
## [7] "industry" "altprc"
## [9] "shrout" "mktcap_lag20"
## [11] "accrual_lag_rank" "aftret_invcapx_lag_rank"
## [13] "at_turn_lag_rank" "bm_lag_rank"
## [15] "CAPEI_lag_rank" "cash_debt_lag_rank"
## [17] "cash_lt_lag_rank" "de_ratio_lag_rank"
## [19] "debt_at_lag_rank" "debt_ebitda_lag_rank"
## [21] "equity_invcap_lag_rank" "evm_lag_rank"
## [23] "gpm_lag_rank" "GProf_lag_rank"
## [25] "lt_debt_lag_rank" "lt_ppent_lag_rank"
## [27] "npm_lag_rank" "pay_turn_lag_rank"
## [29] "pcf_lag_rank" "pe_exi_lag_rank"
## [31] "ps_lag_rank" "ptb_lag_rank"
## [33] "quick_ratio_lag_rank" "rect_act_lag_rank"
## [35] "rect_turn_lag_rank" "roa_lag_rank"
## [37] "roe_lag_rank" "sale_equity_lag_rank"
## [39] "sale_nwc_lag_rank" "short_debt_lag_rank"
pred_name <- names(dat80[,11:40]) # take out the predictors
for(i in seq(360,(length(d)-12),by=12) ) {
train_dat = dat80[ eom %between% c(d[i-359], d[i]),
c("ret_adj",pred_name), with = FALSE]
#----- where the magical prediction happens ----#
lm_mdl = lm(ret_adj ~ ., data=train_dat) # model estimation
x_test = dat80[ eom %between% c(d[i+1], d[i+12]), pred_name, with=F ]
dat80[eom %between% c(d[i+1], d[i+12]), # 呼叫i+1 ~ i+12的樣本
pred := predict(lm_mdl, newdata = x_test ) ]
#----- end of prediction task ----#
}
mkt_cap_lag as the
portfolio weight.Check the maximum and minimum values of the predicted values.
Range = dat80[, as.list(range(pred, na.rm=T)), keyby = eom]
dat_for_portfolio = na.omit(dat80[,.(eom, ret_adj, mktcap_lag, pred)],"pred")
dat_for_portfolio[, portf_group := assign_portfolio(pred, seq(0.2,0.8,0.2)), by = eom]
# group by 20th percentile
Calculate the weighted average rate of return for each portfolio by
the weight of mkt_cap_lag.
quintile_portf <- dat_for_portfolio[, .(ret = weighted.mean(ret_adj, w = mktcap_lag, na.rm = T)),
keyby = .(eom, portf_group)]
setkey(quintile_portf, eom)
setkey(ff5f, eom)
Import the eom and RF(risk-free rate) of
ff5f to calculate the excess return of each portfolio.
quintile_portf <- merge(quintile_portf, ff5f[,.(eom,RF)], all.x = TRUE)
quintile_portf[, excess_ret := 100*ret - RF]
quintile_portf <- dcast(quintile_portf,eom ~ portf_group, value.var = "excess_ret")
Calculate the average returns, volatility, and Sharpe ratio of each portfolio.
m <- unlist( quintile_portf[, lapply(.SD, mean), .SDcols = 2L:6L] )
s <- unlist( quintile_portf[, lapply(.SD, sd), .SDcols = 2L:6L] )
sharpe_ratio <- sqrt(12)*m/s
m
## 1 2 3 4 5
## 0.4515270 0.6009793 0.7363267 0.9246373 0.8753206
s
## 1 2 3 4 5
## 4.780716 4.585551 4.573277 5.064850 5.092912
sharpe_ratio
## 1 2 3 4 5
## 0.3271759 0.4540028 0.5577425 0.6324052 0.5953764
The sorting based on linear regression prediction does not generate exactly monotonically increasing/decreasing portfolio performance. Because the fifth portfolio performed worse than the fourth portfolio with a lower Sharpe ratio.
Calculate the cumulated returns of long-short portfolios.
ls_portf <- unlist( quintile_portf[,6L] - quintile_portf[,2L] )
ls_portf_wealth = cumprod(1 + ls_portf/100)
library(xts)
ls_portf_wealth = xts(ls_portf_wealth, as.Date(quintile_portf[,eom]) )
plot(ls_portf_wealth)
Calculate the Sharpe ratio of the long-short portfolio constructed with linear regression prediction.
sqrt(12)*mean(ls_portf)/sd(ls_portf)
## [1] 0.6063449
bm)benchmark_portfolio_B = na.omit(dat80,"pred")
benchmark_portfolio_B[, pred := bm_lag_rank]
Divide into five portfolio groups according to each 20th percentile of the predicted value under each eom.
benchmark_portfolio_B[, portf_group := assign_portfolio(pred, seq(0.2,0.8,0.2)), by = eom] #
Each portfolio group calculates the rate of return based on the weighted average of the market capitalization of each company.
bm_portf_B <- benchmark_portfolio_B[, .(ret = weighted.mean(ret_adj, w = mktcap_lag, na.rm = T)),
keyby = .(eom, portf_group)]
setkey(bm_portf_B, eom)
setkey(ff5f, eom)
Import the eom and RF(risk-free rate) of
ff5f to calculate the excess return of each portfolio.
bm_portf_B <- merge(bm_portf_B, ff5f[,.(eom,RF)], all.x = TRUE)
bm_portf_B[, excess_ret := 100*ret - RF]
bm_portf_B <- dcast(bm_portf_B, eom ~ portf_group, value.var = "excess_ret")
Calculate the average returns and volatility of each portfolio.
bm_m_B <- unlist( bm_portf_B[, lapply(.SD,mean), .SDcols = 2L:6L] )
bm_s_B <- unlist( bm_portf_B[, lapply(.SD,sd), .SDcols = 2L:6L] )
Calculate the cumulated returns of long-short portfolios.
bm_ls_portf_B <- unlist( bm_portf_B[,6L] - bm_portf_B[,2L] )
bm_ls_portf_wealth_B = cumprod(1 + bm_ls_portf_B/100)
library(xts)
bm_ls_portf_wealth_B = xts(bm_ls_portf_wealth_B, as.Date(bm_portf_B[,eom]) )
lines(bm_ls_portf_wealth_B, col = 2)
Calculate the Sharpe ratio of the long-short
quintile sorted portfolio based on the Book-to-market
ratio(bm).
sqrt(12)*mean(bm_ls_portf_B)/sd(bm_ls_portf_B)
## [1] 0.1111413
roe)benchmark_portfolio_E = na.omit(dat80,"pred")
benchmark_portfolio_E[, pred := roe_lag_rank]
Divide into five portfolio groups according to each 20th percentile of the predicted value under each eom.
benchmark_portfolio_E[, portf_group := assign_portfolio(pred, seq(0.2,0.8,0.2)), by = eom]
Each portfolio group calculates the rate of return based on the weighted average of the market capitalization of each company.
bm_portf_E <- benchmark_portfolio_E[, .(ret = weighted.mean(ret_adj, w = mktcap_lag, na.rm = T)),
keyby = .(eom, portf_group)]
setkey(bm_portf_E, eom)
setkey(ff5f, eom)
Import the eom and RF(risk-free rate) of
ff5f to calculate the excess return of each portfolio.
bm_portf_E <- merge(bm_portf_E, ff5f[,.(eom,RF)], all.x = TRUE)
bm_portf_E[, excess_ret := 100*ret - RF]
bm_portf_E <- dcast(bm_portf_E, eom ~ portf_group, value.var = "excess_ret")
Calculate the average returns and volatility of each portfolio.
bm_m_E <- unlist( bm_portf_E[, lapply(.SD,mean), .SDcols = 2L:6L] )
bm_s_E <- unlist( bm_portf_E[, lapply(.SD,sd), .SDcols = 2L:6L] )
Calculate the cumulated returns of long-short portfolios.
bm_ls_portf_E <- unlist( bm_portf_E[,6L] - bm_portf_E[,2L] )
bm_ls_portf_wealth_E = cumprod(1 + bm_ls_portf_E/100)
library(xts)
bm_ls_portf_wealth_E = xts(bm_ls_portf_wealth_E, as.Date(bm_portf_E[,eom]) )
lines(bm_ls_portf_wealth_E, col = 3)
Calculate the Sharpe ratio of the long-short
quintile sorted portfolio based on the Return on
equity(roe).
sqrt(12)*mean(bm_ls_portf_E)/sd(bm_ls_portf_E)
## [1] 0.0983983
gpm)benchmark_portfolio_G = na.omit(dat80,"pred")
benchmark_portfolio_G[, pred := gpm_lag_rank]
Divide into five portfolio groups according to each 20th percentile of the predicted value under each eom.
benchmark_portfolio_G[, portf_group := assign_portfolio(pred, seq(0.2,0.8,0.2)), by = eom]
Each portfolio group calculates the rate of return based on the weighted average of the market capitalization of each company.
bm_portf_G <- benchmark_portfolio_G[, .(ret = weighted.mean(ret_adj, w = mktcap_lag, na.rm = T)),
keyby = .(eom, portf_group)]
setkey(bm_portf_G, eom)
setkey(ff5f, eom)
Import the eom and RF(risk-free rate) of
ff5f to calculate the excess return of each portfolio.
bm_portf_G <- merge(bm_portf_G, ff5f[,.(eom,RF)], all.x = TRUE)
bm_portf_G[, excess_ret := 100*ret - RF]
bm_portf_G <- dcast(bm_portf_G, eom ~ portf_group, value.var = "excess_ret")
Calculate the average returns and volatility of each portfolio.
bm_m_G <- unlist( bm_portf_G[, lapply(.SD,mean), .SDcols = 2L:6L] )
bm_s_G <- unlist( bm_portf_G[, lapply(.SD,sd), .SDcols = 2L:6L] )
Calculate the cumulated returns of long-short portfolios.
bm_ls_portf_G <- unlist( bm_portf_G[,6L] - bm_portf_G[,2L] )
bm_ls_portf_wealth_G = cumprod(1 + bm_ls_portf_G/100)
library(xts)
bm_ls_portf_wealth_G = xts(bm_ls_portf_wealth_G, as.Date(bm_portf_G[,eom]) )
lines(bm_ls_portf_wealth_G, col = 4)
Calculate the Sharpe ratio of the long-short
quintile sorted portfolio based on the Gross-profit
margin(gpm).
sqrt(12)*mean(bm_ls_portf_G)/sd(bm_ls_portf_G)
## [1] -0.032847
Transform the ret_adj into binary
variable y = 1 if ret_adj is positive and y = 0 if
ret_adj is less than or equal to zero.
Repeat Problem 1, but now in the Step 3, use the newly transformed dependent variable as target.
To form the probability predictions, employ three separate statistical models: (1) logistic regression, (2) linear discriminant analysis, (3) naive Bayes classifer. Similar to the Problem 1, construct the quintile portfolio sorts for each of these statistical models, but now use the predicted probabilities as sorting variables.
Transform the ret_adj into binary
variable.
dat80[, ret_adj_binary := ifelse(dat80[,ret_adj] > 0, 1, 0)]
#-------------------------------------------------------------------#
# Use 30 years of data to build logistic regression model and
# predict the next 12 months ahead future cross-sectional return
#-------------------------------------------------------------------#
# Checking if the indexing is correct
for(i in seq(360,(length(d)-12),by=12) ) {
cat( paste0("Training data: ", d[i-359], " ~ ", d[i]), "\n" )
cat( paste0(" Prediction: ", d[i+1], " ~ ", d[i+12]), "\n" )
}
names(dat80)
pred_name <- names(dat80[,11:40]) # take out the predictors
for(i in seq(360,(length(d)-12),by=12) ) {
train_dat = dat80[ eom %between% c(d[i-359], d[i]),
c("ret_adj_binary",pred_name), with = FALSE]
#----- where the magical prediction happens ----#
logisticReg = glm(ret_adj_binary ~ ., data = train_dat,
family = binomial(link = "logit")) # model estimation
x_test = dat80[ eom %between% c(d[i+1], d[i+12]), pred_name, with=F ]
dat80[eom %between% c(d[i+1], d[i+12]), # 呼叫i + 1 ~ i + 12的樣本
ProbEstimateLog := predict(logisticReg, newdata = x_test, type = "response")]
#----- end of prediction task ----#
}
Form the quintile sort portfolios using the logistic
regression model prediction. Use the mkt_cap_lag as the
portfolio weight.
Check the maximum and minimum values of the predicted values.
Range = dat80[, as.list(range(ProbEstimateLog, na.rm=T)), keyby = eom]
dat_for_portfolio_Log = na.omit(dat80[,.(eom, ret_adj, mktcap_lag, ProbEstimateLog)],"ProbEstimateLog")
dat_for_portfolio_Log[, portf_group := assign_portfolio(ProbEstimateLog, seq(0.2,0.8,0.2)), by = eom]
# group by 20th percentile
Calculate the weighted average rate of return for each portfolio by
the weight of mkt_cap_lag.
quintile_portf_Log <- dat_for_portfolio_Log[, .(ret = weighted.mean(ret_adj, w = mktcap_lag, na.rm = T)),
keyby = .(eom, portf_group)]
setkey(quintile_portf_Log, eom)
setkey(ff5f, eom)
Import the eom and RF(risk-free rate) of
ff5f to calculate the excess return of each portfolio.
quintile_portf_Log <- merge(quintile_portf_Log, ff5f[,.(eom,RF)], all.x=TRUE)
quintile_portf_Log[, excess_ret := 100*ret - RF]
quintile_portf_Log <- dcast(quintile_portf_Log, eom ~ portf_group, value.var = "excess_ret")
Calculate the average returns and volatility of each portfolio.
m_Log <- unlist( quintile_portf_Log[, lapply(.SD, mean), .SDcols = 2L:6L] )
s_Log <- unlist( quintile_portf_Log[, lapply(.SD, sd), .SDcols = 2L:6L] )
Calculate the cumulated returns of long-short portfolios.
ls_portf_Log <- unlist( quintile_portf_Log[,6L] - quintile_portf_Log[,2L] )
ls_portf_wealth_Log = cumprod(1 + ls_portf_Log/100)
library(xts)
ls_portf_wealth_Log = xts(ls_portf_wealth_Log, as.Date(quintile_portf_Log[,eom]) )
plot(ls_portf_wealth_Log)
Calculate the Sharpe ratio of the long-short portfolio constructed with logistic regression prediction.
sqrt(12)*mean(ls_portf_Log)/sd(ls_portf_Log)
## [1] 0.3308841
#-------------------------------------------------------------------#
# Use 30 years of data to build linear discriminant analysis model and
# predict the next 12 months ahead future cross-sectional return
#-------------------------------------------------------------------#
library(MASS) # package to estimate LDA model
# Checking if the indexing is correct
for(i in seq(360,(length(d)-12),by=12) ) {
cat( paste0("Training data: ", d[i-359], " ~ ", d[i]), "\n" )
cat( paste0(" Prediction: ", d[i+1], " ~ ", d[i+12]), "\n" )
}
names(dat80)
pred_name <- names(dat80[,11:40]) # take out the predictors
for(i in seq(360,(length(d)-12),by=12) ) {
train_dat = dat80[ eom %between% c(d[i-359], d[i]),
c("ret_adj_binary",pred_name), with = FALSE]
#----- where the magical prediction happens ----#
lda_fit = lda(ret_adj_binary ~ ., method = "moment", data = train_dat) # model estimation
x_test = dat80[ eom %between% c(d[i+1], d[i+12]), pred_name, with=F ]
dat80[eom %between% c(d[i+1], d[i+12]), # 呼叫i + 1 ~ i + 12的樣本
lda_pred := predict(lda_fit, newdata = x_test)$posterior[,2]]
#----- end of prediction task ----#
}
Form the quintile sort portfolios using the linear
discriminant analysis model prediction. Use the mkt_cap_lag
as the portfolio weight.
Check the maximum and minimum values of the predicted values.
Range = dat80[, as.list(range(lda_pred, na.rm=T)), keyby = eom]
dat_for_portfolio_LDA = na.omit(dat80[,.(eom, ret_adj, mktcap_lag, lda_pred)],"lda_pred")
dat_for_portfolio_LDA[, portf_group := assign_portfolio(lda_pred, seq(0.2,0.8,0.2)), by = eom]
# group by 20th percentile
Calculate the weighted average rate of return for each portfolio by
the weight of mkt_cap_lag.
quintile_portf_LDA <- dat_for_portfolio_LDA[, .(ret = weighted.mean(ret_adj, w = mktcap_lag, na.rm = T)),
keyby = .(eom, portf_group)]
setkey(quintile_portf_LDA, eom)
setkey(ff5f, eom)
Import the eom and RF(risk-free rate) of
ff5f to calculate the excess return of each portfolio.
quintile_portf_LDA <- merge(quintile_portf_LDA, ff5f[,.(eom,RF)], all.x = TRUE)
quintile_portf_LDA[, excess_ret := 100*ret - RF]
quintile_portf_LDA <- dcast(quintile_portf_LDA, eom ~ portf_group, value.var = "excess_ret")
Calculate the average returns and volatility of each portfolio.
m_LDA <- unlist( quintile_portf_LDA[, lapply(.SD, mean), .SDcols = 2L:6L] )
s_LDA <- unlist( quintile_portf_LDA[, lapply(.SD, sd), .SDcols = 2L:6L] )
Calculate the cumulated returns of long-short portfolios.
ls_portf_LDA <- unlist( quintile_portf_LDA[,6L] - quintile_portf_LDA[,2L] )
ls_portf_wealth_LDA = cumprod(1 + ls_portf_LDA/100)
library(xts)
ls_portf_wealth_LDA = xts(ls_portf_wealth_LDA, as.Date(quintile_portf_LDA[,eom]) )
plot(ls_portf_wealth_LDA)
Calculate the Sharpe ratio of the long-short portfolio constructed with linear discriminant analysis prediction.
sqrt(12)*mean(ls_portf_LDA)/sd(ls_portf_LDA)
## [1] 0.3284319
#-------------------------------------------------------------------#
# Use 30 years of data to build linear discriminant analysis model and
# predict the next 12 months ahead future cross-sectional return
#-------------------------------------------------------------------#
library(e1071) # package to fit naive Bayes classifier```
# Checking if the indexing is correct
for(i in seq(360,(length(d)-12),by=12) ) {
cat( paste0("Training data: ", d[i-359], " ~ ", d[i]), "\n" )
cat( paste0(" Prediction: ", d[i+1], " ~ ", d[i+12]), "\n" )
}
names(dat80)
pred_name <- names(dat80[,11:40]) # take out the predictors
for(i in seq(360,(length(d)-12),by=12) ) {
train_dat = dat80[ eom %between% c(d[i-359], d[i]),
c("ret_adj_binary",pred_name), with = FALSE]
#----- where the magical prediction happens ----#
naiveBayes_fit = naiveBayes(ret_adj_binary ~ ., data = train_dat) # model estimation
x_test = dat80[ eom %between% c(d[i+1], d[i+12]), pred_name, with=F ]
result <- predict(naiveBayes_fit, newdata = x_test, type = "raw")
dat80[eom %between% c(d[i+1], d[i+12]), # 呼叫i + 1 ~ i + 12的樣本
naiveBayes_pred := result[,2]]
#----- end of prediction task ----#
}
Form the quintile sort portfolios using the naive
Bayes classifier model prediction. Use the mkt_cap_lag as
the portfolio weight.
Check the maximum and minimum values of the predicted values.
Range = dat80[, as.list(range(naiveBayes_pred, na.rm=T)), keyby = eom]
dat_for_portfolio_NB = na.omit(dat80[,.(eom, ret_adj, mktcap_lag, naiveBayes_pred)],"naiveBayes_pred")
dat_for_portfolio_NB[, portf_group := assign_portfolio(naiveBayes_pred, seq(0.2,0.8,0.2)), by = eom]
# group by 20th percentile
Calculate the weighted average rate of return for each portfolio by
the weight of mkt_cap_lag.
quintile_portf_NB <- dat_for_portfolio_NB[, .(ret = weighted.mean(ret_adj, w = mktcap_lag, na.rm = T)),
keyby = .(eom, portf_group)]
setkey(quintile_portf_NB, eom)
setkey(ff5f, eom)
Import the eom and RF(risk-free rate) of
ff5f to calculate the excess return of each portfolio.
quintile_portf_NB <- merge(quintile_portf_NB, ff5f[,.(eom,RF)], all.x = TRUE)
quintile_portf_NB[, excess_ret := 100*ret - RF]
quintile_portf_NB <- dcast(quintile_portf_NB, eom ~ portf_group, value.var = "excess_ret")
Calculate the average returns and volatility of each portfolio.
m_NB <- unlist( quintile_portf_NB[, lapply(.SD, mean), .SDcols = 2L:6L] )
s_Nb <- unlist( quintile_portf_NB[, lapply(.SD, sd), .SDcols = 2L:6L] )
Calculate the cumulated returns of long-short portfolios.
ls_portf_NB <- unlist( quintile_portf_NB[,6L] - quintile_portf_NB[,2L] )
ls_portf_wealth_NB = cumprod(1 + ls_portf_NB/100)
library(xts)
ls_portf_wealth_NB = xts(ls_portf_wealth_NB, as.Date(quintile_portf_NB[,eom]) )
plot(ls_portf_wealth_NB)
Calculate the Sharpe ratio of the long-short portfolio constructed with naive Bayes classifier prediction.
sqrt(12)*mean(ls_portf_NB)/sd(ls_portf_NB)
## [1] 0.2786669
From the Problems 1 and 2, we have 7 long-short portfolios: Four
long-short portfolios are based on the prediction from linear
regression, logistic regression, linear discriminant analysis, and naive
Bayes classifier; Three long-short portfolios based on simple
univariate sort of three firm characteristics bm,
roe, and gpm.
For each portfolio, estimate the alpha from the following linear regression:
𝑅portfolio𝑡 = 𝛼 + 𝛽 × MktRf𝑡 + 𝑠 × SMB𝑡 + ℎ × HML𝑡 + 𝜀𝑡
where 𝑅portfolio𝑡 is the long-short portfolio return.
ff5f <- ff5f[457:708,] # retain the data from "2001-07-31" ~ "2022-06-30"
#---------- Prediction portfolio ----------------#
ls_portf_wealth <- as.data.table(ls_portf_wealth) # transform xts to data.table
setnames(ls_portf_wealth, names(ls_portf_wealth), c("eom", "R_portfolio"))
ls_portf_wealth <- merge(ls_portf_wealth, ff5f[, .(eom, MktRf, SMB, HML)], by = "eom", all.x = TRUE)
Run the linear regression model.
lm_mdl_lin = lm(R_portfolio ~ MktRf + SMB + HML, data = ls_portf_wealth)
Check the significance of alpha.
summary(lm_mdl_lin)
##
## Call:
## lm(formula = R_portfolio ~ MktRf + SMB + HML, data = ls_portf_wealth)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.04837 -0.33657 -0.00937 0.27693 1.21309
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.869527 0.031092 60.130 <2e-16 ***
## MktRf 0.014489 0.007388 1.961 0.0510 .
## SMB -0.024699 0.012722 -1.941 0.0533 .
## HML -0.007802 0.010473 -0.745 0.4570
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4885 on 248 degrees of freedom
## Multiple R-squared: 0.02675, Adjusted R-squared: 0.01498
## F-statistic: 2.272 on 3 and 248 DF, p-value: 0.08075
coefficients(lm_mdl_lin)[1] # alpha
## (Intercept)
## 1.869527
summary(lm_mdl_lin)$coefficients[1, "t value"] # check the t value of alpha
## [1] 60.12973
#---------- Prediction portfolio ----------------#
ls_portf_wealth_Log <- as.data.table(ls_portf_wealth_Log) # transform xts to data.table
setnames(ls_portf_wealth_Log, names(ls_portf_wealth_Log), c("eom", "R_portfolio"))
ls_portf_wealth_Log <- merge(ls_portf_wealth_Log, ff5f[, .(eom, MktRf, SMB, HML)], by = "eom", all.x = TRUE)
Run the linear regression model.
lm_mdl_Log = lm(R_portfolio ~ MktRf + SMB + HML, data = ls_portf_wealth_Log)
Check the significance of alpha.
summary(lm_mdl_Log)
##
## Call:
## lm(formula = R_portfolio ~ MktRf + SMB + HML, data = ls_portf_wealth_Log)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.43991 -0.14870 -0.02644 0.13297 0.99333
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.449098 0.013424 107.949 < 2e-16 ***
## MktRf -0.008908 0.003190 -2.793 0.00563 **
## SMB -0.010912 0.005493 -1.987 0.04807 *
## HML -0.002895 0.004522 -0.640 0.52264
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2109 on 248 degrees of freedom
## Multiple R-squared: 0.07406, Adjusted R-squared: 0.06286
## F-statistic: 6.612 on 3 and 248 DF, p-value: 0.0002581
coefficients(lm_mdl_Log)[1] # alpha
## (Intercept)
## 1.449098
summary(lm_mdl_Log)$coefficients[1, "t value"] # check the t value of alpha
## [1] 107.9491
#---------- Prediction portfolio ----------------#
ls_portf_wealth_LDA <- as.data.table(ls_portf_wealth_LDA)
setnames(ls_portf_wealth_LDA, names(ls_portf_wealth_LDA), c("eom", "R_portfolio"))
ls_portf_wealth_LDA <- merge(ls_portf_wealth_LDA, ff5f[, .(eom, MktRf, SMB, HML)], by = "eom", all.x = TRUE)
Run the linear regression model.
lm_mdl_LDA = lm(R_portfolio ~ MktRf + SMB + HML, data = ls_portf_wealth_LDA)
Check the significance of alpha.
summary(lm_mdl_LDA)
##
## Call:
## lm(formula = R_portfolio ~ MktRf + SMB + HML, data = ls_portf_wealth_LDA)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.43221 -0.14758 -0.03006 0.13483 0.98217
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.440947 0.013330 108.095 < 2e-16 ***
## MktRf -0.008911 0.003167 -2.813 0.00529 **
## SMB -0.010631 0.005454 -1.949 0.05241 .
## HML -0.003000 0.004490 -0.668 0.50467
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2095 on 248 degrees of freedom
## Multiple R-squared: 0.074, Adjusted R-squared: 0.0628
## F-statistic: 6.607 on 3 and 248 DF, p-value: 0.0002599
coefficients(lm_mdl_LDA)[1] # alpha
## (Intercept)
## 1.440947
summary(lm_mdl_LDA)$coefficients[1, "t value"] # check the t value of alpha
## [1] 108.0953
#---------- Prediction portfolio ----------------#
ls_portf_wealth_NB <- as.data.table(ls_portf_wealth_NB)
setnames(ls_portf_wealth_NB, names(ls_portf_wealth_NB), c("eom", "R_portfolio"))
ls_portf_wealth_NB <- merge(ls_portf_wealth_NB, ff5f[, .(eom, MktRf, SMB, HML)], by = "eom", all.x = TRUE)
Run the linear regression model.
lm_mdl_NB = lm(R_portfolio ~ MktRf + SMB + HML, data = ls_portf_wealth_NB)
Check the significance of alpha.
summary(lm_mdl_NB)
##
## Call:
## lm(formula = R_portfolio ~ MktRf + SMB + HML, data = ls_portf_wealth_NB)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.66616 -0.19711 -0.02503 0.17965 0.90459
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.679189 0.018153 92.504 < 2e-16 ***
## MktRf -0.018235 0.004313 -4.228 3.32e-05 ***
## SMB -0.002184 0.007428 -0.294 0.769
## HML -0.005831 0.006115 -0.954 0.341
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2852 on 248 degrees of freedom
## Multiple R-squared: 0.08747, Adjusted R-squared: 0.07643
## F-statistic: 7.924 on 3 and 248 DF, p-value: 4.562e-05
coefficients(lm_mdl_NB)[1] # alpha
## (Intercept)
## 1.679189
summary(lm_mdl_NB)$coefficients[1, "t value"] # check the t value of alpha
## [1] 92.50352
bm)#---------- Benchmark portfolio ----------------#
bm_ls_portf_wealth_B <- as.data.table(bm_ls_portf_wealth_B)
setnames(bm_ls_portf_wealth_B, names(bm_ls_portf_wealth_B), c("eom", "R_portfolio"))
bm_ls_portf_wealth_B <- merge(bm_ls_portf_wealth_B, ff5f[, .(eom, MktRf, SMB, HML)], by = "eom", all.x = TRUE)
Run the linear regression model.
bm_lm_mdl_B = lm(R_portfolio ~ MktRf + SMB + HML, data = bm_ls_portf_wealth_B)
Check the significance of alpha.
summary(bm_lm_mdl_B)
##
## Call:
## lm(formula = R_portfolio ~ MktRf + SMB + HML, data = bm_ls_portf_wealth_B)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.02479 -0.37155 0.08162 0.41163 0.65984
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.599897 0.028904 55.352 <2e-16 ***
## MktRf 0.007242 0.006868 1.054 0.293
## SMB -0.007065 0.011827 -0.597 0.551
## HML 0.001167 0.009736 0.120 0.905
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4541 on 248 degrees of freedom
## Multiple R-squared: 0.004781, Adjusted R-squared: -0.007258
## F-statistic: 0.3971 on 3 and 248 DF, p-value: 0.7552
coefficients(bm_lm_mdl_B)[1] # alpha
## (Intercept)
## 1.599897
summary(bm_lm_mdl_B)$coefficients[1, "t value"] # check the t value of alpha
## [1] 55.35217
roe)#---------- Benchmark portfolio ----------------#
bm_ls_portf_wealth_E <- as.data.table(bm_ls_portf_wealth_E)
setnames(bm_ls_portf_wealth_E, names(bm_ls_portf_wealth_E), c("eom", "R_portfolio"))
bm_ls_portf_wealth_E <- merge(bm_ls_portf_wealth_E, ff5f[, .(eom, MktRf, SMB, HML)], by = "eom", all.x = TRUE)
Run the linear regression model.
bm_lm_mdl_E = lm(R_portfolio ~ MktRf + SMB + HML, data = bm_ls_portf_wealth_E)
Check the significance of alpha.
summary(bm_lm_mdl_E)
##
## Call:
## lm(formula = R_portfolio ~ MktRf + SMB + HML, data = bm_ls_portf_wealth_E)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.48716 -0.16056 -0.04462 0.11936 1.00329
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.024285 0.014622 70.050 < 2e-16 ***
## MktRf -0.015166 0.003474 -4.365 1.86e-05 ***
## SMB 0.005742 0.005983 0.960 0.338
## HML -0.006503 0.004926 -1.320 0.188
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2297 on 248 degrees of freedom
## Multiple R-squared: 0.08206, Adjusted R-squared: 0.07096
## F-statistic: 7.39 on 3 and 248 DF, p-value: 9.221e-05
coefficients(bm_lm_mdl_E)[1] # alpha
## (Intercept)
## 1.024285
summary(bm_lm_mdl_E)$coefficients[1, "t value"] # check the t value of alpha
## [1] 70.05035
gpm)#---------- Benchmark portfolio ----------------#
bm_ls_portf_wealth_G <- as.data.table(bm_ls_portf_wealth_G)
setnames(bm_ls_portf_wealth_G, names(bm_ls_portf_wealth_G), c("eom", "R_portfolio"))
bm_ls_portf_wealth_G <- merge(bm_ls_portf_wealth_G, ff5f[, .(eom, MktRf, SMB, HML)], by = "eom", all.x = TRUE)
Run the linear regression model.
bm_lm_mdl_G = lm(R_portfolio ~ MktRf + SMB + HML, data = bm_ls_portf_wealth_G)
Check the significance of alpha.
summary(bm_lm_mdl_G)
##
## Call:
## lm(formula = R_portfolio ~ MktRf + SMB + HML, data = bm_ls_portf_wealth_G)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.18015 -0.10560 -0.04514 0.10464 0.29762
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 8.652e-01 8.195e-03 105.585 <2e-16 ***
## MktRf -8.971e-05 1.947e-03 -0.046 0.963
## SMB 1.977e-03 3.353e-03 0.590 0.556
## HML -1.619e-03 2.760e-03 -0.587 0.558
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1288 on 248 degrees of freedom
## Multiple R-squared: 0.002311, Adjusted R-squared: -0.009757
## F-statistic: 0.1915 on 3 and 248 DF, p-value: 0.9021
coefficients(bm_lm_mdl_G)[1] # alpha
## (Intercept)
## 0.8652204
summary(bm_lm_mdl_G)$coefficients[1, "t value"] # check the t value of alpha
## [1] 105.5852