Repeat the cross-sectional stock return predictability exercise discussed in class, with the following modification:
At the end of each portfolio formation month, exclude the stocks whose market capitalization is below the 20% quantile.
# ---- Setup: packages, helper functions, raw data ----
# Install data.table only when it is missing, so that re-running the script
# does not download and reinstall the package every time.
if (!requireNamespace("data.table", quietly = TRUE)) {
  install.packages("data.table")
}
library(data.table)
# Project helpers; presumably where assign_portfolio() (used further down but
# not defined in this file) comes from -- TODO confirm.
source("C:/Users/88698/Documents/R/some_useful_functions.R")
ff5f <- fread("C:/Users/88698/Documents/R/ff5f_monthly.csv")
dat <- setDT(readRDS("C:/Users/88698/Documents/R/DATA_for_Monthly_Rebalancing.RDS"))
# Every column after the first nine is treated as a predictor.
pred_name <- names(dat)[-(1:9)]
# Sorted distinct month-end dates; the rolling windows below index into d.
d <- sort(unique(dat$eom))

# ---- Size filter ----
# First compute, for every eom, the 20th percentile of mktcap_lag (the PR20 value).
# na.rm = TRUE: without it quantile() stops with an error as soon as one
# mktcap_lag in a month is missing.
result_dat <- dat[, .(mktcap_lag20 = quantile(mktcap_lag, 0.2, na.rm = TRUE)), keyby = eom]
dat <- merge(dat, result_dat, by = "eom")
# Keep stocks at or above the monthly breakpoint. Rows whose comparison is NA
# are not selected (data.table treats NA in i as FALSE).
dat80 <- dat[mktcap_lag >= mktcap_lag20]
Transform each predictor into ranked variable (at the end of each month). You can use the function frank to do this.
# Note: the ranked variables are first written to new columns, and the old
# (unranked) columns are removed afterwards.
pred_name <- names(dat80)[10:39]
col <- pred_name
rank_cols <- paste0(col, "rank")
# Rank each predictor within its month; tied values share the smallest rank.
dat80[, (rank_cols) := lapply(.SD, frank, ties.method = "min"),
      by = eom, .SDcols = pred_name]
# Positional drop of columns 10-40 -- presumably the 30 raw predictors plus
# mktcap_lag20, so that the rank columns move into positions 10-39 (TODO confirm).
dat80 <- dat80[, -c(10:40)]
Use the previous 30 years data for training the linear regression. Similar to the demonstration in the class, the dependent variable is ret_adj, use all of the 30 predictors (accrual_lag, … , short_debt_lag), and re-estimate the linear model after 12 months.
#-------------------------------------------------------------------#
# Rolling scheme: estimate the linear model on 30 years (360 months)
# of data, then predict the cross-section of returns for the
# following 12 months
#-------------------------------------------------------------------#
# Sanity check: print the training and prediction span of every refit
for (i in seq(from = 360, to = length(d) - 12, by = 12)) {
  train_span <- paste0("Training data: ", d[i - 359], " ~ ", d[i])
  test_span <- paste0(" Prediction: ", d[i + 1], " ~ ", d[i + 12])
  cat(train_span, "\n")
  cat(test_span, "\n")
}
pred_name <- names(dat80)[10:39]
for (i in seq(from = 360, to = length(d) - 12, by = 12)) {
  # 360-month estimation window ending at d[i]; next 12 months are predicted
  train_window <- c(d[i - 359], d[i])
  test_window <- c(d[i + 1], d[i + 12])

  train_dat <- dat80[eom %between% train_window,
                     c("ret_adj", pred_name), with = FALSE]
  # Model estimation: ret_adj on all predictor columns
  lm_mdl <- lm(ret_adj ~ ., data = train_dat)

  # Out-of-sample predictions are written into dat80$pred by reference
  x_test <- dat80[eom %between% test_window, pred_name, with = FALSE]
  dat80[eom %between% test_window,
        pred := predict(lm_mdl, newdata = x_test)]
}
Form the quin-tile sort portfolios using the linear regression model prediction. Use the market-cap as the portfolio weight.
# Monthly minimum and maximum of the predictions (kept for inspection only)
Range <- dat80[, as.list(range(pred, na.rm = TRUE)), keyby = eom]
# Columns needed for portfolio formation; rows without a prediction are dropped
portf_input_cols <- c("eom", "ret_adj", "mktcap_lag", "pred")
dat_for_portfolio <- na.omit(dat80[, portf_input_cols, with = FALSE], cols = "pred")
# Sort into groups month by month, using the 20/40/60/80% breakpoints
dat_for_portfolio[, portf_group := assign_portfolio(pred, seq(0.2, 0.8, 0.2)), by = "eom"]
For each portfolio, report the average returns, volatility, and Sharpe ratio. Does the sorting based on linear regression prediction generate monotonically increasing/decreasing portfolio performance?
# ---- Performance of the quintile portfolios (linear-regression sort) ----
# Value-weighted (mktcap_lag) return of every portfolio in every month
quintile_portf <- dat_for_portfolio[, .(ret = weighted.mean(ret_adj, w = mktcap_lag, na.rm = TRUE)),
                                    keyby = .(eom, portf_group)]
quintile_portf
# Both tables are keyed on eom; merge() then joins on that shared key.
# setkey() also sorts ff5f by eom in place.
setkey(quintile_portf, eom)
setkey(ff5f, eom)
quintile_portf <- merge(quintile_portf, ff5f[, .(eom, RF)], all.x = TRUE)
# Excess return: ret is rescaled by 100 before RF is subtracted
# (presumably RF is quoted in percent -- TODO confirm)
quintile_portf[, excess_ret := 100 * ret - RF]
# Reshape so that the five portfolios sit side by side (one row per month)
quintile_portf <- dcast(quintile_portf, eom ~ portf_group, value.var = "excess_ret")
# Columns 2-6 hold the portfolios. m, s and sh are stored but not printed here.
# Average excess return
m <- unlist(quintile_portf[, lapply(.SD, mean), .SDcols = 2L:6L])
# Volatility
s <- unlist(quintile_portf[, lapply(.SD, sd), .SDcols = 2L:6L])
# Annualised Sharpe ratio
sh <- unlist(quintile_portf[, lapply(.SD, \(x) sqrt(12) * mean(x) / sd(x)), .SDcols = 2L:6L])
# Long-short portfolio: last portfolio column minus first portfolio column
ln_ls_portf <- unlist(quintile_portf[, 6L] - quintile_portf[, 2L])
ln_ls_portf_wealth <- cumprod(1 + ln_ls_portf / 100)
# Install xts only if it is not available yet, instead of reinstalling it on
# every run of the script.
if (!requireNamespace("xts", quietly = TRUE)) {
  install.packages("xts")
}
library(xts)
ln_ls_portf_wealth <- xts(ln_ls_portf_wealth, as.Date(quintile_portf[, eom]))
ln_ls_portf <- xts(ln_ls_portf, as.Date(quintile_portf[, eom]))
plot(ln_ls_portf_wealth)
# Annualised Sharpe ratio of the long-short portfolio
sqrt(12) * mean(ln_ls_portf) / sd(ln_ls_portf)
#[1] 0.6063449
According to the plot, the portfolio is generating a monotonically increasing/decreasing portfolio performance.
Construct the following benchmark strategies and compare their Sharpe ratio with the long-short portfolio constructed with linear regression prediction.
a. Market-capitalization weighted portfolio
b. Each of the long-short quintile-sorted portfolios based on the following firm characteristics: book-to-market ratio (bm), return on equity (roe), gross-profit margin (gpm)
# ---- Benchmark (a): market-capitalization weighted portfolio ----
# Use only rows that carry a model prediction, so the benchmark covers the same
# out-of-sample months as the long-short portfolios it is compared with (the
# characteristic-sorted benchmarks below apply the same na.omit on pred).
dat_for_mw <- na.omit(dat80, cols = "pred")
mw_port <- dat_for_mw[, .(ret = weighted.mean(ret_adj, w = mktcap_lag, na.rm = TRUE)),
                      keyby = .(eom)]
# A Sharpe ratio is defined on returns in excess of the risk-free rate, so
# build the percent excess return exactly as for the other portfolios
# (100 * ret - RF) instead of using the raw return.
mw_port <- merge(mw_port, ff5f[, .(eom, RF)], by = "eom", all.x = TRUE)
mw_port[, excess_ret := 100 * ret - RF]
m_m <- mean(mw_port$excess_ret)  # average monthly excess return (percent)
m_s <- sd(mw_port$excess_ret)    # monthly volatility (percent)
m_m
m_s
# Annualised Sharpe ratio
sqrt(12) * m_m / m_s
# The value below was recorded from the earlier raw-return, full-sample
# computation; re-run the line above to refresh it.
#[1] 0.7085098
#---------- Benchmark portfolio for bm----------------#
# Same sample as the model portfolios (rows with a prediction); pred is then
# overwritten in this copy with the ranked book-to-market characteristic.
benchmark_portfolio <- na.omit(dat80, cols = "pred")
benchmark_portfolio[, pred := bm_lagrank]
benchmark_portfolio[, portf_group := assign_portfolio(pred, seq(0.2, 0.8, 0.2)), by = "eom"]
# Value-weighted monthly return per group
bm_portf <- benchmark_portfolio[, .(ret = weighted.mean(ret_adj, w = mktcap_lag, na.rm = TRUE)),
                                keyby = .(eom, portf_group)]
setkey(bm_portf, eom)
setkey(ff5f, eom)
bm_portf <- merge(bm_portf, ff5f[, .(eom, RF)], all.x = TRUE)
bm_portf[, excess_ret := 100 * ret - RF]
# Wide layout: one column per group (columns 2-6)
bm_portf <- dcast(bm_portf, eom ~ portf_group, value.var = "excess_ret")
bm_m <- vapply(bm_portf[, 2L:6L], mean, numeric(1))
bm_s <- vapply(bm_portf[, 2L:6L], sd, numeric(1))
# Long-short: column 6 minus column 2, plus its cumulative wealth path
bm_bm_ls_portf <- unlist(bm_portf[, 6L] - bm_portf[, 2L])
bm_bm_ls_portf_wealth <- cumprod(1 + bm_bm_ls_portf / 100)
library(xts)
bm_dates <- as.Date(bm_portf$eom)
bm_bm_ls_portf_wealth <- xts(bm_bm_ls_portf_wealth, bm_dates)
bm_bm_ls_portf <- xts(bm_bm_ls_portf, bm_dates)
plot(bm_bm_ls_portf_wealth)
lines(bm_bm_ls_portf_wealth, col = 2)
# Annualised Sharpe ratio of the bm long-short portfolio
sqrt(12) * mean(bm_bm_ls_portf) / sd(bm_bm_ls_portf)
#[1] 0.1111413
#---------- Benchmark portfolio for roe----------------#
# Same sample as the model portfolios (rows with a prediction); pred is then
# overwritten in this copy with the ranked return-on-equity characteristic.
benchmark_portfolio <- na.omit(dat80, cols = "pred")
benchmark_portfolio[, pred := roe_lagrank]
benchmark_portfolio[, portf_group := assign_portfolio(pred, seq(0.2, 0.8, 0.2)), by = "eom"]
# Value-weighted monthly return per group
bm_portf <- benchmark_portfolio[, .(ret = weighted.mean(ret_adj, w = mktcap_lag, na.rm = TRUE)),
                                keyby = .(eom, portf_group)]
setkey(bm_portf, eom)
setkey(ff5f, eom)
bm_portf <- merge(bm_portf, ff5f[, .(eom, RF)], all.x = TRUE)
bm_portf[, excess_ret := 100 * ret - RF]
# Wide layout: one column per group (columns 2-6)
bm_portf <- dcast(bm_portf, eom ~ portf_group, value.var = "excess_ret")
bm_m <- vapply(bm_portf[, 2L:6L], mean, numeric(1))
bm_s <- vapply(bm_portf[, 2L:6L], sd, numeric(1))
# Long-short: column 6 minus column 2, plus its cumulative wealth path
roe_bm_ls_portf <- unlist(bm_portf[, 6L] - bm_portf[, 2L])
roe_bm_ls_portf_wealth <- cumprod(1 + roe_bm_ls_portf / 100)
library(xts)
roe_dates <- as.Date(bm_portf$eom)
roe_bm_ls_portf_wealth <- xts(roe_bm_ls_portf_wealth, roe_dates)
roe_bm_ls_portf <- xts(roe_bm_ls_portf, roe_dates)
plot(roe_bm_ls_portf_wealth)
lines(roe_bm_ls_portf_wealth, col = 2)
# Annualised Sharpe ratio of the roe long-short portfolio
sqrt(12) * mean(roe_bm_ls_portf) / sd(roe_bm_ls_portf)
#[1] 0.0983983
#---------- Benchmark portfolio for gpm----------------#
# Same sample as the model portfolios (rows with a prediction); pred is then
# overwritten in this copy with the ranked gross-profit-margin characteristic.
benchmark_portfolio <- na.omit(dat80, cols = "pred")
benchmark_portfolio[, pred := gpm_lagrank]
benchmark_portfolio[, portf_group := assign_portfolio(pred, seq(0.2, 0.8, 0.2)), by = "eom"]
# Value-weighted monthly return per group
bm_portf <- benchmark_portfolio[, .(ret = weighted.mean(ret_adj, w = mktcap_lag, na.rm = TRUE)),
                                keyby = .(eom, portf_group)]
setkey(bm_portf, eom)
setkey(ff5f, eom)
bm_portf <- merge(bm_portf, ff5f[, .(eom, RF)], all.x = TRUE)
bm_portf[, excess_ret := 100 * ret - RF]
# Wide layout: one column per group (columns 2-6)
bm_portf <- dcast(bm_portf, eom ~ portf_group, value.var = "excess_ret")
bm_m <- vapply(bm_portf[, 2L:6L], mean, numeric(1))
bm_s <- vapply(bm_portf[, 2L:6L], sd, numeric(1))
# Long-short: column 6 minus column 2, plus its cumulative wealth path
gpm_bm_ls_portf <- unlist(bm_portf[, 6L] - bm_portf[, 2L])
gpm_bm_ls_portf_wealth <- cumprod(1 + gpm_bm_ls_portf / 100)
library(xts)
gpm_dates <- as.Date(bm_portf$eom)
gpm_bm_ls_portf_wealth <- xts(gpm_bm_ls_portf_wealth, gpm_dates)
gpm_bm_ls_portf <- xts(gpm_bm_ls_portf, gpm_dates)
plot(gpm_bm_ls_portf_wealth)
lines(gpm_bm_ls_portf_wealth, col = 2)
# Annualised Sharpe ratio of the gpm long-short portfolio
sqrt(12) * mean(gpm_bm_ls_portf) / sd(gpm_bm_ls_portf)
#[1] -0.032847
Transform ret_adj into a binary variable: 𝑦 = 1 if ret_adj is positive and 𝑦 = 0 if ret_adj is less than or equal to zero. Repeat Problem 1, but in Step 3 use the newly transformed dependent variable as the target. To form the probability predictions, employ three separate statistical models: (1) logistic regression, (2) linear discriminant analysis, (3) naive Bayes classifier. As in Problem 1, construct the quintile portfolio sorts for each of these statistical models, but now use the predicted probabilities as the sorting variables.
# The earlier steps need not be repeated: once they have been run, dat80
# already has the form required here.
# Encode the sign of ret_adj as a 0/1 indicator in a new column, so that
# ret_adj itself is preserved.
dat80$y <- as.numeric(dat80$ret_adj > 0)
#-------------------------------------------------------------------#
# Rolling scheme: estimate the statistical model on 30 years (360
# months) of data, then predict the cross-section of returns for the
# following 12 months
#-------------------------------------------------------------------#
# Sanity check: print the training and prediction span of every refit
for (i in seq(from = 360, to = length(d) - 12, by = 12)) {
  train_span <- paste0("Training data: ", d[i - 359], " ~ ", d[i])
  test_span <- paste0(" Prediction: ", d[i + 1], " ~ ", d[i + 12])
  cat(train_span, "\n")
  cat(test_span, "\n")
}
# Packages for the classification models. Each one is installed only when it
# is missing, so re-running the script does not reinstall them every time.
required_pkgs <- c("ISLR", "MASS", "margins", "e1071")
for (pkg in required_pkgs) {
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg)
  }
}
library("ISLR")
library("MASS")     # lda()
library("margins")
library("e1071")    # naiveBayes()
#--------------------------Model 1 Logistic Regression-------------------------#
for (i in seq(360, (length(d) - 12), by = 12)) {
  # 30-year training window. Note that the response is now y, the binary
  # variable, instead of ret_adj.
  train_dat <- dat80[eom %between% c(d[i - 359], d[i]),
                     c("y", pred_name), with = FALSE]
  # Model estimation
  lg_mdl <- glm(y ~ ., data = train_dat, family = binomial(link = "logit"))
  x_test <- dat80[eom %between% c(d[i + 1], d[i + 12]), pred_name, with = FALSE]
  # type = "response" makes predict() return the probability P(y = 1). The
  # default ("link") returns log-odds, which is not the predicted probability
  # the task asks for. The transformation is monotone, so the ordering of
  # stocks within a month is preserved.
  dat80[eom %between% c(d[i + 1], d[i + 12]),
        pred := predict(lg_mdl, newdata = x_test, type = "response")]
}
# Monthly minimum and maximum of the fitted scores (kept for inspection only)
Range <- dat80[, as.list(range(pred, na.rm = TRUE)), keyby = eom]
portf_input_cols <- c("eom", "ret_adj", "mktcap_lag", "pred")
dat_for_portfolio <- na.omit(dat80[, portf_input_cols, with = FALSE], cols = "pred")
dat_for_portfolio[, portf_group := assign_portfolio(pred, seq(0.2, 0.8, 0.2)), by = "eom"]
# Value-weighted return of every portfolio in every month
quintile_portf <- dat_for_portfolio[, .(ret = weighted.mean(ret_adj, w = mktcap_lag, na.rm = TRUE)),
                                    keyby = .(eom, portf_group)]
setkey(quintile_portf, eom)
setkey(ff5f, eom)
quintile_portf <- merge(quintile_portf, ff5f[, .(eom, RF)], all.x = TRUE)
# Excess return (ret rescaled by 100, then RF subtracted)
quintile_portf[, excess_ret := 100 * ret - RF]
# Reshape so that the five portfolios sit side by side
quintile_portf <- dcast(quintile_portf, eom ~ portf_group, value.var = "excess_ret")
# Average excess return, volatility and annualised Sharpe ratio per portfolio
m <- vapply(quintile_portf[, 2L:6L], mean, numeric(1))
s <- vapply(quintile_portf[, 2L:6L], sd, numeric(1))
sh <- vapply(quintile_portf[, 2L:6L], \(x) sqrt(12) * mean(x) / sd(x), numeric(1))
# Long-short portfolio: column 6 minus column 2
lg_ls_portf <- unlist(quintile_portf[, 6L] - quintile_portf[, 2L])
lg_ls_portf_wealth <- cumprod(1 + lg_ls_portf / 100)
library(xts)
lg_dates <- as.Date(quintile_portf$eom)
lg_ls_portf_wealth <- xts(lg_ls_portf_wealth, lg_dates)
lg_ls_portf <- xts(lg_ls_portf, lg_dates)
plot(lg_ls_portf_wealth)
# Annualised Sharpe ratio of the long-short portfolio
sqrt(12) * mean(lg_ls_portf) / sd(lg_ls_portf)
#[1] 0.3308841
#---------------------------------Model 2 LDA----------------------------------#
for (i in seq(360, (length(d) - 12), by = 12)) {
  # 30-year training window. Note that the response is now y, the binary
  # variable, instead of ret_adj.
  train_dat <- dat80[eom %between% c(d[i - 359], d[i]),
                     c("y", pred_name), with = FALSE]
  # Model estimation
  lda_fit <- lda(y ~ ., method = "moment", data = train_dat)
  x_test <- dat80[eom %between% c(d[i + 1], d[i + 12]), pred_name, with = FALSE]
  # The posterior matrix has one column per class, named after the class
  # levels ("0", "1"). Column "1" is P(y = 1), the probability of a positive
  # return. Taking the first column would give P(y = 0) and invert the sort.
  # LDA results recorded before this fix (negative Sharpe ratio / alpha)
  # reflect that inverted sort and need to be regenerated.
  dat80[eom %between% c(d[i + 1], d[i + 12]),
        pred := predict(lda_fit, newdata = x_test)$posterior[, "1"]]
}
# Monthly minimum and maximum of the fitted scores (kept for inspection only)
Range <- dat80[, as.list(range(pred, na.rm = TRUE)), keyby = eom]
portf_input_cols <- c("eom", "ret_adj", "mktcap_lag", "pred")
dat_for_portfolio <- na.omit(dat80[, portf_input_cols, with = FALSE], cols = "pred")
dat_for_portfolio[, portf_group := assign_portfolio(pred, seq(0.2, 0.8, 0.2)), by = "eom"]
# Value-weighted return of every portfolio in every month
quintile_portf <- dat_for_portfolio[, .(ret = weighted.mean(ret_adj, w = mktcap_lag, na.rm = TRUE)),
                                    keyby = .(eom, portf_group)]
setkey(quintile_portf, eom)
setkey(ff5f, eom)
quintile_portf <- merge(quintile_portf, ff5f[, .(eom, RF)], all.x = TRUE)
# Excess return (ret rescaled by 100, then RF subtracted)
quintile_portf[, excess_ret := 100 * ret - RF]
# Reshape so that the five portfolios sit side by side
quintile_portf <- dcast(quintile_portf, eom ~ portf_group, value.var = "excess_ret")
# Average excess return, volatility and annualised Sharpe ratio per portfolio
m <- vapply(quintile_portf[, 2L:6L], mean, numeric(1))
s <- vapply(quintile_portf[, 2L:6L], sd, numeric(1))
sh <- vapply(quintile_portf[, 2L:6L], \(x) sqrt(12) * mean(x) / sd(x), numeric(1))
# Long-short portfolio: column 6 minus column 2
lda_ls_portf <- unlist(quintile_portf[, 6L] - quintile_portf[, 2L])
lda_ls_portf_wealth <- cumprod(1 + lda_ls_portf / 100)
library(xts)
lda_dates <- as.Date(quintile_portf$eom)
lda_ls_portf_wealth <- xts(lda_ls_portf_wealth, lda_dates)
lda_ls_portf <- xts(lda_ls_portf, lda_dates)
plot(lda_ls_portf_wealth)
# Annualised Sharpe ratio of the long-short portfolio
sqrt(12) * mean(lda_ls_portf) / sd(lda_ls_portf)
# NOTE(review): the recorded value below is almost exactly the negative of the
# logistic-regression result (0.3308841), which suggests it was produced with
# an inverted sort -- re-run to confirm.
#[1] -0.3293088
#--------------------------Model 3 Naive Bayes----------------------------------#
# Naive Bayes
for (i in seq(from = 360, to = length(d) - 12, by = 12)) {
  train_window <- c(d[i - 359], d[i])
  test_window <- c(d[i + 1], d[i + 12])

  # Note that the response is now y, the binary variable, instead of ret_adj
  train_dat <- dat80[eom %between% train_window, c("y", pred_name), with = FALSE]
  # Model estimation
  nby <- naiveBayes(y ~ ., data = train_dat)

  x_test <- dat80[eom %between% test_window, pred_name, with = FALSE]
  # type = "raw" yields one probability column per class; the second column
  # is stored as the sorting variable
  result <- predict(nby, newdata = x_test, type = "raw")
  dat80[eom %between% test_window, pred := result[, 2]]
}
# Monthly minimum and maximum of the fitted scores (kept for inspection only)
Range <- dat80[, as.list(range(pred, na.rm = TRUE)), keyby = eom]
portf_input_cols <- c("eom", "ret_adj", "mktcap_lag", "pred")
dat_for_portfolio <- na.omit(dat80[, portf_input_cols, with = FALSE], cols = "pred")
dat_for_portfolio[, portf_group := assign_portfolio(pred, seq(0.2, 0.8, 0.2)), by = "eom"]
# Value-weighted return of every portfolio in every month
quintile_portf <- dat_for_portfolio[, .(ret = weighted.mean(ret_adj, w = mktcap_lag, na.rm = TRUE)),
                                    keyby = .(eom, portf_group)]
setkey(quintile_portf, eom)
setkey(ff5f, eom)
quintile_portf <- merge(quintile_portf, ff5f[, .(eom, RF)], all.x = TRUE)
# Excess return (ret rescaled by 100, then RF subtracted)
quintile_portf[, excess_ret := 100 * ret - RF]
# Reshape so that the five portfolios sit side by side
quintile_portf <- dcast(quintile_portf, eom ~ portf_group, value.var = "excess_ret")
# Average excess return, volatility and annualised Sharpe ratio per portfolio
m <- vapply(quintile_portf[, 2L:6L], mean, numeric(1))
s <- vapply(quintile_portf[, 2L:6L], sd, numeric(1))
sh <- vapply(quintile_portf[, 2L:6L], \(x) sqrt(12) * mean(x) / sd(x), numeric(1))
# Long-short portfolio: column 6 minus column 2
nb_ls_portf <- unlist(quintile_portf[, 6L] - quintile_portf[, 2L])
nb_ls_portf_wealth <- cumprod(1 + nb_ls_portf / 100)
library(xts)
nb_dates <- as.Date(quintile_portf$eom)
nb_ls_portf_wealth <- xts(nb_ls_portf_wealth, nb_dates)
nb_ls_portf <- xts(nb_ls_portf, nb_dates)
plot(nb_ls_portf_wealth)
# Annualised Sharpe ratio of the long-short portfolio
sqrt(12) * mean(nb_ls_portf) / sd(nb_ls_portf)
#[1] 0.2786669
From Problems 1 and 2, we have 7 long-short portfolios: four long-short portfolios are based on the predictions from linear regression, logistic regression, linear discriminant analysis, and the naive Bayes classifier; three long-short portfolios are based on simple univariate sorts of the three firm characteristics bm, roe, and gpm. For each portfolio, estimate the alpha from the following linear regression \[R_t^{portfolio}=\alpha+\beta\times MktRf_t+s\times SMB_t+h\times HML_t+\epsilon_t\]
where \(R^{portfolio}_t\) is the long-short portfolio return. Which portfolios have a statistically significant alpha?
# Each long-short portfolio section above produced an *_ls_portf return
# series (an xts object indexed by month).
# Restrict the factor data to the window covered by those portfolios.
ff5f <- ff5f[ eom >= "2001-07-01"]
ff5f <- ff5f[ eom <= "2022-07-01"]

# Attach a long-short return series to the factor table by DATE instead of by
# row position. Positional assignment is only correct while the hard-coded
# window above happens to contain exactly the portfolio months in the same
# order. With date matching, months without a portfolio return become NA and
# are dropped by lm().
#   ls_ret       : xts series of long-short returns
#   factor_dates : the eom column of the factor table
# Returns a plain numeric vector aligned with factor_dates.
align_ls_returns <- function(ls_ret, factor_dates) {
  as.numeric(ls_ret)[match(as.Date(factor_dates), as.Date(index(ls_ret)))]
}

# For every portfolio, regress its return on SMB, HML and MktRf; the intercept
# of each fit is the alpha.
# Linear regression
ff5f$ln_r <- align_ls_returns(ln_ls_portf, ff5f$eom)
ln_model <- lm(ln_r ~ SMB + HML + MktRf, data = ff5f)
summary(ln_model)
# Logistic regression
ff5f$lg_r <- align_ls_returns(lg_ls_portf, ff5f$eom)
lg_model <- lm(lg_r ~ SMB + HML + MktRf, data = ff5f)
summary(lg_model)
# LDA
ff5f$lda_r <- align_ls_returns(lda_ls_portf, ff5f$eom)
lda_model <- lm(lda_r ~ SMB + HML + MktRf, data = ff5f)
summary(lda_model)
# Naive Bayes
ff5f$nb_r <- align_ls_returns(nb_ls_portf, ff5f$eom)
nb_model <- lm(nb_r ~ SMB + HML + MktRf, data = ff5f)
summary(nb_model)
# Benchmark: bm
ff5f$bm_r <- align_ls_returns(bm_bm_ls_portf, ff5f$eom)
bm_model <- lm(bm_r ~ SMB + HML + MktRf, data = ff5f)
summary(bm_model)
# Benchmark: roe
ff5f$roe_r <- align_ls_returns(roe_bm_ls_portf, ff5f$eom)
roe_model <- lm(roe_r ~ SMB + HML + MktRf, data = ff5f)
summary(roe_model)
# Benchmark: gpm
ff5f$gpm_r <- align_ls_returns(gpm_bm_ls_portf, ff5f$eom)
gpm_model <- lm(gpm_r ~ SMB + HML + MktRf, data = ff5f)
summary(gpm_model)
# Linear-regression long-short portfolio: regress its return on SMB, HML and
# MktRf (the intercept is the alpha). Same model as fitted earlier in the
# file; repeated here ahead of its recorded console output.
ff5f$ln_r <- ln_ls_portf[,1]
ln_model <- lm(ln_r~SMB+HML+MktRf, data=ff5f)
summary(ln_model)
Call:
lm(formula = ln_r ~ SMB + HML + MktRf, data = ff5f)
Residuals:
Min 1Q Median 3Q Max
-7.7670 -1.5805 -0.1113 1.4875 8.6292
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.41379 0.15278 2.708 0.00723 **
SMB -0.04661 0.06251 -0.746 0.45666
HML -0.11934 0.05147 -2.319 0.02121 *
MktRf 0.03731 0.03630 1.028 0.30507
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 2.401 on 248 degrees of freedom
Multiple R-squared: 0.02871, Adjusted R-squared: 0.01696
F-statistic: 2.443 on 3 and 248 DF, p-value: 0.06468
# Logistic-regression long-short portfolio: regress its return on SMB, HML and
# MktRf (the intercept is the alpha). Same model as fitted earlier in the
# file; repeated here ahead of its recorded console output.
ff5f$lg_r <- lg_ls_portf[,1]
lg_model <- lm(lg_r~SMB+HML+MktRf, data=ff5f)
summary(lg_model)
Call:
lm(formula = lg_r ~ SMB + HML + MktRf, data = ff5f)
Residuals:
Min 1Q Median 3Q Max
-17.7249 -1.9667 -0.1509 1.6251 13.6638
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.83538 0.21835 3.826 0.000165 ***
SMB -0.91317 0.08934 -10.221 < 2e-16 ***
HML 0.38266 0.07355 5.203 4.12e-07 ***
MktRf -0.37509 0.05188 -7.230 5.98e-12 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 3.431 on 248 degrees of freedom
Multiple R-squared: 0.4847, Adjusted R-squared: 0.4785
F-statistic: 77.75 on 3 and 248 DF, p-value: < 2.2e-16
# LDA long-short portfolio: regress its return on SMB, HML and MktRf (the
# intercept is the alpha). Same model as fitted earlier in the file; repeated
# here ahead of its recorded console output.
# NOTE(review): the coefficients recorded below are almost exactly the
# negatives of the logistic-regression ones, so this output appears to have
# been produced with an inverted LDA sort -- confirm and regenerate.
ff5f$lda_r <- lda_ls_portf[,1]
lda_model <- lm(lda_r~SMB+HML+MktRf, data=ff5f)
summary(lda_model)
Call:
lm(formula = lda_r ~ SMB + HML + MktRf, data = ff5f)
Residuals:
Min 1Q Median 3Q Max
-13.6178 -1.6345 0.1697 1.9518 17.5738
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -0.83351 0.21822 -3.820 0.000169 ***
SMB 0.91161 0.08929 10.210 < 2e-16 ***
HML -0.38236 0.07351 -5.201 4.14e-07 ***
MktRf 0.37617 0.05185 7.255 5.13e-12 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 3.429 on 248 degrees of freedom
Multiple R-squared: 0.485, Adjusted R-squared: 0.4788
F-statistic: 77.85 on 3 and 248 DF, p-value: < 2.2e-16
# Naive-Bayes long-short portfolio: regress its return on SMB, HML and MktRf
# (the intercept is the alpha). Same model as fitted earlier in the file;
# repeated here ahead of its recorded console output.
ff5f$nb_r <- nb_ls_portf[,1]
nb_model <- lm(nb_r~SMB+HML+MktRf, data=ff5f)
summary(nb_model)
Call:
lm(formula = nb_r ~ SMB + HML + MktRf, data = ff5f)
Residuals:
Min 1Q Median 3Q Max
-15.3674 -2.1513 -0.2519 2.0804 14.3057
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.84661 0.25828 3.278 0.0012 **
SMB -0.85684 0.10568 -8.108 2.37e-14 ***
HML 0.60521 0.08700 6.956 3.09e-11 ***
MktRf -0.45542 0.06137 -7.421 1.85e-12 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 4.058 on 248 degrees of freedom
Multiple R-squared: 0.4413, Adjusted R-squared: 0.4345
F-statistic: 65.28 on 3 and 248 DF, p-value: < 2.2e-16
# Benchmark bm long-short portfolio: regress its return on SMB, HML and MktRf
# (the intercept is the alpha). Same model as fitted earlier in the file;
# repeated here ahead of its recorded console output.
ff5f$bm_r <- bm_bm_ls_portf[,1]
bm_model <- lm(bm_r~SMB+HML+MktRf, data=ff5f)
summary(bm_model)
Call:
lm(formula = bm_r ~ SMB + HML + MktRf, data = ff5f)
Residuals:
Min 1Q Median 3Q Max
-10.1867 -1.6105 -0.0729 1.6689 12.4246
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -0.06221 0.18832 -0.330 0.7414
SMB 0.55690 0.07705 7.227 6.07e-12 ***
HML 0.80764 0.06344 12.732 < 2e-16 ***
MktRf 0.10966 0.04475 2.451 0.0149 *
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 2.959 on 248 degrees of freedom
Multiple R-squared: 0.5627, Adjusted R-squared: 0.5574
F-statistic: 106.4 on 3 and 248 DF, p-value: < 2.2e-16
# Benchmark roe long-short portfolio: regress its return on SMB, HML and MktRf
# (the intercept is the alpha). Same model as fitted earlier in the file;
# repeated here ahead of its recorded console output.
ff5f$roe_r <- roe_bm_ls_portf[,1]
roe_model <- lm(roe_r~SMB+HML+MktRf, data=ff5f)
summary(roe_model)
Call:
lm(formula = roe_r ~ SMB + HML + MktRf, data = ff5f)
Residuals:
Min 1Q Median 3Q Max
-20.3764 -1.9263 -0.0548 2.0979 12.5664
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.57269 0.23908 2.395 0.01734 *
SMB -0.93147 0.09782 -9.522 < 2e-16 ***
HML 0.24539 0.08053 3.047 0.00256 **
MktRf -0.43140 0.05681 -7.594 6.3e-13 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 3.756 on 248 degrees of freedom
Multiple R-squared: 0.4706, Adjusted R-squared: 0.4642
F-statistic: 73.49 on 3 and 248 DF, p-value: < 2.2e-16
# Benchmark gpm long-short portfolio: regress its return on SMB, HML and MktRf
# (the intercept is the alpha). Same model as fitted earlier in the file;
# repeated here ahead of its recorded console output.
ff5f$gpm_r <- gpm_bm_ls_portf[,1]
gpm_model <- lm(gpm_r~SMB+HML+MktRf, data=ff5f)
summary(gpm_model)
Call:
lm(formula = gpm_r ~ SMB + HML + MktRf, data = ff5f)
Residuals:
Min 1Q Median 3Q Max
-9.215 -1.422 0.071 1.209 7.010
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.085040 0.139514 0.610 0.543
SMB -0.526780 0.057085 -9.228 < 2e-16 ***
HML -0.346262 0.046996 -7.368 2.57e-12 ***
MktRf -0.006491 0.033149 -0.196 0.845
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 2.192 on 248 degrees of freedom
Multiple R-squared: 0.448, Adjusted R-squared: 0.4413
F-statistic: 67.09 on 3 and 248 DF, p-value: < 2.2e-16
linear regression model, logistic regression model, lda, naive Bayes, benchmark (roe) 的 alpha 顯著