Problem 1

Repeat the cross-sectional stock return predictability exercise discussed in class, with the following modifications:

  1. At the end of each portfolio formation month, exclude the stocks with market capitalization less than the 20% quantile.

    # ---- Setup: packages, helper functions, data and size filter ----
    # Install data.table only when it is missing, so that re-running the
    # script does not re-download the package on every run.
    if (!requireNamespace("data.table", quietly = TRUE)) {
      install.packages("data.table")
    }
    library(data.table)
    # Helper functions for this assignment; assign_portfolio(), used further
    # down, is presumably defined in this file -- TODO confirm.
    source("C:/Users/88698/Documents/R/some_useful_functions.R")
    ff5f <- fread("C:/Users/88698/Documents/R/ff5f_monthly.csv")

    dat <- setDT(readRDS("C:/Users/88698/Documents/R/DATA_for_Monthly_Rebalancing.RDS"))

    # Every column after the first nine is treated as a predictor.
    pred_name <- names(dat)[-(1:9)]

    # Sorted vector of the distinct month-end dates in the data.
    d <- sort(unique(dat$eom))

    # First compute, for every eom, the 20% quantile of lagged market cap.
    # na.rm = TRUE: quantile() stops with an error if any value is missing.
    result_dat <- dat[, .(mktcap_lag20 = quantile(mktcap_lag, 0.2, na.rm = TRUE)),
                      keyby = eom]

    # merge() is kept (instead of a grouped :=) because later code selects
    # predictor columns by position, and merge() fixes the layout: eom first,
    # mktcap_lag20 last.
    dat <- merge(dat, result_dat, by = "eom")

    # Keep stocks at or above the monthly 20% size threshold; rows where the
    # comparison is NA are not selected.
    dat80 <- dat[mktcap_lag >= mktcap_lag20]
  2. Transform each predictor into ranked variable (at the end of each month). You can use the function frank to do this.

    # Replace each predictor by its within-month rank. The ranks are written
    # to new "<name>rank" columns first, and the raw columns are removed
    # afterwards.

    # The raw predictors sit in columns 10 to 39 of dat80.
    pred_name <- names(dat80)[10:39]
    stopifnot(!anyNA(pred_name))

    rank_name <- paste0(pred_name, "rank")

    # NOTE(review): frank() is called with its default NA handling; check how
    # missing predictor values should be ranked.
    dat80[, (rank_name) := lapply(.SD, frank, ties.method = "min"),
          by = eom, .SDcols = pred_name]

    # Drop the raw predictors and the helper threshold column by name rather
    # than by position, so the step does not depend on the exact column count.
    drop_cols <- intersect(c(pred_name, "mktcap_lag20"), names(dat80))
    dat80[, (drop_cols) := NULL]
  3. Use the previous 30 years data for training the linear regression. Similar to the demonstration in the class, the dependent variable is ret_adj, use all of the 30 predictors (accrual_lag, … , short_debt_lag), and re-estimate the linear model after 12 months.

    #-------------------------------------------------------------------#
    # Rolling estimation: fit a linear model on a 360-month window and
    # use it to predict ret_adj for the 12 months that follow
    #-------------------------------------------------------------------#

    # Last month of every training window: month 360, then every 12th month
    window_end <- seq(360, (length(d) - 12), by = 12)

    # Print every training / prediction window to check the indexing by eye
    for (k in window_end) {
      cat(paste0("Training data: ", d[k - 359], " ~ ", d[k]), "\n")
      cat(paste0("   Prediction: ", d[k + 1], " ~ ", d[k + 12]), "\n")
    }

    # Predictor columns are taken by position (columns 10 to 39 of dat80)
    pred_name <- names(dat80)[10:39]

    for (k in window_end) {
      fit_span  <- c(d[k - 359], d[k])
      test_span <- c(d[k + 1], d[k + 12])

      train_dat <- dat80[eom %between% fit_span,
                         c("ret_adj", pred_name), with = FALSE]

      # Model estimation: ret_adj on all predictor columns
      lm_mdl <- lm(ret_adj ~ ., data = train_dat)

      x_test <- dat80[eom %between% test_span, pred_name, with = FALSE]

      # Store the out-of-sample predictions in the pred column of dat80
      dat80[eom %between% test_span,
            pred := predict(lm_mdl, newdata = x_test)]
    }
  4. Form the quin-tile sort portfolios using the linear regression model prediction. Use the market-cap as the portfolio weight.

    # Per-month minimum and maximum of the predictions (NA ignored)
    Range <- dat80[, as.list(range(pred, na.rm = TRUE)), keyby = eom]

    # Keep only the stock-months that received a prediction
    keep_cols <- c("eom", "ret_adj", "mktcap_lag", "pred")
    dat_for_portfolio <- na.omit(dat80[, keep_cols, with = FALSE], cols = "pred")

    # Month-by-month portfolio group from assign_portfolio(), which is given
    # the break points 0.2, 0.4, 0.6 and 0.8
    quintile_breaks <- seq(0.2, 0.8, 0.2)
    dat_for_portfolio[, portf_group := assign_portfolio(pred, quintile_breaks),
                      by = eom]
  5. For each portfolio, report the average returns, volatility, and Sharpe ratio. Does the sorting based on linear regression prediction generate monotonically increasing/decreasing portfolio performance?

    # Market-cap-weighted return of every portfolio group in every month
    quintile_portf <- dat_for_portfolio[
      , .(ret = weighted.mean(ret_adj, w = mktcap_lag, na.rm = TRUE)),
      keyby = .(eom, portf_group)]
    quintile_portf

    setkey(quintile_portf, eom)
    setkey(ff5f, eom)

    # Attach the risk-free rate of the same month
    quintile_portf <- merge(quintile_portf, ff5f[, .(eom, RF)], all.x = TRUE)

    # Excess return: ret is scaled by 100 before RF is subtracted
    # (presumably ret_adj is a decimal return and RF is in percent -- TODO confirm)
    quintile_portf[, excess_ret := 100 * ret - RF]

    # Wide layout: one row per month, the five portfolios side by side
    quintile_portf <- dcast(quintile_portf, eom ~ portf_group, value.var = "excess_ret")

    # Columns 2 to 6 hold the five portfolios (column 1 is eom)
    # Average excess return
    m <- unlist(quintile_portf[, lapply(.SD, mean), .SDcols = 2L:6L])

    # Volatility
    s <- unlist(quintile_portf[, lapply(.SD, sd), .SDcols = 2L:6L])

    # Annualised Sharpe ratio (monthly ratio times sqrt(12))
    sh <- unlist(quintile_portf[, lapply(.SD, \(x) sqrt(12) * mean(x) / sd(x)), .SDcols = 2L:6L])

    # Report the three statistics per portfolio; the task asks for them and
    # they were previously computed without being displayed.
    print(rbind(mean = m, volatility = s, sharpe = sh))

    # Form the long-short portfolio: column 6 minus column 2 of the wide table
    ln_ls_portf <- unlist(quintile_portf[, 6L] - quintile_portf[, 2L])
    ln_ls_portf_wealth <- cumprod(1 + ln_ls_portf / 100)

    # Install xts only when it is missing
    if (!requireNamespace("xts", quietly = TRUE)) {
      install.packages("xts")
    }
    library(xts)
    ln_ls_portf_wealth <- xts(ln_ls_portf_wealth, as.Date(quintile_portf[, eom]))
    ln_ls_portf <- xts(ln_ls_portf, as.Date(quintile_portf[, eom]))
    plot(ln_ls_portf_wealth)

    # Annualised Sharpe ratio of the long-short portfolio
    sqrt(12) * mean(ln_ls_portf) / sd(ln_ls_portf)
    #[1] 0.6063449
    #[1] 0.6063449

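    According to the plot of the long-short portfolio's cumulative wealth, sorting on the linear-regression prediction generates a monotonically increasing/decreasing pattern in portfolio performance. (Monotonicity across the five quintile portfolios should be confirmed from the per-portfolio average returns `m` and Sharpe ratios `sh` computed above, not from the long-short plot alone.)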

  6. Construct the following benchmark strategies and compare their Sharpe ratio with the long-short portfolio constructed with linear regression prediction.

    a. Market-capitalization weighted portfolio

    b. Each of long-short quin-tile sorted portfolios based on the following firm characteristics:Book-to-market ratio (bm), Return on equity (roe), Gross-profit margin (gpm)

    # Market-capitalization weighted benchmark portfolio.
    # Uses the same stock-months as the prediction-based portfolios (rows with
    # a non-missing pred) and excess returns in percent, so that its Sharpe
    # ratio is comparable with the long-short portfolios. The earlier version
    # used raw returns over the whole sample.
    dat_for_mw <- na.omit(dat80, cols = "pred")
    mw_port <- dat_for_mw[, .(ret = weighted.mean(ret_adj, w = mktcap_lag, na.rm = TRUE)),
                          keyby = .(eom)]

    # Attach the risk-free rate and form the excess return, scaled the same
    # way as for the quintile portfolios (100 * ret - RF).
    mw_port <- merge(mw_port, ff5f[, .(eom, RF)], by = "eom", all.x = TRUE)
    mw_port[, excess_ret := 100 * ret - RF]

    m_m <- mean(mw_port$excess_ret)
    m_s <- sd(mw_port$excess_ret)
    m_m
    m_s

    # Annualised Sharpe ratio
    sqrt(12) * m_m / m_s
    # NOTE: the value recorded from the original run (0.7085098) was computed
    # on raw returns over the full sample; re-run to refresh it.

    #---------- Benchmark portfolio for bm----------------#

    # Start from the stock-months that have a model prediction, then replace
    # the sorting variable by the ranked bm characteristic
    benchmark_portfolio <- na.omit(dat80, cols = "pred")
    benchmark_portfolio[, pred := bm_lagrank]
    benchmark_portfolio[, portf_group := assign_portfolio(pred, seq(0.2, 0.8, 0.2)),
                        by = eom]

    # Market-cap-weighted return per month and portfolio group
    bm_portf <- benchmark_portfolio[
      , .(ret = weighted.mean(ret_adj, w = mktcap_lag, na.rm = TRUE)),
      keyby = .(eom, portf_group)]

    setkey(bm_portf, eom)
    setkey(ff5f, eom)

    # Excess return over RF, then one column per portfolio group
    bm_portf <- merge(bm_portf, ff5f[, .(eom, RF)], all.x = TRUE)
    bm_portf[, excess_ret := 100 * ret - RF]
    bm_portf <- dcast(bm_portf, eom ~ portf_group, value.var = "excess_ret")

    # Mean and standard deviation of columns 2 to 6 (the five portfolios)
    port_cols <- 2L:6L
    bm_m <- unlist(bm_portf[, lapply(.SD, mean), .SDcols = port_cols])
    bm_s <- unlist(bm_portf[, lapply(.SD, sd), .SDcols = port_cols])

    # Long-short return (column 6 minus column 2) and its cumulative wealth
    bm_bm_ls_portf <- unlist(bm_portf[, 6L] - bm_portf[, 2L])
    bm_bm_ls_portf_wealth <- cumprod(1 + bm_bm_ls_portf / 100)

    library(xts)
    bm_dates <- as.Date(bm_portf[, eom])
    bm_bm_ls_portf_wealth <- xts(bm_bm_ls_portf_wealth, bm_dates)
    bm_bm_ls_portf <- xts(bm_bm_ls_portf, bm_dates)
    plot(bm_bm_ls_portf_wealth)

    lines(bm_bm_ls_portf_wealth, col = 2)

    # Annualised Sharpe ratio of the bm long-short portfolio
    sqrt(12) * mean(bm_bm_ls_portf) / sd(bm_bm_ls_portf)
    #[1] 0.1111413

    #---------- Benchmark portfolio for roe----------------#

    # Start from the stock-months that have a model prediction, then replace
    # the sorting variable by the ranked roe characteristic
    benchmark_portfolio <- na.omit(dat80, cols = "pred")
    benchmark_portfolio[, pred := roe_lagrank]
    benchmark_portfolio[, portf_group := assign_portfolio(pred, seq(0.2, 0.8, 0.2)),
                        by = eom]

    # Market-cap-weighted return per month and portfolio group
    bm_portf <- benchmark_portfolio[
      , .(ret = weighted.mean(ret_adj, w = mktcap_lag, na.rm = TRUE)),
      keyby = .(eom, portf_group)]

    setkey(bm_portf, eom)
    setkey(ff5f, eom)

    # Excess return over RF, then one column per portfolio group
    bm_portf <- merge(bm_portf, ff5f[, .(eom, RF)], all.x = TRUE)
    bm_portf[, excess_ret := 100 * ret - RF]
    bm_portf <- dcast(bm_portf, eom ~ portf_group, value.var = "excess_ret")

    # Mean and standard deviation of columns 2 to 6 (the five portfolios)
    port_cols <- 2L:6L
    bm_m <- unlist(bm_portf[, lapply(.SD, mean), .SDcols = port_cols])
    bm_s <- unlist(bm_portf[, lapply(.SD, sd), .SDcols = port_cols])

    # Long-short return (column 6 minus column 2) and its cumulative wealth
    roe_bm_ls_portf <- unlist(bm_portf[, 6L] - bm_portf[, 2L])
    roe_bm_ls_portf_wealth <- cumprod(1 + roe_bm_ls_portf / 100)

    library(xts)
    roe_dates <- as.Date(bm_portf[, eom])
    roe_bm_ls_portf_wealth <- xts(roe_bm_ls_portf_wealth, roe_dates)
    roe_bm_ls_portf <- xts(roe_bm_ls_portf, roe_dates)
    plot(roe_bm_ls_portf_wealth)
    lines(roe_bm_ls_portf_wealth, col = 2)

    # Annualised Sharpe ratio of the roe long-short portfolio
    sqrt(12) * mean(roe_bm_ls_portf) / sd(roe_bm_ls_portf)
    #[1] 0.0983983

    #---------- Benchmark portfolio for gpm----------------#

    # Start from the stock-months that have a model prediction, then replace
    # the sorting variable by the ranked gpm characteristic
    benchmark_portfolio <- na.omit(dat80, cols = "pred")
    benchmark_portfolio[, pred := gpm_lagrank]
    benchmark_portfolio[, portf_group := assign_portfolio(pred, seq(0.2, 0.8, 0.2)),
                        by = eom]

    # Market-cap-weighted return per month and portfolio group
    bm_portf <- benchmark_portfolio[
      , .(ret = weighted.mean(ret_adj, w = mktcap_lag, na.rm = TRUE)),
      keyby = .(eom, portf_group)]

    setkey(bm_portf, eom)
    setkey(ff5f, eom)

    # Excess return over RF, then one column per portfolio group
    bm_portf <- merge(bm_portf, ff5f[, .(eom, RF)], all.x = TRUE)
    bm_portf[, excess_ret := 100 * ret - RF]
    bm_portf <- dcast(bm_portf, eom ~ portf_group, value.var = "excess_ret")

    # Mean and standard deviation of columns 2 to 6 (the five portfolios)
    port_cols <- 2L:6L
    bm_m <- unlist(bm_portf[, lapply(.SD, mean), .SDcols = port_cols])
    bm_s <- unlist(bm_portf[, lapply(.SD, sd), .SDcols = port_cols])

    # Long-short return (column 6 minus column 2) and its cumulative wealth
    gpm_bm_ls_portf <- unlist(bm_portf[, 6L] - bm_portf[, 2L])
    gpm_bm_ls_portf_wealth <- cumprod(1 + gpm_bm_ls_portf / 100)

    library(xts)
    gpm_dates <- as.Date(bm_portf[, eom])
    gpm_bm_ls_portf_wealth <- xts(gpm_bm_ls_portf_wealth, gpm_dates)
    gpm_bm_ls_portf <- xts(gpm_bm_ls_portf, gpm_dates)
    plot(gpm_bm_ls_portf_wealth)
    lines(gpm_bm_ls_portf_wealth, col = 2)

    # Annualised Sharpe ratio of the gpm long-short portfolio
    sqrt(12) * mean(gpm_bm_ls_portf) / sd(gpm_bm_ls_portf)
    #[1] -0.032847

Problem 2

Transform the ret_adj into binary variable 𝑦 = 1 if ret_adj is positive and 𝑦 = 0 if ret_adj is less than or equal to zero. Repeat Problem 1, but now in the Step 3, use the newly transformed dependent variable as target. To form the probability predictions, please employ three separate statistical models: (1) logistic regression, (2) linear discriminant analysis, (3) naive Bayes classifier. Similar to the Problem 1, construct the quin-tile portfolio sorts for each of these statistical models, but now use the predicted probabilities as sorting variables.

# The earlier steps do not need to be repeated: once Problem 1 has been run,
# dat80 already exists in the required form.
# Turn ret_adj into a binary variable and store it in a new column, so that
# ret_adj itself is preserved.

# y = 1 when ret_adj is positive, 0 otherwise; added by reference.
dat80[, y := ifelse(ret_adj > 0, 1, 0)]

#-------------------------------------------------------------------#
# Use 30 years of data to build statistical model and
# predict the next 12 months ahead future cross-sectional return
#-------------------------------------------------------------------#

# Checking if the indexing is correct
for (i in seq(360, (length(d) - 12), by = 12)) {
  cat(paste0("Training data: ", d[i - 359], " ~ ", d[i]), "\n")
  cat(paste0("   Prediction: ", d[i + 1], " ~ ", d[i + 12]), "\n")
}

# Install the modelling packages only when they are missing, so re-running
# the script does not reinstall them every time.
for (pkg in c("ISLR", "MASS", "margins", "e1071")) {
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg)
  }
}
library("ISLR")
library("MASS")     # lda()
library("margins")
library("e1071")    # naiveBayes()

#--------------------------Model 1 Logistic Regression-------------------------#

# Rolling estimation: fit a logistic regression of y on all predictors over a
# 360-month window, then predict the following 12 months.
for (i in seq(360, (length(d) - 12), by = 12)) {
  # The binary variable y replaces ret_adj as the dependent variable
  train_dat <- dat80[eom %between% c(d[i - 359], d[i]),
                     c("y", pred_name), with = FALSE]

  #----- where the magical prediction happens ----#

  lg_mdl <- glm(y ~ ., data = train_dat, family = binomial(link = "logit"))  # model estimation

  x_test <- dat80[eom %between% c(d[i + 1], d[i + 12]), pred_name, with = FALSE]

  # type = "response" returns the fitted probability of y = 1. The default
  # type = "link" returns log-odds, which are not the probabilities the task
  # asks to sort on.
  dat80[eom %between% c(d[i + 1], d[i + 12]),
        pred := predict(lg_mdl, newdata = x_test, type = "response")]

  #----- end of prediction task ----#

}

# Per-month minimum and maximum of the predicted probabilities
Range <- dat80[, as.list(range(pred, na.rm = TRUE)), keyby = eom]

# Keep only the stock-months that received a prediction
dat_for_portfolio <- na.omit(dat80[, .(eom, ret_adj, mktcap_lag, pred)], "pred")

dat_for_portfolio[, portf_group := assign_portfolio(pred, seq(0.2, 0.8, 0.2)), by = eom]

# Market-cap-weighted return per month and portfolio group
quintile_portf <- dat_for_portfolio[, .(ret = weighted.mean(ret_adj, w = mktcap_lag, na.rm = TRUE)),
                                    keyby = .(eom, portf_group)]

setkey(quintile_portf, eom)
setkey(ff5f, eom)

quintile_portf <- merge(quintile_portf, ff5f[, .(eom, RF)], all.x = TRUE)

# Excess return over the risk-free rate
quintile_portf[, excess_ret := 100 * ret - RF]

# Wide layout: the five portfolios side by side
quintile_portf <- dcast(quintile_portf, eom ~ portf_group, value.var = "excess_ret")

# Average excess return
m <- unlist(quintile_portf[, lapply(.SD, mean), .SDcols = 2L:6L])

# Volatility
s <- unlist(quintile_portf[, lapply(.SD, sd), .SDcols = 2L:6L])

# Annualised Sharpe ratio
sh <- unlist(quintile_portf[, lapply(.SD, \(x) sqrt(12) * mean(x) / sd(x)), .SDcols = 2L:6L])

# Form long-short portfolio (column 6 minus column 2)
lg_ls_portf <- unlist(quintile_portf[, 6L] - quintile_portf[, 2L])
lg_ls_portf_wealth <- cumprod(1 + lg_ls_portf / 100)

library(xts)
lg_ls_portf_wealth <- xts(lg_ls_portf_wealth, as.Date(quintile_portf[, eom]))
lg_ls_portf <- xts(lg_ls_portf, as.Date(quintile_portf[, eom]))
plot(lg_ls_portf_wealth)

sqrt(12) * mean(lg_ls_portf) / sd(lg_ls_portf)
#[1] 0.3308841  (value recorded from the original run)

#---------------------------------Model 2 LDA----------------------------------#

# Rolling estimation: fit a linear discriminant analysis of y on all
# predictors over a 360-month window, then predict the following 12 months.
for (i in seq(360, (length(d) - 12), by = 12)) {
  # The binary variable y replaces ret_adj as the dependent variable
  train_dat <- dat80[eom %between% c(d[i - 359], d[i]),
                     c("y", pred_name), with = FALSE]

  #----- where the magical prediction happens ----#

  lda_fit <- lda(y ~ ., method = "moment", data = train_dat)  # model estimation

  x_test <- dat80[eom %between% c(d[i + 1], d[i + 12]), pred_name, with = FALSE]

  # Sort on the posterior probability of the class y = 1 (positive return).
  # The posterior matrix has one column per class; its first column belongs
  # to y = 0, so using it ranked stocks by the probability of a non-positive
  # return and inverted the quintile sort.
  dat80[eom %between% c(d[i + 1], d[i + 12]),
        pred := predict(lda_fit, newdata = x_test)$posterior[, "1"]]

  #----- end of prediction task ----#

}

# Per-month minimum and maximum of the predicted probabilities
Range <- dat80[, as.list(range(pred, na.rm = TRUE)), keyby = eom]

# Keep only the stock-months that received a prediction
dat_for_portfolio <- na.omit(dat80[, .(eom, ret_adj, mktcap_lag, pred)], "pred")

dat_for_portfolio[, portf_group := assign_portfolio(pred, seq(0.2, 0.8, 0.2)), by = eom]

# Market-cap-weighted return per month and portfolio group
quintile_portf <- dat_for_portfolio[, .(ret = weighted.mean(ret_adj, w = mktcap_lag, na.rm = TRUE)),
                                    keyby = .(eom, portf_group)]

setkey(quintile_portf, eom)
setkey(ff5f, eom)

quintile_portf <- merge(quintile_portf, ff5f[, .(eom, RF)], all.x = TRUE)

# Excess return over the risk-free rate
quintile_portf[, excess_ret := 100 * ret - RF]

# Wide layout: the five portfolios side by side
quintile_portf <- dcast(quintile_portf, eom ~ portf_group, value.var = "excess_ret")

# Average excess return
m <- unlist(quintile_portf[, lapply(.SD, mean), .SDcols = 2L:6L])

# Volatility
s <- unlist(quintile_portf[, lapply(.SD, sd), .SDcols = 2L:6L])

# Annualised Sharpe ratio
sh <- unlist(quintile_portf[, lapply(.SD, \(x) sqrt(12) * mean(x) / sd(x)), .SDcols = 2L:6L])

# Form long-short portfolio (column 6 minus column 2)
lda_ls_portf <- unlist(quintile_portf[, 6L] - quintile_portf[, 2L])
lda_ls_portf_wealth <- cumprod(1 + lda_ls_portf / 100)

library(xts)
lda_ls_portf_wealth <- xts(lda_ls_portf_wealth, as.Date(quintile_portf[, eom]))
lda_ls_portf <- xts(lda_ls_portf, as.Date(quintile_portf[, eom]))
plot(lda_ls_portf_wealth)

sqrt(12) * mean(lda_ls_portf) / sd(lda_ls_portf)
# NOTE: the value recorded from the original run (-0.3293088) was produced
# with the y = 0 posterior as sorting variable; re-run to refresh it.

#--------------------------Model 3 Naive Bayes----------------------------------#

# Naive Bayes classifier, re-estimated every 12 months on a 360-month window
nb_window_end <- seq(360, (length(d) - 12), by = 12)
for (i in nb_window_end) {
  train_months <- c(d[i - 359], d[i])
  test_months  <- c(d[i + 1], d[i + 12])

  # The binary variable y replaces ret_adj as the dependent variable
  train_dat <- dat80[eom %between% train_months, c("y", pred_name), with = FALSE]

  # Model estimation
  nby <- naiveBayes(y ~ ., data = train_dat)

  x_test <- dat80[eom %between% test_months, pred_name, with = FALSE]

  # Raw (per-class) predictions; the second column is used as sorting
  # variable (presumably the y = 1 class -- TODO confirm column order)
  result <- predict(nby, newdata = x_test, type = "raw")

  dat80[eom %between% test_months, pred := result[, 2]]
}

# Per-month minimum and maximum of the predictions
Range <- dat80[, as.list(range(pred, na.rm = TRUE)), keyby = eom]

# Keep only the stock-months that received a prediction
nb_cols <- c("eom", "ret_adj", "mktcap_lag", "pred")
dat_for_portfolio <- na.omit(dat80[, nb_cols, with = FALSE], cols = "pred")

dat_for_portfolio[, portf_group := assign_portfolio(pred, seq(0.2, 0.8, 0.2)),
                  by = eom]

# Market-cap-weighted return per month and portfolio group
quintile_portf <- dat_for_portfolio[
  , .(ret = weighted.mean(ret_adj, w = mktcap_lag, na.rm = TRUE)),
  keyby = .(eom, portf_group)]

setkey(quintile_portf, eom)
setkey(ff5f, eom)

quintile_portf <- merge(quintile_portf, ff5f[, .(eom, RF)], all.x = TRUE)

# Excess return over the risk-free rate
quintile_portf[, excess_ret := 100 * ret - RF]

# Wide layout: the five portfolios side by side
quintile_portf <- dcast(quintile_portf, eom ~ portf_group, value.var = "excess_ret")

# Columns 2 to 6 hold the five portfolios
nb_port_cols <- 2L:6L

# Average excess return
m <- unlist(quintile_portf[, lapply(.SD, mean), .SDcols = nb_port_cols])

# Volatility
s <- unlist(quintile_portf[, lapply(.SD, sd), .SDcols = nb_port_cols])

# Annualised Sharpe ratio
sh <- unlist(quintile_portf[, lapply(.SD, \(x) sqrt(12) * mean(x) / sd(x)),
                            .SDcols = nb_port_cols])

# Form long-short portfolio (column 6 minus column 2)
nb_ls_portf <- unlist(quintile_portf[, 6L] - quintile_portf[, 2L])
nb_ls_portf_wealth <- cumprod(1 + nb_ls_portf / 100)

library(xts)
nb_dates <- as.Date(quintile_portf[, eom])
nb_ls_portf_wealth <- xts(nb_ls_portf_wealth, nb_dates)
nb_ls_portf <- xts(nb_ls_portf, nb_dates)
plot(nb_ls_portf_wealth)

sqrt(12) * mean(nb_ls_portf) / sd(nb_ls_portf)
#[1] 0.2786669

Problem 3

From Problems 1 and 2, we have 7 long-short portfolios: four long-short portfolios are based on the predictions from linear regression, logistic regression, linear discriminant analysis, and the naive Bayes classifier; three long-short portfolios are based on simple univariate sorts on the three firm characteristics bm, roe, and gpm. For each portfolio, estimate the alpha from the following linear regression \[R_t^{portfolio}=\alpha+\beta\times MktRf_t+s\times SMB_t+h\times HML_t+\epsilon_t\]

where \(R^{portfolio}_t\) is the long-short portfolio return. Which portfolios have the statistically significant alpha?

# Every long-short portfolio built above is stored as an xts series named
# <model>_ls_portf.

library(xts)

# Restrict the factor data to the months covered by the long-short series.
# The window is taken from the series itself instead of hard-coded dates, so
# it stays correct if the sample changes.
ls_dates <- as.Date(index(ln_ls_portf))
ff5f <- ff5f[as.Date(eom) %in% ls_dates]

# Return the values of a long-short xts series in the row order of `dates`,
# matching on the date. Stops if any date has no matching return, instead of
# relying on the two objects happening to share the same row order.
align_ls_returns <- function(ls_ret, dates) {
  pos <- match(as.Date(dates), as.Date(index(ls_ret)))
  if (anyNA(pos)) {
    stop("long-short series does not cover every month in the factor data",
         call. = FALSE)
  }
  as.numeric(coredata(ls_ret))[pos]
}

# In each regression below the intercept is the portfolio's alpha.

#linear regression
ff5f[, ln_r := align_ls_returns(ln_ls_portf, eom)]
ln_model <- lm(ln_r ~ SMB + HML + MktRf, data = ff5f)
summary(ln_model)

#logistic regression
ff5f[, lg_r := align_ls_returns(lg_ls_portf, eom)]
lg_model <- lm(lg_r ~ SMB + HML + MktRf, data = ff5f)
summary(lg_model)

#lda regression
ff5f[, lda_r := align_ls_returns(lda_ls_portf, eom)]
lda_model <- lm(lda_r ~ SMB + HML + MktRf, data = ff5f)
summary(lda_model)

#naiveBayes regression
ff5f[, nb_r := align_ls_returns(nb_ls_portf, eom)]
nb_model <- lm(nb_r ~ SMB + HML + MktRf, data = ff5f)
summary(nb_model)

#Benchmark BM
ff5f[, bm_r := align_ls_returns(bm_bm_ls_portf, eom)]
bm_model <- lm(bm_r ~ SMB + HML + MktRf, data = ff5f)
summary(bm_model)

#Benchmark ROE
ff5f[, roe_r := align_ls_returns(roe_bm_ls_portf, eom)]
roe_model <- lm(roe_r ~ SMB + HML + MktRf, data = ff5f)
summary(roe_model)

#Benchmark gpm
ff5f[, gpm_r := align_ls_returns(gpm_bm_ls_portf, eom)]
gpm_model <- lm(gpm_r ~ SMB + HML + MktRf, data = ff5f)
summary(gpm_model)
# Linear-regression long-short portfolio: regress its return on SMB, HML and
# MktRf; the intercept is the alpha. Echo of the run, console output follows.
ff5f$ln_r <- ln_ls_portf[,1]
ln_model <- lm(ln_r~SMB+HML+MktRf, data=ff5f)
summary(ln_model)


Call:
lm(formula = ln_r ~ SMB + HML + MktRf, data = ff5f)

Residuals:
    Min      1Q  Median      3Q     Max 
-7.7670 -1.5805 -0.1113  1.4875  8.6292 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)   
(Intercept)  0.41379    0.15278   2.708  0.00723 **
SMB         -0.04661    0.06251  -0.746  0.45666   
HML         -0.11934    0.05147  -2.319  0.02121 * 
MktRf        0.03731    0.03630   1.028  0.30507   
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 2.401 on 248 degrees of freedom
Multiple R-squared:  0.02871,   Adjusted R-squared:  0.01696 
F-statistic: 2.443 on 3 and 248 DF,  p-value: 0.06468


# Logistic-regression long-short portfolio: regress its return on SMB, HML
# and MktRf; the intercept is the alpha. Echo of the run, console output follows.
ff5f$lg_r <- lg_ls_portf[,1]
lg_model <- lm(lg_r~SMB+HML+MktRf, data=ff5f)
summary(lg_model)

Call:
lm(formula = lg_r ~ SMB + HML + MktRf, data = ff5f)

Residuals:
     Min       1Q   Median       3Q      Max 
-17.7249  -1.9667  -0.1509   1.6251  13.6638 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept)  0.83538    0.21835   3.826 0.000165 ***
SMB         -0.91317    0.08934 -10.221  < 2e-16 ***
HML          0.38266    0.07355   5.203 4.12e-07 ***
MktRf       -0.37509    0.05188  -7.230 5.98e-12 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 3.431 on 248 degrees of freedom
Multiple R-squared:  0.4847,    Adjusted R-squared:  0.4785 
F-statistic: 77.75 on 3 and 248 DF,  p-value: < 2.2e-16

 
# LDA long-short portfolio: regress its return on SMB, HML and MktRf; the
# intercept is the alpha. Echo of the run, console output follows.
ff5f$lda_r <- lda_ls_portf[,1]
lda_model <- lm(lda_r~SMB+HML+MktRf, data=ff5f)
summary(lda_model)

Call:
lm(formula = lda_r ~ SMB + HML + MktRf, data = ff5f)

Residuals:
     Min       1Q   Median       3Q      Max 
-13.6178  -1.6345   0.1697   1.9518  17.5738 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept) -0.83351    0.21822  -3.820 0.000169 ***
SMB          0.91161    0.08929  10.210  < 2e-16 ***
HML         -0.38236    0.07351  -5.201 4.14e-07 ***
MktRf        0.37617    0.05185   7.255 5.13e-12 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 3.429 on 248 degrees of freedom
Multiple R-squared:  0.485, Adjusted R-squared:  0.4788 
F-statistic: 77.85 on 3 and 248 DF,  p-value: < 2.2e-16

 
# Naive-Bayes long-short portfolio: regress its return on SMB, HML and MktRf;
# the intercept is the alpha. Echo of the run, console output follows.
ff5f$nb_r <- nb_ls_portf[,1]
nb_model <- lm(nb_r~SMB+HML+MktRf, data=ff5f)
summary(nb_model)

Call:
lm(formula = nb_r ~ SMB + HML + MktRf, data = ff5f)

Residuals:
     Min       1Q   Median       3Q      Max 
-15.3674  -2.1513  -0.2519   2.0804  14.3057 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept)  0.84661    0.25828   3.278   0.0012 ** 
SMB         -0.85684    0.10568  -8.108 2.37e-14 ***
HML          0.60521    0.08700   6.956 3.09e-11 ***
MktRf       -0.45542    0.06137  -7.421 1.85e-12 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 4.058 on 248 degrees of freedom
Multiple R-squared:  0.4413,    Adjusted R-squared:  0.4345 
F-statistic: 65.28 on 3 and 248 DF,  p-value: < 2.2e-16

 
# bm benchmark long-short portfolio: regress its return on SMB, HML and
# MktRf; the intercept is the alpha. Echo of the run, console output follows.
ff5f$bm_r <- bm_bm_ls_portf[,1]
bm_model <- lm(bm_r~SMB+HML+MktRf, data=ff5f)
summary(bm_model)

Call:
lm(formula = bm_r ~ SMB + HML + MktRf, data = ff5f)

Residuals:
     Min       1Q   Median       3Q      Max 
-10.1867  -1.6105  -0.0729   1.6689  12.4246 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept) -0.06221    0.18832  -0.330   0.7414    
SMB          0.55690    0.07705   7.227 6.07e-12 ***
HML          0.80764    0.06344  12.732  < 2e-16 ***
MktRf        0.10966    0.04475   2.451   0.0149 *  
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 2.959 on 248 degrees of freedom
Multiple R-squared:  0.5627,    Adjusted R-squared:  0.5574 
F-statistic: 106.4 on 3 and 248 DF,  p-value: < 2.2e-16


# roe benchmark long-short portfolio: regress its return on SMB, HML and
# MktRf; the intercept is the alpha. Echo of the run, console output follows.
ff5f$roe_r <- roe_bm_ls_portf[,1]
roe_model <- lm(roe_r~SMB+HML+MktRf, data=ff5f)
summary(roe_model)

Call:
lm(formula = roe_r ~ SMB + HML + MktRf, data = ff5f)

Residuals:
     Min       1Q   Median       3Q      Max 
-20.3764  -1.9263  -0.0548   2.0979  12.5664 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept)  0.57269    0.23908   2.395  0.01734 *  
SMB         -0.93147    0.09782  -9.522  < 2e-16 ***
HML          0.24539    0.08053   3.047  0.00256 ** 
MktRf       -0.43140    0.05681  -7.594  6.3e-13 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 3.756 on 248 degrees of freedom
Multiple R-squared:  0.4706,    Adjusted R-squared:  0.4642 
F-statistic: 73.49 on 3 and 248 DF,  p-value: < 2.2e-16

 
# gpm benchmark long-short portfolio: regress its return on SMB, HML and
# MktRf; the intercept is the alpha. Echo of the run, console output follows.
ff5f$gpm_r <- gpm_bm_ls_portf[,1]
gpm_model <- lm(gpm_r~SMB+HML+MktRf, data=ff5f)
summary(gpm_model)

Call:
lm(formula = gpm_r ~ SMB + HML + MktRf, data = ff5f)

Residuals:
   Min     1Q Median     3Q    Max 
-9.215 -1.422  0.071  1.209  7.010 

Coefficients:
             Estimate Std. Error t value Pr(>|t|)    
(Intercept)  0.085040   0.139514   0.610    0.543    
SMB         -0.526780   0.057085  -9.228  < 2e-16 ***
HML         -0.346262   0.046996  -7.368 2.57e-12 ***
MktRf       -0.006491   0.033149  -0.196    0.845    
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 2.192 on 248 degrees of freedom
Multiple R-squared:  0.448, Adjusted R-squared:  0.4413 
F-statistic: 67.09 on 3 and 248 DF,  p-value: < 2.2e-16

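Conclusion: at the 5% significance level, the alphas of the linear regression, logistic regression, LDA, naive Bayes and ROE-benchmark long-short portfolios are statistically significant (顯著). In the output above the LDA alpha is significantly negative (-0.83), whereas the other four are positive. The alphas of the bm and gpm benchmark portfolios are not statistically significant.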