Get the Stock Data from Yahoo Finance and Clean the Data

# Set-up: attach quantmod and download price histories from Yahoo Finance.
#
# The script no longer begins with rm(list = ls()): that call deletes every
# object in the caller's workspace. Run the script in a fresh R session if a
# clean environment is wanted.

# Get the financial data from Yahoo Finance!
# install.packages("quantmod")
library(quantmod)
## Loading required package: xts
## Warning: package 'xts' was built under R version 4.0.2
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
## Loading required package: TTR
## Warning: package 'TTR' was built under R version 4.0.2
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo

# A single call fetches all five tickers; each one is stored in the workspace
# under its ticker name (AAPL, BAC, AXP, KO, CVX), which the code below uses.
# No date range is passed, so the number of rows depends on the download date.
tickers <- c("AAPL", "BAC", "AXP", "KO", "CVX")
getSymbols(tickers, src = "yahoo")
# Monthly log returns in percent, one single-column series per stock.
# Column 6 of each downloaded series is the price input (presumably the
# adjusted close in quantmod's Yahoo layout -- confirm).
monthly_log_return_pct <- function(prices, label) {
  returns <- 100 * monthlyReturn(prices[, 6], type = "log")
  colnames(returns) <- label
  returns
}

AAPL_m <- monthly_log_return_pct(AAPL, "AAPL")
BAC_m <- monthly_log_return_pct(BAC, "BAC")
AXP_m <- monthly_log_return_pct(AXP, "AXP")
KO_m <- monthly_log_return_pct(KO, "KO")
CVX_m <- monthly_log_return_pct(CVX, "CVX")

# Monthly factor table (columns 2:8 are Mkt.RF, SMB, HML, RMW, CMA, MOM, RF).
# The default location is specific to one machine; set the environment
# variable MONTHLY_FACTOR_CSV to read the file from somewhere else.
factor_csv <- Sys.getenv(
  "MONTHLY_FACTOR_CSV",
  unset = "/Volumes/GoogleDrive/My Drive/CBS/1 งานสอน/704 Summer 2023/Data/MonthlyFactor.csv"
)
if (!file.exists(factor_csv)) {
  stop(
    "Monthly factor file not found: ", factor_csv,
    " (set MONTHLY_FACTOR_CSV to its location)",
    call. = FALSE
  )
}
MonthlyFactor <- read.csv(factor_csv)

# Print the row/column counts of each return series and of the factor table,
# in the order AAPL, BAC, AXP, KO, CVX, MonthlyFactor.
invisible(lapply(
  list(AAPL_m, BAC_m, AXP_m, KO_m, CVX_m, MonthlyFactor),
  function(series) print(dim(series))
))
## [1] 198   1
## [1] 198   1
## [1] 198   1
## [1] 198   1
## [1] 198   1
## [1] 196   8
# Trim every return series to the length of the factor table and merge
# everything into one data frame called `data`.
#
# Rows are matched by position only: the first TT monthly returns are paired
# with the TT rows of the factor table.
# NOTE(review): this assumes both data sets start in the same month -- confirm
# against the date column of MonthlyFactor.csv.
TT <- nrow(MonthlyFactor)

# Fail early if the factor table is empty or a stock has fewer than TT months.
stopifnot(
  TT > 0,
  nrow(AAPL_m) >= TT,
  nrow(BAC_m) >= TT,
  nrow(AXP_m) >= TT,
  nrow(KO_m) >= TT,
  nrow(CVX_m) >= TT
)

# as.matrix() takes no nrow/ncol arguments, so only the object is passed.
keep <- seq_len(TT)
AAPL_m <- as.matrix(AAPL_m[keep, ])
BAC_m <- as.matrix(BAC_m[keep, ])
AXP_m <- as.matrix(AXP_m[keep, ])
KO_m <- as.matrix(KO_m[keep, ])
CVX_m <- as.matrix(CVX_m[keep, ])

# Columns 2:8 of the factor table hold the factors and RF; column 1 is dropped.
MonthlyFactor <- as.matrix(MonthlyFactor[, 2:8])

data <- cbind(AAPL_m, BAC_m, AXP_m, KO_m, CVX_m, MonthlyFactor)
data <- as.data.frame(data)
str(data)
## 'data.frame':    196 obs. of  12 variables:
##  $ AAPL  : num  2.28 -1.32 9.36 7.15 19.42 ...
##  $ BAC   : num  -1.416 -2.276 0.373 -0.235 0.726 ...
##  $ AXP   : num  -3.61 -2.35 -0.83 7.57 6.86 ...
##  $ KO    : num  -1.45 -2.54 3.5 8.37 1.52 ...
##  $ CVX   : num  2.66 -5.45 7.64 5.05 5.37 ...
##  $ Mkt.RF: num  1.4 -1.96 0.68 3.49 3.24 -1.96 -3.73 0.92 3.22 1.8 ...
##  $ SMB   : num  0.08 1.28 0.19 -2.03 0.4 0.75 -2.98 -0.37 -2.43 -0.03 ...
##  $ HML   : num  -0.69 -0.13 -0.96 -1.46 -0.65 -1.07 -3.72 -1.86 -2.23 -3.05 ...
##  $ RMW   : num  0.25 -0.51 0.64 1.15 1.58 0.53 0.2 -1.21 -0.52 -0.3 ...
##  $ CMA   : num  0.38 -0.71 -0.65 1.04 -1.36 0.08 -1.13 -0.53 -3.02 -0.09 ...
##  $ MOM   : num  0.24 -1.35 2.56 -0.24 -0.34 0.51 2.94 0.1 4.63 5.02 ...
##  $ RF    : num  0.44 0.38 0.43 0.44 0.41 0.4 0.4 0.42 0.32 0.32 ...
# Data exploration: per-column summary statistics (n, mean, sd, median,
# skew, kurtosis, ...) for the merged returns and factors.
psych::describe(x = data)
##        vars   n  mean    sd median trimmed  mad    min   max  range  skew
## AAPL      1 196  2.14  9.11   2.88    2.59 7.95 -39.98 21.33  61.31 -0.97
## BAC       2 196 -0.15 13.17   0.41    0.45 9.42 -76.07 54.89 130.96 -1.12
## AXP       3 196  0.63  9.34   0.90    0.99 5.84 -32.73 62.87  95.60  0.87
## KO        4 196  0.75  4.75   1.10    0.97 4.42 -18.25 13.27  31.52 -0.72
## CVX       5 196  0.76  7.02   1.14    0.72 5.63 -25.32 24.13  49.45  0.07
## Mkt.RF    6 196  0.76  4.73   1.28    0.98 3.81 -17.23 13.65  30.88 -0.53
## SMB       7 196  0.01  2.62   0.14   -0.05 2.56  -8.30  7.13  15.43  0.15
## HML       8 196 -0.20  3.37  -0.45   -0.30 2.40 -13.95 12.75  26.70  0.13
## RMW       9 196  0.37  1.91   0.39    0.29 1.59  -4.78  7.22  12.00  0.51
## CMA      10 196  0.11  1.98  -0.04    0.01 1.84  -6.92  7.74  14.66  0.49
## MOM      11 196  0.05  4.72   0.44    0.39 3.63 -34.30 12.75  47.05 -2.28
## RF       12 196  0.07  0.11   0.01    0.05 0.01   0.00  0.44   0.44  1.65
##        kurtosis   se
## AAPL       2.91 0.65
## BAC        7.37 0.94
## AXP       10.34 0.67
## KO         1.77 0.34
## CVX        1.68 0.50
## Mkt.RF     0.97 0.34
## SMB        0.34 0.19
## HML        2.53 0.24
## RMW        1.06 0.14
## CMA        1.60 0.14
## MOM       13.81 0.34
## RF         1.85 0.01
# Data visualization: pairwise scatter plots of every column in `data`.
# install.packages("scatterPlotMatrix")
library(scatterPlotMatrix)
# Plain scatter-plot matrix.
scatterPlotMatrix(data = data)
# Same matrix with regressionType = 1 (presumably linear fits -- see the
# package documentation).
scatterPlotMatrix(data = data, regressionType = 1)

Factor Models of a Single Stock

# Model Estimation 

# Single Factor Model: Capital Asset Pricing Model
# AAPL's return in excess of RF is regressed on the market factor Mkt.RF.
# Ref: Sharpe (1964), "Capital Asset Prices", Journal of Finance.
AAPL_1f <- lm(
  formula = AAPL - RF ~ Mkt.RF,
  data = data
)
summary(AAPL_1f)
## 
## Call:
## lm(formula = AAPL - RF ~ Mkt.RF, data = data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -31.966  -3.359   0.304   4.564  17.708 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   1.1769     0.5221   2.254   0.0253 *  
## Mkt.RF        1.1784     0.1092  10.794   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 7.218 on 194 degrees of freedom
## Multiple R-squared:  0.3752, Adjusted R-squared:  0.372 
## F-statistic: 116.5 on 1 and 194 DF,  p-value: < 2.2e-16
# Three-Factor Model: market, size (SMB) and value (HML) factors.
# Ref: Fama & French (1993), "Common risk factors in the returns on stocks
# and bonds", Journal of Financial Economics.
AAPL_3f <- lm(
  formula = AAPL - RF ~ Mkt.RF + SMB + HML,
  data = data
)
summary(AAPL_3f)
## 
## Call:
## lm(formula = AAPL - RF ~ Mkt.RF + SMB + HML, data = data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -28.283  -3.353  -0.032   4.366  16.290 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   0.9338     0.4909   1.902   0.0586 .  
## Mkt.RF        1.3283     0.1099  12.083  < 2e-16 ***
## SMB          -0.3410     0.2067  -1.650   0.1006    
## HML          -0.6811     0.1518  -4.486 1.25e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 6.757 on 192 degrees of freedom
## Multiple R-squared:  0.458,  Adjusted R-squared:  0.4496 
## F-statistic: 54.09 on 3 and 192 DF,  p-value: < 2.2e-16
# Four-Factor Model: the three-factor model plus the momentum factor MOM.
# Ref: Carhart (1997), "On persistence in mutual fund performance",
# Journal of Finance.
AAPL_4f <- lm(
  formula = AAPL - RF ~ Mkt.RF + SMB + HML + MOM,
  data = data
)
summary(AAPL_4f)
## 
## Call:
## lm(formula = AAPL - RF ~ Mkt.RF + SMB + HML + MOM, data = data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -27.9873  -3.4547  -0.0483   4.3781  16.3360 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.92628    0.49274   1.880   0.0617 .  
## Mkt.RF       1.33904    0.11626  11.518  < 2e-16 ***
## SMB         -0.33438    0.20842  -1.604   0.1103    
## HML         -0.66880    0.15809  -4.231 3.61e-05 ***
## MOM          0.03418    0.11826   0.289   0.7729    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 6.774 on 191 degrees of freedom
## Multiple R-squared:  0.4583, Adjusted R-squared:  0.4469 
## F-statistic:  40.4 on 4 and 191 DF,  p-value: < 2.2e-16
# Five-Factor Model: the three-factor model plus RMW and CMA.
# Ref: Fama & French (2015), "A five-factor asset pricing model",
# Journal of Financial Economics.
AAPL_5f <- lm(
  formula = AAPL - RF ~ Mkt.RF + SMB + HML + RMW + CMA,
  data = data
)
summary(AAPL_5f)
## 
## Call:
## lm(formula = AAPL - RF ~ Mkt.RF + SMB + HML + RMW + CMA, data = data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -28.9811  -3.5060  -0.0402   4.2576  16.0626 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   0.7651     0.5056   1.513   0.1319    
## Mkt.RF        1.3176     0.1132  11.638   <2e-16 ***
## SMB          -0.1999     0.2207  -0.906   0.3663    
## HML          -0.6381     0.1968  -3.243   0.0014 ** 
## RMW           0.5513     0.2738   2.014   0.0455 *  
## CMA          -0.2148     0.3222  -0.667   0.5058    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 6.717 on 190 degrees of freedom
## Multiple R-squared:  0.4701, Adjusted R-squared:  0.4561 
## F-statistic: 33.71 on 5 and 190 DF,  p-value: < 2.2e-16
# Five-Factor Model + Momentum Factor (six regressors in total).
# Ref: Fama & French (2018), "Choosing factors",
# Journal of Financial Economics.
AAPL_6f <- lm(
  formula = AAPL - RF ~ Mkt.RF + SMB + HML + RMW + CMA + MOM,
  data = data
)
summary(AAPL_6f)
## 
## Call:
## lm(formula = AAPL - RF ~ Mkt.RF + SMB + HML + RMW + CMA + MOM, 
##     data = data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -28.5319  -3.5468   0.2156   4.2713  15.8681 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.76042    0.50681   1.500   0.1352    
## Mkt.RF       1.33144    0.11770  11.312   <2e-16 ***
## SMB         -0.19148    0.22204  -0.862   0.3896    
## HML         -0.60690    0.20942  -2.898   0.0042 ** 
## RMW          0.55341    0.27440   2.017   0.0451 *  
## CMA         -0.24585    0.33041  -0.744   0.4578    
## MOM          0.05329    0.12025   0.443   0.6582    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 6.731 on 189 degrees of freedom
## Multiple R-squared:  0.4706, Adjusted R-squared:  0.4538 
## F-statistic: 28.01 on 6 and 189 DF,  p-value: < 2.2e-16
# Expected Return, Variance and SD Implied by the Models

# Example: Single Factor Model
#
# Mean and factor-driven SD implied by the one-factor fit. The regression
# response is AAPL - RF, so EX_AAPL is a mean excess return. VX_AAPL holds
# only the part of the variance explained by the factor; the residual
# variance of the regression is not added.

model <- AAPL_1f
alpha <- coef(model)[["(Intercept)"]]
beta1 <- coef(model)[["Mkt.RF"]]
ave_F1 <- mean(data[["Mkt.RF"]])
var_F1 <- var(data[["Mkt.RF"]])

VX_AAPL <- var_F1 * beta1^2
SD_AAPL <- sqrt(VX_AAPL)
EX_AAPL <- alpha + ave_F1 * beta1

print(c(EX_AAPL, SD_AAPL))
## [1] 2.067393 5.579100
# Example: Three-Factor Model
#
# Mean and factor-driven SD implied by the three-factor fit. The regression
# response is AAPL - RF, so EX_AAPL is a mean excess return. VX_AAPL holds
# only the variance of the factor component; the residual variance of the
# regression is not added.

model  <- AAPL_3f
alpha  <- coef(model)[["(Intercept)"]]
beta1  <- coef(model)[["Mkt.RF"]]
beta2  <- coef(model)[["SMB"]]
beta3  <- coef(model)[["HML"]]
ave_F1 <- mean(data$Mkt.RF)
ave_F2 <- mean(data$SMB)
ave_F3 <- mean(data$HML)
# The individual factor variances are kept for reference; the variance below
# uses the full covariance matrix instead.
var_F1 <- var(data$Mkt.RF)
var_F2 <- var(data$SMB)
var_F3 <- var(data$HML)

EX_AAPL <- alpha + beta1 * ave_F1 + beta2 * ave_F2 + beta3 * ave_F3

# The factors need not be uncorrelated, so the variance of
# beta1 * F1 + beta2 * F2 + beta3 * F3 is the quadratic form b' Sigma b.
# Summing only beta_i^2 * var(F_i) would drop the terms
# 2 * beta_i * beta_j * cov(F_i, F_j).
betas <- c(beta1, beta2, beta3)
factor_cov <- cov(data[, c("Mkt.RF", "SMB", "HML")])
VX_AAPL <- as.numeric(t(betas) %*% factor_cov %*% betas)
SD_AAPL <- sqrt(VX_AAPL)

print(c(EX_AAPL, SD_AAPL))
## Output recorded before the covariance terms were included; the second
## number (SD) changes with the formula above -- re-run to refresh:
## [1] 2.067393 6.753934

Factor Models of a Portfolio

# Portfolio weights, one per stock. Any allocation can be used here; the
# values below give an equally weighted portfolio.

w1 <- 0.2 # AAPL
w2 <- 0.2 # BAC
w3 <- 0.2 # AXP
w4 <- 0.2 # KO
w5 <- 0.2 # CVX

# Portfolio return = weighted sum of the five stock returns.
# The fifth term must use CVX: with KO in its place, KO is counted twice and
# CVX never enters the portfolio.
data$PORT <- w1 * data$AAPL + w2 * data$BAC + w3 * data$AXP +
  w4 * data$KO + w5 * data$CVX

# NOTE: any regression output recorded further down for PORT was produced
# when the fifth term was w5 * data$KO; re-run the script to refresh it.

# Distribution of the portfolio return.
hist(data$PORT)

# Portfolio return plotted against the observation index.
plot(data$PORT)

# Model Estimation 

# Single Factor Model: Capital Asset Pricing Model
# The portfolio return in excess of RF is regressed on the market factor.
# Ref: Sharpe (1964), "Capital Asset Prices", Journal of Finance.
PORT_1f <- lm(
  formula = PORT - RF ~ Mkt.RF,
  data = data
)
summary(PORT_1f)
## 
## Call:
## lm(formula = PORT - RF ~ Mkt.RF, data = data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -11.8391  -1.5833   0.1316   1.5966  10.4050 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -0.06956    0.22473   -0.31    0.757    
## Mkt.RF       1.08563    0.04699   23.10   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.107 on 194 degrees of freedom
## Multiple R-squared:  0.7334, Adjusted R-squared:  0.732 
## F-statistic: 533.7 on 1 and 194 DF,  p-value: < 2.2e-16
# Three-Factor Model: market, size (SMB) and value (HML) factors.
# Ref: Fama & French (1993), "Common risk factors in the returns on stocks
# and bonds", Journal of Financial Economics.
PORT_3f <- lm(
  formula = PORT - RF ~ Mkt.RF + SMB + HML,
  data = data
)
summary(PORT_3f)
## 
## Call:
## lm(formula = PORT - RF ~ Mkt.RF + SMB + HML, data = data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -10.8818  -1.6646   0.1348   1.6307   9.6572 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.003546   0.201839   0.018    0.986    
## Mkt.RF       1.104350   0.045201  24.432  < 2e-16 ***
## SMB         -0.342317   0.084977  -4.028 8.09e-05 ***
## HML          0.421827   0.062438   6.756 1.65e-10 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.779 on 192 degrees of freedom
## Multiple R-squared:  0.789,  Adjusted R-squared:  0.7857 
## F-statistic: 239.3 on 3 and 192 DF,  p-value: < 2.2e-16
# Four-Factor Model: the three-factor model plus the momentum factor MOM.
# Ref: Carhart (1997), "On persistence in mutual fund performance",
# Journal of Finance.
PORT_4f <- lm(
  formula = PORT - RF ~ Mkt.RF + SMB + HML + MOM,
  data = data
)
summary(PORT_4f)
## 
## Call:
## lm(formula = PORT - RF ~ Mkt.RF + SMB + HML + MOM, data = data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -11.4068  -1.5627   0.1103   1.8343   7.8714 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.03196    0.19889   0.161  0.87252    
## Mkt.RF       1.06396    0.04693  22.673  < 2e-16 ***
## SMB         -0.36728    0.08413  -4.366 2.07e-05 ***
## HML          0.37531    0.06381   5.882 1.79e-08 ***
## MOM         -0.12888    0.04773  -2.700  0.00756 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.734 on 191 degrees of freedom
## Multiple R-squared:  0.7967, Adjusted R-squared:  0.7925 
## F-statistic: 187.2 on 4 and 191 DF,  p-value: < 2.2e-16
# Five-Factor Model: the three-factor model plus RMW and CMA.
# Ref: Fama & French (2015), "A five-factor asset pricing model",
# Journal of Financial Economics.
PORT_5f <- lm(
  formula = PORT - RF ~ Mkt.RF + SMB + HML + RMW + CMA,
  data = data
)
summary(PORT_5f)
## 
## Call:
## lm(formula = PORT - RF ~ Mkt.RF + SMB + HML + RMW + CMA, data = data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -11.2840  -1.6244   0.2145   1.7008   9.6228 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.11524    0.20661   0.558   0.5777    
## Mkt.RF       1.07337    0.04626  23.202  < 2e-16 ***
## SMB         -0.38911    0.09020  -4.314 2.58e-05 ***
## HML          0.55415    0.08041   6.892 7.89e-11 ***
## RMW         -0.07276    0.11187  -0.650   0.5162    
## CMA         -0.32408    0.13166  -2.461   0.0147 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.745 on 190 degrees of freedom
## Multiple R-squared:  0.7962, Adjusted R-squared:  0.7909 
## F-statistic: 148.5 on 5 and 190 DF,  p-value: < 2.2e-16
# Five-Factor Model + Momentum Factor (six regressors in total).
# Ref: Fama & French (2018), "Choosing factors",
# Journal of Financial Economics.
PORT_6f <- lm(
  formula = PORT - RF ~ Mkt.RF + SMB + HML + RMW + CMA + MOM,
  data = data
)
summary(PORT_6f)
## 
## Call:
## lm(formula = PORT - RF ~ Mkt.RF + SMB + HML + RMW + CMA + MOM, 
##     data = data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -11.637  -1.615   0.244   1.750   7.035 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.12487    0.20450   0.611   0.5422    
## Mkt.RF       1.04503    0.04749  22.004  < 2e-16 ***
## SMB         -0.40635    0.08959  -4.536 1.02e-05 ***
## HML          0.49038    0.08450   5.803 2.70e-08 ***
## RMW         -0.07704    0.11072  -0.696   0.4874    
## CMA         -0.26069    0.13332  -1.955   0.0520 .  
## MOM         -0.10879    0.04852  -2.242   0.0261 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.716 on 189 degrees of freedom
## Multiple R-squared:  0.8015, Adjusted R-squared:  0.7952 
## F-statistic: 127.2 on 6 and 189 DF,  p-value: < 2.2e-16
# You can also calculate the expected returns and the variance and the SD of this portfolio