=========================

Load Data

=========================

# Load the raw CSV (symbol, date, ri, MKT, SMB, HML); readr guesses the
# column types, so `date` arrives as text and is converted later.
data <- read_csv(file = "data (1).csv")
## Rows: 7542 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): symbol, date
## dbl (4): ri, MKT, SMB, HML
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Preview the first six rows to sanity-check the import.
head(x = data, n = 6L)
## # A tibble: 6 × 6
##   symbol date             ri      MKT     SMB     HML
##   <chr>  <chr>         <dbl>    <dbl>   <dbl>   <dbl>
## 1 AAPL   4-Jan-11   0.00521  -0.00131 -0.0065  0.0008
## 2 AAPL   5-Jan-11   0.00815   0.00499  0.0018  0.0013
## 3 AAPL   6-Jan-11  -0.000808 -0.00213  0.0001 -0.0025
## 4 AAPL   7-Jan-11   0.00714  -0.00185  0.0022 -0.0006
## 5 AAPL   10-Jan-11  0.0187   -0.00138  0.0041  0.0039
## 6 AAPL   11-Jan-11 -0.00237   0.00372  0.0016  0.0036

=========================

Data Preparation

=========================

# Convert the text dates (e.g. "4-Jan-11") into Date values.
# NOTE(review): `%b` matches abbreviated month names in the current locale
# (see ?strptime) -- confirm the session uses English month names, otherwise
# the conversion yields NA.
data <- mutate(data, date = as.Date(date, format = "%d-%b-%y"))

=========================

Summary Statistics

=========================

# Per-column summary: date range plus quartiles of the return and factors.
summary(object = data)
##     symbol               date                  ri            
##  Length:7542        Min.   :2011-01-04   Min.   :-0.3908663  
##  Class :character   1st Qu.:2012-04-03   1st Qu.:-0.0087263  
##  Mode  :character   Median :2013-07-05   Median : 0.0000000  
##                     Mean   :2013-07-03   Mean   : 0.0002109  
##                     3rd Qu.:2014-10-02   3rd Qu.: 0.0093507  
##                     Max.   :2015-12-31   Max.   : 0.9614112  
##       MKT                  SMB                  HML          
##  Min.   :-0.0689583   Min.   :-1.660e-02   Min.   :-0.01490  
##  1st Qu.:-0.0040125   1st Qu.:-3.100e-03   1st Qu.:-0.00260  
##  Median : 0.0005438   Median : 1.000e-04   Median : 0.00000  
##  Mean   : 0.0003774   Mean   : 2.227e-06   Mean   : 0.00013  
##  3rd Qu.: 0.0052641   3rd Qu.: 3.100e-03   3rd Qu.: 0.00260  
##  Max.   : 0.0463174   Max.   : 2.490e-02   Max.   : 0.02250

=========================

Fama-French Regression

=========================

Model: ri ~ MKT + SMB + HML

# Pooled OLS fit: every row of `data` (all symbols stacked together) enters a
# single regression of the stock return on the three factors.
ff_model <- lm(formula = ri ~ MKT + SMB + HML, data = data)

# Coefficient table, residual summary and overall fit of the pooled model.
summary(object = ff_model)
## 
## Call:
## lm(formula = ri ~ MKT + SMB + HML, data = data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.39161 -0.00644 -0.00021  0.00660  0.97046 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -0.0001403  0.0002893  -0.485    0.628    
## MKT          0.9264396  0.0296491  31.247   <2e-16 ***
## SMB         -0.0001304  0.0603643  -0.002    0.998    
## HML          0.0120079  0.0655537   0.183    0.855    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.0251 on 7538 degrees of freedom
## Multiple R-squared:  0.1148, Adjusted R-squared:  0.1144 
## F-statistic: 325.9 on 3 and 7538 DF,  p-value: < 2.2e-16

=========================

Regression per Stock

=========================

# Fit the three-factor regression separately for each symbol and stack the
# tidy coefficient tables (term, estimate, std.error, statistic, p.value).
# `group_modify()` replaces the superseded `do()`; the result is still a
# tibble grouped by `symbol`, with one row per symbol/term combination.
results <- data %>%
  group_by(symbol) %>%
  group_modify(~ tidy(lm(ri ~ MKT + SMB + HML, data = .x)))

results
## # A tibble: 24 × 6
## # Groups:   symbol [6]
##    symbol term         estimate std.error statistic   p.value
##    <chr>  <chr>           <dbl>     <dbl>     <dbl>     <dbl>
##  1 AAPL   (Intercept)  0.000365  0.000405     0.900 3.68e-  1
##  2 AAPL   MKT          0.900     0.0415      21.7   9.11e- 89
##  3 AAPL   SMB          0.0685    0.0845       0.811 4.18e-  1
##  4 AAPL   HML         -0.0578    0.0918      -0.630 5.29e-  1
##  5 FORD   (Intercept) -0.000794  0.00156     -0.508 6.11e-  1
##  6 FORD   MKT          0.513     0.160        3.20  1.39e-  3
##  7 FORD   SMB         -0.264     0.326       -0.812 4.17e-  1
##  8 FORD   HML          0.138     0.354        0.390 6.97e-  1
##  9 GE     (Intercept)  0.000142  0.000237     0.599 5.49e-  1
## 10 GE     MKT          1.08      0.0243      44.4   9.87e-260
## # ℹ 14 more rows

=========================

Model Fit Statistics per Stock

=========================

# One row of model-level fit statistics per symbol (r.squared, adj.r.squared,
# sigma, F statistic, AIC, ...), produced by `glance()`.
# Despite its name, `coef_table` holds fit diagnostics rather than
# coefficients; the name is kept so existing references continue to work.
# `group_modify()` replaces the superseded `do()`; the result is still a
# tibble grouped by `symbol`.
coef_table <- data %>%
  group_by(symbol) %>%
  group_modify(~ glance(lm(ri ~ MKT + SMB + HML, data = .x)))

coef_table
## # A tibble: 6 × 13
## # Groups:   symbol [6]
##   symbol r.squared adj.r.squared   sigma statistic   p.value    df logLik    AIC
##   <chr>      <dbl>         <dbl>   <dbl>     <dbl>     <dbl> <dbl>  <dbl>  <dbl>
## 1 AAPL     0.273         0.271   0.0143     157.   2.89e- 86     3  3553. -7097.
## 2 FORD     0.00904       0.00667 0.0553       3.81 9.80e-  3     3  1857. -3704.
## 3 GE       0.613         0.612   0.00838    660.   2.48e-257     3  4229. -8448.
## 4 GM       0.438         0.437   0.0142     325.   3.31e-156     3  3564. -7119.
## 5 IBM      0.425         0.424   0.00927    309.   3.02e-150     3  4103. -8195.
## 6 MSFT     0.405         0.404   0.0114     285.   6.34e-141     3  3840. -7671.
## # ℹ 4 more variables: BIC <dbl>, deviance <dbl>, df.residual <int>, nobs <int>

=========================

Visualization

=========================

Market vs Return

# Scatter of stock return against the market factor for all rows, overlaid
# with a fitted straight line from `geom_smooth(method = "lm")`.
ggplot(data = data, mapping = aes(x = MKT, y = ri)) +
  geom_point(alpha = 0.5) +
  geom_smooth(method = "lm") +
  labs(title = "Return vs Market Factor")
## `geom_smooth()` using formula = 'y ~ x'


=========================

Interpretation

=========================

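The pooled regression shows how stock returns are explained by the three Fama-French factors (MKT, SMB and HML). The market beta is about 0.93 and highly significant (p < 2e-16), whereas the SMB and HML coefficients are not statistically different from zero (p = 0.998 and p = 0.855). Together the three factors explain about 11% of the variation in the pooled returns (R-squared = 0.1148).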


=========================

Conclusion

=========================

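The Fama-French model explains stock returns using multiple risk factors. Results indicate that market risk (MKT) is the most significant predictor: it is highly significant in the pooled regression and in every per-stock estimate shown. SMB and HML are not statistically significant in the pooled regression or in the per-stock estimates shown for AAPL and FORD, so any additional explanatory power they provide depends on the stock and must be checked case by case.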