# -------------------------------------------------------
# Load required libraries
# broom : tidy() converts model output into data frames
# tidyverse : data wrangling and piping with dplyr + purrr
# knitr : kable() for formatted tables in the report
# -------------------------------------------------------
library(broom)
library(tidyverse)
## Warning: package 'ggplot2' was built under R version 4.5.2
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.1 ✔ stringr 1.5.2
## ✔ ggplot2 4.0.2 ✔ tibble 3.3.0
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.1.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(knitr)
# -------------------------------------------------------
# Load the dataset
# The panel contains daily returns for 6 U.S. stocks
# along with daily Fama-French three-factor values
# -------------------------------------------------------
data <- read.csv("/Users/faizhaikal/Downloads/data.csv")
# Quick preview of the data structure
glimpse(data)
## Rows: 7,542
## Columns: 6
## $ symbol <chr> "AAPL", "AAPL", "AAPL", "AAPL", "AAPL", "AAPL", "AAPL", "AAPL",…
## $ date <chr> "4-Jan-11", "5-Jan-11", "6-Jan-11", "7-Jan-11", "10-Jan-11", "1…
## $ ri <dbl> 0.0052062641, 0.0081462879, -0.0008082435, 0.0071360567, 0.0186…
## $ MKT <dbl> -0.0013138901, 0.0049946699, -0.0021252276, -0.0018465050, -0.0…
## $ SMB <dbl> -0.0065, 0.0018, 0.0001, 0.0022, 0.0041, 0.0016, 0.0031, -0.002…
## $ HML <dbl> 0.0008, 0.0013, -0.0025, -0.0006, 0.0039, 0.0036, 0.0000, -0.00…
# -------------------------------------------------------
# Summary statistics for each variable
# -------------------------------------------------------
data %>%
select(ri, MKT, SMB, HML) %>%
summary()
## ri MKT SMB
## Min. :-0.3908663 Min. :-0.0689583 Min. :-1.660e-02
## 1st Qu.:-0.0087263 1st Qu.:-0.0040125 1st Qu.:-3.100e-03
## Median : 0.0000000 Median : 0.0005438 Median : 1.000e-04
## Mean : 0.0002109 Mean : 0.0003774 Mean : 2.227e-06
## 3rd Qu.: 0.0093507 3rd Qu.: 0.0052641 3rd Qu.: 3.100e-03
## Max. : 0.9614112 Max. : 0.0463174 Max. : 2.490e-02
## HML
## Min. :-0.01490
## 1st Qu.:-0.00260
## Median : 0.00000
## Mean : 0.00013
## 3rd Qu.: 0.00260
## Max. : 0.02250
# -------------------------------------------------------
# Display the first few rows in a formatted table
# -------------------------------------------------------
head(data, 10) %>%
kable(
caption = "Table 1: First 10 Observations of the Dataset",
digits = 6,
align = "c"
)
Table 1: First 10 Observations of the Dataset
| AAPL |
4-Jan-11 |
0.005206 |
-0.001314 |
-0.0065 |
0.0008 |
| AAPL |
5-Jan-11 |
0.008146 |
0.004995 |
0.0018 |
0.0013 |
| AAPL |
6-Jan-11 |
-0.000808 |
-0.002125 |
0.0001 |
-0.0025 |
| AAPL |
7-Jan-11 |
0.007136 |
-0.001847 |
0.0022 |
-0.0006 |
| AAPL |
10-Jan-11 |
0.018657 |
-0.001377 |
0.0041 |
0.0039 |
| AAPL |
11-Jan-11 |
-0.002368 |
0.003718 |
0.0016 |
0.0036 |
| AAPL |
12-Jan-11 |
0.008104 |
0.008967 |
0.0031 |
0.0000 |
| AAPL |
13-Jan-11 |
0.003652 |
-0.001712 |
-0.0026 |
-0.0044 |
| AAPL |
14-Jan-11 |
0.008067 |
0.007357 |
-0.0010 |
-0.0073 |
| AAPL |
18-Jan-11 |
-0.022725 |
0.001375 |
0.0056 |
0.0015 |
# -------------------------------------------------------
# STEP 0: Time-Series Regressions
#
# For each stock (symbol), regress ri on MKT, SMB, HML
# across all time periods to obtain factor betas.
# -------------------------------------------------------
step0_betas <- data %>%
# Group data by stock and nest the time-series data for each stock
nest(data = c(date, ri, MKT, SMB, HML)) %>%
# For each stock, run OLS: ri = alpha + b_MKT*MKT + b_SMB*SMB + b_HML*HML
mutate(estimates = map(
data,
~tidy(lm(ri ~ MKT + SMB + HML, data = .x))
)) %>%
# Unnest to bring model results into the main data frame
unnest(estimates) %>%
# Keep only the symbol, coefficient name, and coefficient value
select(symbol, estimate, term) %>%
# Pivot from long to wide: each factor gets its own column
pivot_wider(
names_from = term,
values_from = estimate
) %>%
# Rename for clarity; drop the intercept (not needed in Step 1)
select(
symbol,
b_MKT = MKT,
b_SMB = SMB,
b_HML = HML
)
# Display the estimated betas for each stock
step0_betas %>%
kable(
caption = "Table 2: Estimated Factor Betas from Time-Series Regressions (Step 0)",
digits = 4,
align = "c"
)
Table 2: Estimated Factor Betas from Time-Series Regressions
(Step 0)
| AAPL |
0.9000 |
0.0685 |
-0.0578 |
| FORD |
0.5129 |
-0.2644 |
0.1380 |
| GE |
1.0779 |
0.0994 |
0.0902 |
| GM |
1.2854 |
0.0039 |
-0.0222 |
| IBM |
0.8169 |
0.0336 |
-0.0121 |
| MSFT |
0.9656 |
0.0582 |
-0.0641 |
# -------------------------------------------------------
# Merge the estimated betas back into the original dataset
# so that each daily observation now carries its stock's
# factor loadings — ready for the cross-sectional step.
# -------------------------------------------------------
step0 <- data %>%
left_join(step0_betas, by = "symbol")
# Preview the merged dataset
head(step0, 6) %>%
kable(
caption = "Table 3: Dataset After Merging Factor Betas (Step 0 Output)",
digits = 6,
align = "c"
)
Table 3: Dataset After Merging Factor Betas (Step 0
Output)
| AAPL |
4-Jan-11 |
0.005206 |
-0.001314 |
-0.0065 |
0.0008 |
0.900006 |
0.068535 |
-0.057821 |
| AAPL |
5-Jan-11 |
0.008146 |
0.004995 |
0.0018 |
0.0013 |
0.900006 |
0.068535 |
-0.057821 |
| AAPL |
6-Jan-11 |
-0.000808 |
-0.002125 |
0.0001 |
-0.0025 |
0.900006 |
0.068535 |
-0.057821 |
| AAPL |
7-Jan-11 |
0.007136 |
-0.001847 |
0.0022 |
-0.0006 |
0.900006 |
0.068535 |
-0.057821 |
| AAPL |
10-Jan-11 |
0.018657 |
-0.001377 |
0.0041 |
0.0039 |
0.900006 |
0.068535 |
-0.057821 |
| AAPL |
11-Jan-11 |
-0.002368 |
0.003718 |
0.0016 |
0.0036 |
0.900006 |
0.068535 |
-0.057821 |
# -------------------------------------------------------
# STEP 1: Cross-Sectional Regressions
#
# For each date (t), regress ri on b_MKT, b_SMB, b_HML
# across all stocks to obtain daily risk premia (lambdas).
# -------------------------------------------------------
step1_lambdas <- step0 %>%
# Group data by date and nest the cross-sectional data for each date
nest(data = c(symbol, ri, b_MKT, b_SMB, b_HML)) %>%
# For each date, run OLS: ri = lambda0 + lam_MKT*b_MKT + ...
mutate(estimates = map(
data,
~tidy(lm(ri ~ b_MKT + b_SMB + b_HML, data = .x))
)) %>%
# Unnest the results
unnest(estimates) %>%
# Keep only the date, coefficient name, and coefficient value
select(date, estimate, term) %>%
# Pivot to wide format: one column per lambda
pivot_wider(
names_from = term,
values_from = estimate
) %>%
# Select and rename the risk premia columns
select(
date,
lam_MKT = b_MKT,
lam_SMB = b_SMB,
lam_HML = b_HML
)
# Preview the first few rows of daily risk premia
head(step1_lambdas, 10) %>%
kable(
caption = "Table 4: Daily Cross-Sectional Risk Premia (Step 1 Output) — First 10 Days",
digits = 6,
align = "c"
)
Table 4: Daily Cross-Sectional Risk Premia (Step 1 Output) —
First 10 Days
| 4-Jan-11 |
0.041629 |
-0.025520 |
0.057372 |
| 5-Jan-11 |
-0.011347 |
-0.158046 |
0.062847 |
| 6-Jan-11 |
0.037301 |
0.007029 |
-0.173234 |
| 7-Jan-11 |
0.012722 |
0.032269 |
-0.064226 |
| 10-Jan-11 |
-0.036631 |
0.017123 |
0.058646 |
| 11-Jan-11 |
0.004089 |
-0.095361 |
0.089858 |
| 12-Jan-11 |
-0.055365 |
-0.164496 |
0.043036 |
| 13-Jan-11 |
-0.019357 |
0.001815 |
0.025630 |
| 14-Jan-11 |
-0.016486 |
0.063259 |
0.039214 |
| 18-Jan-11 |
0.010146 |
0.052508 |
-0.090027 |
# -------------------------------------------------------
# STEP 2a: Compute time-series averages of risk premia
# -------------------------------------------------------
lambda_summary <- step1_lambdas %>%
summarise(
Mean_MKT = mean(lam_MKT, na.rm = TRUE),
Mean_SMB = mean(lam_SMB, na.rm = TRUE),
Mean_HML = mean(lam_HML, na.rm = TRUE),
SD_MKT = sd(lam_MKT, na.rm = TRUE),
SD_SMB = sd(lam_SMB, na.rm = TRUE),
SD_HML = sd(lam_HML, na.rm = TRUE),
N = n()
)
lambda_summary %>%
kable(
caption = "Table 5: Summary Statistics of Daily Risk Premia",
digits = 6,
align = "c"
)
Table 5: Summary Statistics of Daily Risk Premia
| -0.000412 |
0.003683 |
-0.000467 |
0.03857 |
0.133626 |
0.091705 |
1257 |
# -------------------------------------------------------
# STEP 2b: One-sample t-tests for each risk premium
#
# H0: lambda = 0 (the factor earns no risk premium)
# H1: lambda ≠ 0 (the factor earns a nonzero risk premium)
#
# We test at the conventional 5% significance level.
# -------------------------------------------------------
cat("============================================================\n")
## ============================================================
cat(" FAMA-MACBETH HYPOTHESIS TESTS: H0: lambda = 0\n")
## FAMA-MACBETH HYPOTHESIS TESTS: H0: lambda = 0
cat("============================================================\n\n")
## ============================================================
cat("--- Factor 1: Market Risk Premium (MKT) ---\n")
## --- Factor 1: Market Risk Premium (MKT) ---
ttest_MKT <- t.test(step1_lambdas$lam_MKT, mu = 0)
print(ttest_MKT)
##
## One Sample t-test
##
## data: step1_lambdas$lam_MKT
## t = -0.37879, df = 1256, p-value = 0.7049
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -0.002546371 0.001722208
## sample estimates:
## mean of x
## -0.0004120813
cat("\n--- Factor 2: Size Premium (SMB) ---\n")
##
## --- Factor 2: Size Premium (SMB) ---
ttest_SMB <- t.test(step1_lambdas$lam_SMB, mu = 0)
print(ttest_SMB)
##
## One Sample t-test
##
## data: step1_lambdas$lam_SMB
## t = 0.97712, df = 1256, p-value = 0.3287
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -0.003711466 0.011076953
## sample estimates:
## mean of x
## 0.003682744
cat("\n--- Factor 3: Value Premium (HML) ---\n")
##
## --- Factor 3: Value Premium (HML) ---
ttest_HML <- t.test(step1_lambdas$lam_HML, mu = 0)
print(ttest_HML)
##
## One Sample t-test
##
## data: step1_lambdas$lam_HML
## t = -0.18044, df = 1256, p-value = 0.8568
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -0.005541205 0.004607776
## sample estimates:
## mean of x
## -0.0004667146
# -------------------------------------------------------
# Compile a clean results table for easy reading
# -------------------------------------------------------
results_table <- tibble(
Factor = c("MKT (Market)", "SMB (Size)", "HML (Value)"),
Mean_Lambda = c(ttest_MKT$estimate, ttest_SMB$estimate, ttest_HML$estimate),
t_Statistic = c(ttest_MKT$statistic, ttest_SMB$statistic, ttest_HML$statistic),
p_Value = c(ttest_MKT$p.value, ttest_SMB$p.value, ttest_HML$p.value),
CI_Lower = c(ttest_MKT$conf.int[1], ttest_SMB$conf.int[1], ttest_HML$conf.int[1]),
CI_Upper = c(ttest_MKT$conf.int[2], ttest_SMB$conf.int[2], ttest_HML$conf.int[2]),
Significant = c(
ifelse(ttest_MKT$p.value < 0.05, "Yes ***", "No"),
ifelse(ttest_SMB$p.value < 0.05, "Yes ***", "No"),
ifelse(ttest_HML$p.value < 0.05, "Yes ***", "No")
)
)
results_table %>%
kable(
caption = "Table 6: Fama-MacBeth Regression Results Summary",
digits = c(0, 6, 4, 4, 6, 6, 0),
align = "c"
)
Table 6: Fama-MacBeth Regression Results Summary
| MKT (Market) |
-0.000412 |
-0.3788 |
0.7049 |
-0.002546 |
0.001722 |
No |
| SMB (Size) |
0.003683 |
0.9771 |
0.3287 |
-0.003711 |
0.011077 |
No |
| HML (Value) |
-0.000467 |
-0.1804 |
0.8568 |
-0.005541 |
0.004608 |
No |
# -------------------------------------------------------
# Plot the distribution of daily lambdas for each factor
# to visually inspect whether the average differs from zero
# -------------------------------------------------------
step1_lambdas %>%
pivot_longer(
cols = c(lam_MKT, lam_SMB, lam_HML),
names_to = "Factor",
values_to = "Lambda"
) %>%
mutate(Factor = recode(Factor,
lam_MKT = "MKT (Market)",
lam_SMB = "SMB (Size)",
lam_HML = "HML (Value)"
)) %>%
ggplot(aes(x = Lambda, fill = Factor)) +
geom_histogram(bins = 60, alpha = 0.75, colour = "white") +
geom_vline(xintercept = 0, colour = "black", linetype = "dashed", linewidth = 0.8) +
facet_wrap(~Factor, scales = "free") +
scale_fill_manual(values = c("#2980b9", "#27ae60", "#e74c3c")) +
labs(
title = "Distribution of Daily Cross-Sectional Risk Premia",
subtitle = "Dashed line marks zero; a distribution centred away from zero indicates a nonzero risk premium",
x = "Lambda (Risk Premium)",
y = "Frequency",
caption = "Source: Fama-French Three-Factor Model | Daily data, Jan 2011 – Dec 2015"
) +
theme_minimal(base_size = 13) +
theme(
legend.position = "none",
strip.text = element_text(face = "bold"),
plot.title = element_text(face = "bold", size = 14),
plot.subtitle = element_text(colour = "grey40", size = 11)
)

# -------------------------------------------------------
# Plot the time-series of each daily lambda
# -------------------------------------------------------
step1_lambdas %>%
mutate(date = as.Date(date, format = "%d-%b-%y")) %>%
pivot_longer(
cols = c(lam_MKT, lam_SMB, lam_HML),
names_to = "Factor",
values_to = "Lambda"
) %>%
mutate(Factor = recode(Factor,
lam_MKT = "MKT (Market)",
lam_SMB = "SMB (Size)",
lam_HML = "HML (Value)"
)) %>%
ggplot(aes(x = date, y = Lambda, colour = Factor)) +
geom_line(alpha = 0.6, linewidth = 0.4) +
geom_hline(yintercept = 0, colour = "black", linetype = "dashed") +
facet_wrap(~Factor, ncol = 1, scales = "free_y") +
scale_colour_manual(values = c("#2980b9", "#27ae60", "#e74c3c")) +
labs(
title = "Time-Series of Daily Cross-Sectional Risk Premia",
x = "Date",
y = "Lambda (Risk Premium)",
caption = "Source: Fama-French Three-Factor Model | Daily data, Jan 2011 – Dec 2015"
) +
theme_minimal(base_size = 13) +
theme(
legend.position = "none",
strip.text = element_text(face = "bold"),
plot.title = element_text(face = "bold", size = 14)
)

Conclusion This study successfully reproduced the Eugene Fama–James
MacBeth (1973) two-pass regression methodology using daily return data
from six U.S. stocks over the 2011–2015 period, within the framework of
the Fama–French three-factor model. The analysis was conducted in three
main stages. Step 0 (Time-Series Regressions): Separate time-series
regressions were estimated for each of the six stocks to measure their
sensitivities (betas) to the market (MKT), size (SMB), and value (HML)
factors. Step 1 (Cross-Sectional Regressions): Daily cross-sectional
regressions were then performed, where stock returns for each trading
day were regressed on the previously estimated betas. This generated a
time series of daily factor risk premia (lambda estimates). Step 2
(Averaging and Statistical Testing): The estimated daily lambdas were
averaged across the sample period, and one-sample t-tests were conducted
to evaluate whether the average risk premia were significantly different
from zero. One of the principal strengths of the Fama–MacBeth
methodology compared with pooled OLS estimation is its ability to
account for cross-sectional dependence in asset returns. Since stock
returns are often correlated across firms, ignoring this feature can
lead to underestimated standard errors and overly optimistic statistical
inferences. Overall, the findings add to the extensive empirical
literature examining the validity and performance of the Fama–French
three-factor framework under varying market conditions. Further research
could expand the sample to include more firms, longer time horizons, or
additional explanatory factors such as momentum and profitability from
the five-factor model. Another potential extension would be the use of
rolling-window estimation techniques to investigate how factor risk
premia change over time.