Problem 12 – Fama-French 6 Portfolios (Size × Book-to-Market)

Data Download

We download the monthly value-weighted returns of the “6 Portfolios Formed on Size and Book-to-Market (2×3)” from Kenneth French’s data library for January 1930 – December 2018.

# Fetch the zipped CSV from Kenneth French's data library into a temp file.
# Binary mode ("wb") is requested because the payload is a zip archive.
url <- "https://mba.tuck.dartmouth.edu/pages/faculty/ken.french/ftp/6_Portfolios_2x3_CSV.zip"
tmp <- tempfile(fileext = ".zip")
download.file(url = url, destfile = tmp, mode = "wb", quiet = TRUE)

# Extract into the session temp directory, then load the first extracted
# file as a character vector with one element per line of text.
csv_file <- unzip(zipfile = tmp, exdir = tempdir())
raw <- readLines(con = csv_file[1])

# Locate the value-weighted monthly returns section (the first data block).
# The file holds several sections; monthly rows start with a six-digit
# YYYYMM key followed by a comma.
data_rows <- grep("^\\s*[0-9]{6},", raw)
if (length(data_rows) == 0L) {
  stop("No monthly data rows (YYYYMM,...) found in the downloaded file.",
       call. = FALSE)
}
start_line <- data_rows[1]   # first data row

# The section ends just before the next blank (or whitespace-only) line or the
# next section header. If nothing terminates the block, read to end of file
# rather than indexing with NA.
end_candidates <- which(grepl("^\\s*$", raw) |
                          grepl("^\\s*(Average|Annual)", raw))
end_candidates <- end_candidates[end_candidates > start_line]
end_line <- if (length(end_candidates) > 0L) {
  end_candidates[1] - 1L
} else {
  length(raw)
}

data_lines <- raw[start_line:end_line]

# Parse into a data frame. read.csv(text = ) opens and closes its own text
# connection, so no connection is left open if parsing fails.
df <- read.csv(text = data_lines, header = FALSE, strip.white = TRUE)

# Assign column names
colnames(df) <- c("YearMonth", "SMALL.LoBM", "ME1.BM2", "SMALL.HiBM",
                  "BIG.LoBM",  "ME2.BM2",   "BIG.HiBM")

# Convert YearMonth (YYYYMM) to Date (first day of month)
df$Date <- as.Date(paste0(df$YearMonth, "01"), format = "%Y%m%d")

# Filter Jan 1930 – Dec 2018. which() discards rows whose date failed to parse
# (NA) instead of letting a logical NA index inject all-NA rows.
in_sample <- df$Date >= as.Date("1930-01-01") & df$Date <= as.Date("2018-12-31")
df <- df[which(in_sample), ]

# Convert returns to numeric (they are already in percent, e.g. 2.35 = 2.35%)
port_cols <- c("SMALL.LoBM","ME1.BM2","SMALL.HiBM","BIG.LoBM","ME2.BM2","BIG.HiBM")
df[port_cols] <- lapply(df[port_cols], as.numeric)

# Remove any rows with missing values: French's sentinel codes (-99.99 or
# -999) or NA left behind by the numeric conversion. The mask is built
# column-wise so an NA marks the row as missing rather than yielding an NA
# row index.
is_missing <- is.na(df[port_cols]) | df[port_cols] <= -99
df <- df[rowSums(is_missing) == 0, ]

cat("Total months in full sample:", nrow(df), "\n")
## Total months in full sample: 1068
cat("Date range:", format(min(df$Date), "%b %Y"), "to", format(max(df$Date), "%b %Y"), "\n")
## Date range: Jan 1930 to Dec 2018

Split the Sample in Half

# Split at the median date: rows dated on or before it form the first half,
# every later row goes to the second half.
mid <- median(df$Date)
is_first <- df$Date <= mid
first_half  <- df[is_first, ]
second_half <- df[!is_first, ]

# Report the first/last month and the row count of each half.
first_span <- format(range(first_half$Date), "%b %Y")
cat("First half: ", first_span[1], "–", first_span[2],
    " (", nrow(first_half), "months)\n")
## First half:  Jan 1930 – Jun 1974  ( 534 months)
second_span <- format(range(second_half$Date), "%b %Y")
cat("Second half:", second_span[1], "–", second_span[2],
    " (", nrow(second_half), "months)\n")
## Second half: Jul 1974 – Dec 2018  ( 534 months)

Summary Statistics Function

#' Per-portfolio summary statistics for one sample period
#'
#' Computes the mean, standard deviation, skewness and excess kurtosis of each
#' requested return column, each rounded to 4 decimals.
#'
#' `skewness()` and `kurtosis()` are resolved from the search path at call
#' time. The `- 3` adjustment assumes `kurtosis()` returns raw kurtosis
#' (normal distribution = 3), as the `moments` package's function does; a
#' `kurtosis()` that already returns excess kurtosis would be double-adjusted.
#'
#' @param data A data frame containing the return columns.
#' @param period_label Label stored in the `Period` column of the result.
#' @param cols Character vector of column names to summarise. Defaults to the
#'   script-level `port_cols`, looked up when the function is called.
#' @return A data frame with one row per column in `cols` and the columns
#'   `Period`, `Portfolio`, `Mean`, `SD`, `Skewness`, `Kurtosis`.
summary_stats <- function(data, period_label, cols = port_cols) {
  missing_cols <- setdiff(cols, names(data))
  if (length(missing_cols) > 0L) {
    stop("Columns not found in `data`: ",
         paste(missing_cols, collapse = ", "), call. = FALSE)
  }

  # vapply pins the per-column result to four numbers, so a malformed column
  # fails loudly instead of silently changing the shape of the output.
  template <- c(Mean = 0, SD = 0, Skewness = 0, Kurtosis = 0)
  stats <- vapply(cols, function(col) {
    x <- data[[col]]
    c(Mean     = round(mean(x),     4),
      SD       = round(sd(x),       4),
      Skewness = round(skewness(x), 4),
      Kurtosis = round(kurtosis(x) - 3, 4))  # excess kurtosis
  }, FUN.VALUE = template)

  # stats is statistics x portfolios; transpose to one row per portfolio.
  df_out <- as.data.frame(t(stats))
  df_out$Portfolio <- rownames(df_out)
  df_out$Period    <- period_label
  rownames(df_out) <- NULL
  df_out[, c("Period","Portfolio","Mean","SD","Skewness","Kurtosis")]
}

# Boundary years of the two halves, taken from the data for the labels
first_end_year    <- format(max(first_half$Date), "%Y")
second_start_year <- format(min(second_half$Date), "%Y")

# One statistics table per sample period
stats_full   <- summary_stats(df, "Full (1930–2018)")
stats_first  <- summary_stats(
  first_half,
  paste0("First Half (1930–", first_end_year, ")")
)
stats_second <- summary_stats(
  second_half,
  paste0("Second Half (", second_start_year, "–2018)")
)

Results – Full Sample

# Full-sample statistics table; the first column (Period) is dropped because
# the caption already names the period.
full_table <- kable(
  stats_full[, -1],
  caption = "Full Sample (Jan 1930 – Dec 2018) — Monthly Returns (%)"
)
kable_styling(
  full_table,
  bootstrap_options = c("striped","hover","condensed"),
  full_width = FALSE
)
Full Sample (Jan 1930 – Dec 2018) — Monthly Returns (%)
Portfolio Mean SD Skewness Kurtosis
SMALL.LoBM 0.9836 7.4928 0.6333 7.3772
ME1.BM2 1.2622 7.0273 1.2280 14.1594
SMALL.HiBM 1.4548 8.1936 2.1431 21.8897
BIG.LoBM 0.8714 5.2258 -0.0166 5.6430
ME2.BM2 0.9348 5.6633 1.3037 18.3288
BIG.HiBM 1.1660 7.1829 1.6101 18.2885

Results – First Half

# First-half statistics table (Period column dropped). The Period label
# already begins with "First Half", so it is used as the caption prefix on its
# own; prepending another "First Half —" duplicated the phrase.
kable(stats_first[, -1],
      caption = paste(unique(stats_first$Period), "— Monthly Returns (%)")) %>%
  kable_styling(bootstrap_options = c("striped","hover","condensed"), full_width = FALSE)
First Half (1930–1974) — Monthly Returns (%)
Portfolio Mean SD Skewness Kurtosis
SMALL.LoBM 0.9713 8.2253 1.1800 9.0716
ME1.BM2 1.1695 8.4229 1.5797 12.7404
SMALL.HiBM 1.4844 10.2059 2.2875 17.0760
BIG.LoBM 0.7648 5.7095 0.1783 6.8941
ME2.BM2 0.8118 6.7341 1.7116 17.5352
BIG.HiBM 1.1874 8.9106 1.7694 14.4682

Results – Second Half

# Second-half statistics table (Period column dropped). The Period label
# already begins with "Second Half", so it is used as the caption prefix on
# its own; prepending another "Second Half —" duplicated the phrase.
kable(stats_second[, -1],
      caption = paste(unique(stats_second$Period), "— Monthly Returns (%)")) %>%
  kable_styling(bootstrap_options = c("striped","hover","condensed"), full_width = FALSE)
Second Half (1974–2018) — Monthly Returns (%)
Portfolio Mean SD Skewness Kurtosis
SMALL.LoBM 0.9959 6.6884 -0.4086 2.1587
ME1.BM2 1.3548 5.2817 -0.5330 3.4246
SMALL.HiBM 1.4251 5.4987 -0.4644 4.3053
BIG.LoBM 0.9781 4.6955 -0.3337 1.9925
ME2.BM2 1.0578 4.3391 -0.4729 2.6534
BIG.HiBM 1.1446 4.8871 -0.5172 2.8054

Visualisation – Mean Return and SD by Half

# Two panels side by side. par() returns the previous settings, which are
# restored at the end instead of forcing the layout back to c(1, 1).
old_par <- par(mfrow = c(1, 2))

# Grouped bar chart of one statistic: a pair of bars (first half, second
# half) per portfolio. `values` is a 2-row matrix with one column per entry
# of port_cols.
plot_by_half <- function(values, main, ylab) {
  invisible(barplot(values,
                    beside = TRUE,
                    names.arg = port_cols,
                    col  = c("steelblue","darkorange"),
                    main = main,
                    ylab = ylab,
                    las  = 2,
                    cex.names = 0.75,
                    legend.text = c("First Half","Second Half"),
                    args.legend = list(x = "topright", bty = "n")))
}

# Mean returns
means <- rbind(stats_first$Mean, stats_second$Mean)
plot_by_half(means, main = "Mean Monthly Return (%)", ylab = "Mean (%)")

# Standard deviations
sds <- rbind(stats_first$SD, stats_second$SD)
plot_by_half(sds, main = "Standard Deviation (%)", ylab = "SD (%)")

par(old_par)

Discussion

Do the six split-halves statistics suggest returns come from the same distribution over the entire period?

# Emit the discussion as markdown text. The figures quoted below are read off
# the first-half and second-half tables reported above; revise them if the
# sample window or data vintage changes.
cat("
**Key observations:**

1. **Mean returns** are the most stable statistic: the gap between halves is only
   about 0.02–0.25 percentage points per month, small relative to monthly standard
   deviations of 4–10%. Only the two high book-to-market (value) portfolios
   (SMALL.HiBM, BIG.HiBM) earn slightly more in the first half; the other four
   portfolios earn more in the second half.

2. **Standard deviations** are larger in the first half for every portfolio
   (5.7–10.2% versus 4.3–6.7%), reflecting the extreme volatility of the 1930s
   and WWII era.

3. **Skewness** changes sign: it is positive for all six portfolios in the first
   half (0.18 to 2.29) and negative for all six in the second half (-0.33 to -0.53).
   The first half's most extreme months were gains — consistent with the sharp
   rebounds of the 1930s — whereas the second half's extremes are mostly drawdowns.

4. **Excess kurtosis** (fat tails) is positive in both halves, but far higher
   in the first half (6.9–17.5 versus 2.0–4.3) — consistent with the extreme
   monthly returns of the Great Depression era.

**Conclusion:** The statistics across the two halves are *not* consistent with a single
stable distribution. Volatility, skewness, and kurtosis all shift markedly between
sub-periods, even though mean returns are comparatively stable. This suggests
**non-stationarity** — the return-generating process appears to have changed over
time, likely reflecting structural changes in the economy, monetary policy regimes,
and market microstructure. Investors should be cautious about treating the
full-period historical distribution as a reliable guide to future returns.
")

Key observations:

  1. Mean returns are the most stable statistic: the gap between halves is only about 0.02–0.25 percentage points per month, small relative to monthly standard deviations of 4–10%. Only the two high book-to-market (value) portfolios (SMALL.HiBM, BIG.HiBM) earn slightly more in the first half; the other four portfolios earn more in the second half.

  2. Standard deviations are larger in the first half for every portfolio (5.7–10.2% versus 4.3–6.7%), reflecting the extreme volatility of the 1930s and WWII era.

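  3. Skewness changes sign: it is positive for all six portfolios in the first half (0.18 to 2.29) and negative for all six in the second half (-0.33 to -0.53). The first half's most extreme months were gains — consistent with the sharp rebounds of the 1930s — whereas the second half's extremes are mostly drawdowns.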

  4. Excess kurtosis (fat tails) is positive in both halves, but far higher in the first half (6.9–17.5 versus 2.0–4.3) — consistent with the extreme monthly returns of the Great Depression era.

Conclusion: The statistics across the two halves are not consistent with a single stable distribution. Volatility, skewness, and kurtosis all shift markedly between sub-periods, even though mean returns are comparatively stable. This suggests non-stationarity — the return-generating process appears to have changed over time, likely reflecting structural changes in the economy, monetary policy regimes, and market microstructure. Investors should be cautious about treating the full-period historical distribution as a reliable guide to future returns.


CFA Problem 1 – Expected Risk Premium in Dollars

Given: $100,000 to invest.

Action Probability Expected Return
Invest in equities 0.6 $50,000
Invest in equities 0.4 –$30,000
Invest in risk-free T-bill 1.0 $5,000

Calculation

# Two equity scenarios: a dollar gain with probability 0.6 and a dollar loss
# with probability 0.4
prob_up     <- 0.6
return_up   <- 50000
prob_down   <- 0.4
return_down <- -30000

# Probability-weighted dollar payoff of the equity position
E_equity <- sum(c(prob_up, prob_down) * c(return_up, return_down))

# Certain dollar payoff of the T-bill
E_rf <- 5000

# Dollar risk premium: expected equity payoff in excess of the T-bill payoff
risk_premium <- E_equity - E_rf

# Print a label followed by the amount with thousands separators
show_dollars <- function(label, amount) {
  cat(label, format(amount, big.mark = ","), "\n")
}

show_dollars("Expected return on equities : $", E_equity)
## Expected return on equities : $ 18,000
show_dollars("Return on risk-free T-bill  : $", E_rf)
## Return on risk-free T-bill  : $ 5,000
show_dollars("Expected risk premium       : $", risk_premium)
## Expected risk premium       : $ 13,000

Answer

\[E(\text{Equity}) = 0.6 \times \$50{,}000 + 0.4 \times (-\$30{,}000) = \$30{,}000 - \$12{,}000 = \$18{,}000\]

\[\text{Risk Premium} = E(\text{Equity}) - E(\text{T-bill}) = \$18{,}000 - \$5{,}000 = \mathbf{\$13{,}000}\]

The expected risk premium of investing in equities versus risk-free T-bills is $13,000.

This represents the additional expected dollar return an investor demands for bearing the uncertainty of equity investment relative to the guaranteed T-bill return.


Session Info

# Print the R version, platform, locale, and attached/loaded package versions
# so the results above can be tied to the environment that produced them.
sessionInfo()
## R version 4.5.2 (2025-10-31)
## Platform: x86_64-pc-linux-gnu
## Running under: Ubuntu 24.04.4 LTS
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3 
## LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/libopenblasp-r0.3.26.so;  LAPACK version 3.12.0
## 
## locale:
##  [1] LC_CTYPE=C.UTF-8       LC_NUMERIC=C           LC_TIME=C.UTF-8       
##  [4] LC_COLLATE=C.UTF-8     LC_MONETARY=C.UTF-8    LC_MESSAGES=C.UTF-8   
##  [7] LC_PAPER=C.UTF-8       LC_NAME=C              LC_ADDRESS=C          
## [10] LC_TELEPHONE=C         LC_MEASUREMENT=C.UTF-8 LC_IDENTIFICATION=C   
## 
## time zone: UTC
## tzcode source: system (glibc)
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
## [1] moments_0.14.1   kableExtra_1.4.0 knitr_1.51      
## 
## loaded via a namespace (and not attached):
##  [1] vctrs_0.7.1        svglite_2.2.2      cli_3.6.5          rlang_1.1.7       
##  [5] xfun_0.56          stringi_1.8.7      textshaping_1.0.5  jsonlite_2.0.0    
##  [9] glue_1.8.0         htmltools_0.5.9    sass_0.4.10        scales_1.4.0      
## [13] rmarkdown_2.30     evaluate_1.0.5     jquerylib_0.1.4    fastmap_1.2.0     
## [17] yaml_2.3.12        lifecycle_1.0.5    stringr_1.6.0      compiler_4.5.2    
## [21] RColorBrewer_1.1-3 rstudioapi_0.18.0  systemfonts_1.3.2  farver_2.1.2      
## [25] digest_0.6.39      viridisLite_0.4.3  R6_2.6.1           magrittr_2.0.4    
## [29] bslib_0.10.0       tools_4.5.2        xml2_1.5.2         cachem_1.1.0