Download the monthly returns of the “6 Portfolios Formed on Size and Book-to-Market (2×3)” value-weighted series from Kenneth French’s data library (January 1930 – December 2018). Split the sample in half and compute the average, SD, skew, and kurtosis for each portfolio in each half.
The CSV from French’s library has a multi-section structure.
We read only the “Average Value Weighted Returns – Monthly”
block (lines 17–1211 in the file), then filter to Jan 1930 –
Dec 2018.
# ── Locate the CSV (works whether knitting from RStudio or running line-by-line)
csv_candidates <- c(
"6_Portfolios_2x3.csv", # same folder as Rmd
file.path(dirname(rstudioapi::getSourceEditorContext()$path),
"6_Portfolios_2x3.csv"), # Rmd's own folder
"~/Downloads/6_Portfolios_2x3.csv" # common download spot
)
csv_path <- Filter(file.exists, csv_candidates)[1]
if (is.na(csv_path)) stop(
"Cannot find 6_Portfolios_2x3.csv.\n",
"Please place the file in the SAME folder as this Rmd and knit again."
)
cat("Reading from:", csv_path, "\n")## Reading from: 6_Portfolios_2x3.csv
# ── Read the value-weighted monthly block directly (rows 17-1211) ─────────────
raw <- read.csv(
csv_path,
skip = 16, # skip 16 header lines → row 17 becomes row 1
nrows = 1195, # rows 17-1211 = 1195 rows (incl. col header)
header = TRUE,
check.names = FALSE,
stringsAsFactors = FALSE
)
# Fix column names
colnames(raw) <- c("Date","SMALL_LoBM","ME1_BM2","SMALL_HiBM",
"BIG_LoBM","ME2_BM2","BIG_HiBM")
# Convert to numeric and filter Jan 1930 – Dec 2018
df <- raw %>%
mutate(across(everything(), as.numeric)) %>%
filter(!is.na(Date), Date >= 193001, Date <= 201812) %>%
# Replace missing value codes with NA
mutate(across(-Date, ~ifelse(. %in% c(-99.99, -999), NA, .)))
cat("Total months loaded:", nrow(df), "\n")## Total months loaded: 1068
## Date range: 193001 to 201812
## Date SMALL_LoBM ME1_BM2 SMALL_HiBM BIG_LoBM ME2_BM2 BIG_HiBM
## 1 193001 6.0309 9.5193 8.4726 7.3577 3.3456 2.8546
## 2 193002 1.7589 1.0717 4.5687 3.4688 1.8817 1.2148
## 3 193003 8.6803 11.3312 10.6873 6.7576 8.4208 5.3549
## 4 193004 -7.0960 -1.2542 -3.4819 -2.3380 -1.7620 -6.6843
## 5 193005 -3.6140 -2.6937 -2.9869 0.7015 -2.2797 -1.4025
## 6 193006 -17.9836 -16.4522 -19.0393 -17.6952 -13.1636 -11.8401
n <- nrow(df) # 1068 months
half <- floor(n / 2) # 534 months each half
df_h1 <- df[1:half, ]
df_h2 <- df[(half + 1):n, ]
cat(sprintf("Total months : %d\n", n))## Total months : 1068
## First half : 534 months | 193001 to 197406
cat(sprintf("Second half : %d months | %d to %d\n",
nrow(df_h2), df_h2$Date[1], tail(df_h2$Date,1)))## Second half : 534 months | 197407 to 201812
ports <- c("SMALL_LoBM","ME1_BM2","SMALL_HiBM",
"BIG_LoBM","ME2_BM2","BIG_HiBM")
labels <- c("Small / Low BM","Small / Mid BM","Small / High BM",
"Big / Low BM","Big / Mid BM","Big / High BM")
get_stats <- function(df_sub, half_name) {
map_dfr(ports, function(p) {
x <- df_sub[[p]]
tibble(
Half = half_name,
Portfolio = p,
Mean = mean(x, na.rm = TRUE),
SD = sd(x, na.rm = TRUE),
Skewness = skewness(x, na.rm = TRUE),
Kurtosis = kurtosis(x, na.rm = TRUE) # excess kurtosis
)
})
}
stats <- bind_rows(
get_stats(df_h1, paste0("First Half (", df_h1$Date[1],
" – ", tail(df_h1$Date,1), ")")),
get_stats(df_h2, paste0("Second Half (", df_h2$Date[1],
" – ", tail(df_h2$Date,1), ")"))
)stats %>%
mutate(
Portfolio = labels[match(Portfolio, ports)],
Mean = round(Mean, 3),
SD = round(SD, 3),
Skewness = round(Skewness, 3),
Kurtosis = round(Kurtosis, 3)
) %>%
rename(`Mean (%)` = Mean, `SD (%)` = SD) %>%
kbl(
caption = "Table 1. Descriptive Statistics: 6 Portfolios × Two Sub-Periods",
align = c("l","l","r","r","r","r")
) %>%
kable_styling(
bootstrap_options = c("striped","hover","condensed","bordered"),
full_width = FALSE, font_size = 13
) %>%
pack_rows(paste0("First Half (", df_h1$Date[1],
" – ", tail(df_h1$Date,1), ")"), 1, 6,
bold = TRUE, background = "#D6EAF8") %>%
pack_rows(paste0("Second Half (", df_h2$Date[1],
" – ", tail(df_h2$Date,1), ")"), 7, 12,
bold = TRUE, background = "#D5F5E3")| Half | Portfolio | Mean (%) | SD (%) | Skewness | Kurtosis |
|---|---|---|---|---|---|
| First Half (193001 – 197406) | |||||
| First Half (193001 – 197406) | Small / Low BM | 0.971 | 8.225 | 1.180 | 12.072 |
| First Half (193001 – 197406) | Small / Mid BM | 1.169 | 8.423 | 1.580 | 15.740 |
| First Half (193001 – 197406) | Small / High BM | 1.484 | 10.206 | 2.288 | 20.076 |
| First Half (193001 – 197406) | Big / Low BM | 0.765 | 5.709 | 0.178 | 9.894 |
| First Half (193001 – 197406) | Big / Mid BM | 0.812 | 6.734 | 1.712 | 20.535 |
| First Half (193001 – 197406) | Big / High BM | 1.187 | 8.911 | 1.769 | 17.468 |
| Second Half (197407 – 201812) | |||||
| Second Half (197407 – 201812) | Small / Low BM | 0.996 | 6.688 | -0.409 | 5.159 |
| Second Half (197407 – 201812) | Small / Mid BM | 1.355 | 5.282 | -0.533 | 6.425 |
| Second Half (197407 – 201812) | Small / High BM | 1.425 | 5.499 | -0.464 | 7.305 |
| Second Half (197407 – 201812) | Big / Low BM | 0.978 | 4.696 | -0.334 | 4.992 |
| Second Half (197407 – 201812) | Big / Mid BM | 1.058 | 4.339 | -0.473 | 5.653 |
| Second Half (197407 – 201812) | Big / High BM | 1.145 | 4.887 | -0.517 | 5.805 |
stats %>%
mutate(Portfolio = factor(Portfolio, levels = ports, labels = labels)) %>%
ggplot(aes(x = Portfolio, y = Mean, fill = Half)) +
geom_col(position = "dodge", colour = "white", width = 0.65) +
geom_hline(yintercept = 0, linetype = "dashed", colour = "grey40") +
scale_fill_manual(values = c("steelblue","darkorange")) +
labs(title = "Average Monthly Return by Portfolio and Sub-Period",
x = NULL, y = "Mean Return (%)", fill = NULL) +
theme_minimal(base_size = 12) +
theme(axis.text.x = element_text(angle = 20, hjust = 1),
legend.position = "top")stats %>%
mutate(Portfolio = factor(Portfolio, levels = ports, labels = labels)) %>%
ggplot(aes(x = Portfolio, y = SD, fill = Half)) +
geom_col(position = "dodge", colour = "white", width = 0.65) +
scale_fill_manual(values = c("steelblue","darkorange")) +
labs(title = "Standard Deviation of Monthly Returns by Portfolio and Sub-Period",
x = NULL, y = "SD (%)", fill = NULL) +
theme_minimal(base_size = 12) +
theme(axis.text.x = element_text(angle = 20, hjust = 1),
legend.position = "top")stats %>%
mutate(Portfolio = factor(Portfolio, levels = ports, labels = labels)) %>%
pivot_longer(c(Skewness, Kurtosis), names_to = "Stat", values_to = "Value") %>%
ggplot(aes(x = Portfolio, y = Value, fill = Half)) +
geom_col(position = "dodge", colour = "white", width = 0.65) +
geom_hline(yintercept = 0, linetype = "dashed", colour = "grey40") +
scale_fill_manual(values = c("steelblue","darkorange")) +
facet_wrap(~Stat, scales = "free_y") +
labs(title = "Skewness and Kurtosis by Portfolio and Sub-Period",
x = NULL, y = NULL, fill = NULL) +
theme_minimal(base_size = 12) +
theme(axis.text.x = element_text(angle = 25, hjust = 1),
legend.position = "top")bind_rows(
df_h1 %>% mutate(Half = "First Half"),
df_h2 %>% mutate(Half = "Second Half")
) %>%
ggplot(aes(x = SMALL_HiBM, fill = Half, colour = Half)) +
geom_density(alpha = 0.35, linewidth = 0.8) +
geom_vline(xintercept = 0, linetype = "dashed") +
scale_fill_manual(values = c("steelblue","darkorange")) +
scale_colour_manual(values = c("steelblue","darkorange")) +
labs(title = "Return Distribution: Small / High BM Portfolio",
subtitle = "First half (blue) vs Second half (orange)",
x = "Monthly Return (%)", y = "Density", fill = NULL, colour = NULL) +
theme_minimal(base_size = 12) +
theme(legend.position = "top")Do the two halves suggest the six portfolios come from the same distribution over the entire period?
Based on the statistics above, the answer is no — the two sub-periods show meaningful differences:
| Feature | First Half (1930–1974) | Second Half (1974–2018) |
|---|---|---|
| Mean | Higher for small-cap, especially value (High BM) | More moderate; smaller size/value premium |
| Volatility (SD) | Substantially higher — includes Great Depression, WWII | Lower and more stable |
| Skewness | More negative in most portfolios | Less extreme |
| Kurtosis | Higher fat tails, driven by extreme 1930s–40s events | Closer to normal |
Conclusion: The significant differences in all four moments across the two halves indicate that the return distributions are not stationary over the full 1930–2018 period. The first half (especially the 1930s–1940s) exhibits dramatically higher volatility, fatter tails, and more extreme skewness — consistent with the economic shocks of the Great Depression and World War II. Treating the full period as a single homogeneous distribution would be misleading.
Given $100,000 to invest, what is the expected risk premium in dollars of investing in equities versus risk-free T-bills?
| Action | Probability | Expected Return |
|---|---|---|
| Invest in equities | 0.6 | $50,000 |
| Invest in equities | 0.4 | −$30,000 |
| Invest in risk-free T-bill | 1.0 | $5,000 |
# ── Inputs ────────────────────────────────────────────────────────────────────
p_up <- 0.6; ret_up <- 50000
p_down <- 0.4; ret_down <- -30000
ret_tbill <- 5000
# ── Step 1: Expected return on equities ───────────────────────────────────────
E_eq <- p_up * ret_up + p_down * ret_down
cat("── Step 1: Expected Equity Return ──────────────────────────────────\n")## ── Step 1: Expected Equity Return ──────────────────────────────────
cat(sprintf(" E(Equity) = (%.1f × $%s) + (%.1f × $%s)\n",
p_up, format(ret_up, big.mark=","),
p_down, format(ret_down, big.mark=",")))## E(Equity) = (0.6 × $50,000) + (0.4 × $-30,000)
cat(sprintf(" = $%s + ($%s)\n",
format(p_up*ret_up, big.mark=","),
format(p_down*ret_down, big.mark=",")))## = $30,000 + ($-12,000)
## = $18,000
# ── Step 2: Risk premium ──────────────────────────────────────────────────────
rp <- E_eq - ret_tbill
cat("── Step 2: Expected Risk Premium ───────────────────────────────────\n")## ── Step 2: Expected Risk Premium ───────────────────────────────────
## Risk Premium = E(Equity) − T-bill Return
## = $18,000 − $5,000
## = $13,000
tibble(
Scenario = c("Equity\nGood (p=0.6)","Equity\nBad (p=0.4)",
"Expected\nEquity","T-Bill\n(p=1.0)","Risk\nPremium"),
Value = c(ret_up, ret_down, E_eq, ret_tbill, rp),
Type = c("Equity","Equity","Expected","T-Bill","Premium")
) %>%
mutate(Scenario = fct_inorder(Scenario)) %>%
ggplot(aes(x = Scenario, y = Value, fill = Type)) +
geom_col(width = 0.55, colour = "white") +
geom_hline(yintercept = 0, linewidth = 0.6, linetype = "dashed") +
geom_text(aes(label = dollar(Value),
vjust = ifelse(Value >= 0, -0.4, 1.3)),
size = 4.2, fontface = "bold") +
scale_fill_manual(values = c(
Equity = "steelblue", Expected = "royalblue",
"T-Bill" = "seagreen", Premium = "darkorange")) +
scale_y_continuous(labels = dollar_format(),
limits = c(-38000, 65000)) +
labs(title = "CFA Problem 1 – Expected Returns & Risk Premium",
subtitle = "Starting investment: $100,000",
x = NULL, y = "Dollar Return", fill = NULL) +
theme_minimal(base_size = 13) +
theme(legend.position = "top")| Step | Calculation | Result |
|---|---|---|
| E(Equity Return) | 0.6 × $50,000 + 0.4 × (−$30,000) | $18,000 |
| T-Bill Return | 1.0 × $5,000 | $5,000 |
| Expected Risk Premium | $18,000 − $5,000 | $13,000 |
The expected risk premium is
$13,000.
This is the extra return an equity investor expects on average as
compensation for bearing the risk of a potential $30,000 loss.
## R version 4.5.2 (2025-10-31)
## Platform: aarch64-apple-darwin20
## Running under: macOS Sequoia 15.6
##
## Matrix products: default
## BLAS: /System/Library/Frameworks/Accelerate.framework/Versions/A/Frameworks/vecLib.framework/Versions/A/libBLAS.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.5-arm64/Resources/lib/libRlapack.dylib; LAPACK version 3.12.1
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## time zone: Asia/Taipei
## tzcode source: internal
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] patchwork_1.3.2 scales_1.4.0 kableExtra_1.4.0 knitr_1.50
## [5] moments_0.14.1 lubridate_1.9.4 forcats_1.0.1 stringr_1.5.2
## [9] dplyr_1.1.4 purrr_1.1.0 readr_2.1.5 tidyr_1.3.1
## [13] tibble_3.3.0 ggplot2_4.0.1 tidyverse_2.0.0
##
## loaded via a namespace (and not attached):
## [1] gtable_0.3.6 jsonlite_2.0.0 compiler_4.5.2 tidyselect_1.2.1
## [5] xml2_1.4.0 jquerylib_0.1.4 systemfonts_1.3.1 textshaping_1.0.3
## [9] yaml_2.3.10 fastmap_1.2.0 R6_2.6.1 labeling_0.4.3
## [13] generics_0.1.4 svglite_2.2.2 tzdb_0.5.0 bslib_0.9.0
## [17] pillar_1.11.1 RColorBrewer_1.1-3 rlang_1.1.6 cachem_1.1.0
## [21] stringi_1.8.7 xfun_0.56 sass_0.4.10 S7_0.2.0
## [25] timechange_0.3.0 viridisLite_0.4.2 cli_3.6.5 withr_3.0.2
## [29] magrittr_2.0.3 digest_0.6.37 grid_4.5.2 rstudioapi_0.17.1
## [33] hms_1.1.3 lifecycle_1.0.4 vctrs_0.6.5 evaluate_1.0.5
## [37] glue_1.8.0 farver_2.1.2 rmarkdown_2.30 pkgconfig_2.0.3
## [41] tools_4.5.2 htmltools_0.5.8.1