Problem 12 (Chapter 5)

Download the monthly returns of the “6 Portfolios Formed on Size and Book-to-Market (2×3)” value-weighted series from Kenneth French’s data library (January 1930 – December 2018). Split the sample in half and compute the average, SD, skew, and kurtosis for each portfolio in each half.


Step 1 – Load & Parse the Data

The CSV from French’s library has a multi-section structure.
We read only the “Average Value Weighted Returns – Monthly” block (lines 17–1211 in the file), then filter to Jan 1930 – Dec 2018.

# ── Locate the CSV (works whether knitting from RStudio or running line-by-line)
csv_candidates <- c(
  "6_Portfolios_2x3.csv",                           # same folder as Rmd
  file.path(dirname(rstudioapi::getSourceEditorContext()$path),
            "6_Portfolios_2x3.csv"),                # Rmd's own folder
  "~/Downloads/6_Portfolios_2x3.csv"                # common download spot
)
csv_path <- Filter(file.exists, csv_candidates)[1]
if (is.na(csv_path)) stop(
  "Cannot find 6_Portfolios_2x3.csv.\n",
  "Please place the file in the SAME folder as this Rmd and knit again."
)
cat("Reading from:", csv_path, "\n")
## Reading from: 6_Portfolios_2x3.csv
# ── Read the value-weighted monthly block directly (rows 17-1211) ─────────────
raw <- read.csv(
  csv_path,
  skip    = 16,          # skip 16 header lines → row 17 becomes row 1
  nrows   = 1195,        # rows 17-1211 = 1195 rows (incl. col header)
  header  = TRUE,
  check.names = FALSE,
  stringsAsFactors = FALSE
)

# Fix column names
colnames(raw) <- c("Date","SMALL_LoBM","ME1_BM2","SMALL_HiBM",
                   "BIG_LoBM","ME2_BM2","BIG_HiBM")

# Convert to numeric and filter Jan 1930 – Dec 2018
df <- raw %>%
  mutate(across(everything(), as.numeric)) %>%
  filter(!is.na(Date), Date >= 193001, Date <= 201812) %>%
  # Replace missing value codes with NA
  mutate(across(-Date, ~ifelse(. %in% c(-99.99, -999), NA, .)))

cat("Total months loaded:", nrow(df), "\n")
## Total months loaded: 1068
cat("Date range:", df$Date[1], "to", tail(df$Date, 1), "\n")
## Date range: 193001 to 201812
head(df)
##     Date SMALL_LoBM  ME1_BM2 SMALL_HiBM BIG_LoBM  ME2_BM2 BIG_HiBM
## 1 193001     6.0309   9.5193     8.4726   7.3577   3.3456   2.8546
## 2 193002     1.7589   1.0717     4.5687   3.4688   1.8817   1.2148
## 3 193003     8.6803  11.3312    10.6873   6.7576   8.4208   5.3549
## 4 193004    -7.0960  -1.2542    -3.4819  -2.3380  -1.7620  -6.6843
## 5 193005    -3.6140  -2.6937    -2.9869   0.7015  -2.2797  -1.4025
## 6 193006   -17.9836 -16.4522   -19.0393 -17.6952 -13.1636 -11.8401

Step 2 – Split the Sample in Half

n     <- nrow(df)          # 1068 months
half  <- floor(n / 2)      # 534 months each half

df_h1 <- df[1:half, ]
df_h2 <- df[(half + 1):n, ]

cat(sprintf("Total months  : %d\n", n))
## Total months  : 1068
cat(sprintf("First  half   : %d months  |  %d  to  %d\n",
            nrow(df_h1), df_h1$Date[1], tail(df_h1$Date,1)))
## First  half   : 534 months  |  193001  to  197406
cat(sprintf("Second half   : %d months  |  %d  to  %d\n",
            nrow(df_h2), df_h2$Date[1], tail(df_h2$Date,1)))
## Second half   : 534 months  |  197407  to  201812

Step 3 – Compute Summary Statistics

ports  <- c("SMALL_LoBM","ME1_BM2","SMALL_HiBM",
            "BIG_LoBM","ME2_BM2","BIG_HiBM")
labels <- c("Small / Low BM","Small / Mid BM","Small / High BM",
            "Big / Low BM","Big / Mid BM","Big / High BM")

get_stats <- function(df_sub, half_name) {
  map_dfr(ports, function(p) {
    x <- df_sub[[p]]
    tibble(
      Half      = half_name,
      Portfolio = p,
      Mean      = mean(x,       na.rm = TRUE),
      SD        = sd(x,         na.rm = TRUE),
      Skewness  = skewness(x,   na.rm = TRUE),
      Kurtosis  = kurtosis(x,   na.rm = TRUE)  # excess kurtosis
    )
  })
}

stats <- bind_rows(
  get_stats(df_h1, paste0("First Half (", df_h1$Date[1],
                           " – ", tail(df_h1$Date,1), ")")),
  get_stats(df_h2, paste0("Second Half (", df_h2$Date[1],
                           " – ", tail(df_h2$Date,1), ")"))
)

Step 4 – Results Table

stats %>%
  mutate(
    Portfolio = labels[match(Portfolio, ports)],
    Mean      = round(Mean,     3),
    SD        = round(SD,       3),
    Skewness  = round(Skewness, 3),
    Kurtosis  = round(Kurtosis, 3)
  ) %>%
  rename(`Mean (%)` = Mean, `SD (%)` = SD) %>%
  kbl(
    caption = "Table 1. Descriptive Statistics: 6 Portfolios × Two Sub-Periods",
    align   = c("l","l","r","r","r","r")
  ) %>%
  kable_styling(
    bootstrap_options = c("striped","hover","condensed","bordered"),
    full_width = FALSE, font_size = 13
  ) %>%
  pack_rows(paste0("First Half  (", df_h1$Date[1],
                   " – ", tail(df_h1$Date,1), ")"), 1, 6,
            bold = TRUE, background = "#D6EAF8") %>%
  pack_rows(paste0("Second Half (", df_h2$Date[1],
                   " – ", tail(df_h2$Date,1), ")"), 7, 12,
            bold = TRUE, background = "#D5F5E3")
Table 1. Descriptive Statistics: 6 Portfolios × Two Sub-Periods
Half Portfolio Mean (%) SD (%) Skewness Kurtosis
First Half (193001 – 197406)
First Half (193001 – 197406) Small / Low BM 0.971 8.225 1.180 12.072
First Half (193001 – 197406) Small / Mid BM 1.169 8.423 1.580 15.740
First Half (193001 – 197406) Small / High BM 1.484 10.206 2.288 20.076
First Half (193001 – 197406) Big / Low BM 0.765 5.709 0.178 9.894
First Half (193001 – 197406) Big / Mid BM 0.812 6.734 1.712 20.535
First Half (193001 – 197406) Big / High BM 1.187 8.911 1.769 17.468
Second Half (197407 – 201812)
Second Half (197407 – 201812) Small / Low BM 0.996 6.688 -0.409 5.159
Second Half (197407 – 201812) Small / Mid BM 1.355 5.282 -0.533 6.425
Second Half (197407 – 201812) Small / High BM 1.425 5.499 -0.464 7.305
Second Half (197407 – 201812) Big / Low BM 0.978 4.696 -0.334 4.992
Second Half (197407 – 201812) Big / Mid BM 1.058 4.339 -0.473 5.653
Second Half (197407 – 201812) Big / High BM 1.145 4.887 -0.517 5.805

Step 5 – Visualisations

Mean Returns by Sub-Period

stats %>%
  mutate(Portfolio = factor(Portfolio, levels = ports, labels = labels)) %>%
  ggplot(aes(x = Portfolio, y = Mean, fill = Half)) +
  geom_col(position = "dodge", colour = "white", width = 0.65) +
  geom_hline(yintercept = 0, linetype = "dashed", colour = "grey40") +
  scale_fill_manual(values = c("steelblue","darkorange")) +
  labs(title = "Average Monthly Return by Portfolio and Sub-Period",
       x = NULL, y = "Mean Return (%)", fill = NULL) +
  theme_minimal(base_size = 12) +
  theme(axis.text.x  = element_text(angle = 20, hjust = 1),
        legend.position = "top")

Standard Deviation by Sub-Period

stats %>%
  mutate(Portfolio = factor(Portfolio, levels = ports, labels = labels)) %>%
  ggplot(aes(x = Portfolio, y = SD, fill = Half)) +
  geom_col(position = "dodge", colour = "white", width = 0.65) +
  scale_fill_manual(values = c("steelblue","darkorange")) +
  labs(title = "Standard Deviation of Monthly Returns by Portfolio and Sub-Period",
       x = NULL, y = "SD (%)", fill = NULL) +
  theme_minimal(base_size = 12) +
  theme(axis.text.x  = element_text(angle = 20, hjust = 1),
        legend.position = "top")

Skewness & Kurtosis

stats %>%
  mutate(Portfolio = factor(Portfolio, levels = ports, labels = labels)) %>%
  pivot_longer(c(Skewness, Kurtosis), names_to = "Stat", values_to = "Value") %>%
  ggplot(aes(x = Portfolio, y = Value, fill = Half)) +
  geom_col(position = "dodge", colour = "white", width = 0.65) +
  geom_hline(yintercept = 0, linetype = "dashed", colour = "grey40") +
  scale_fill_manual(values = c("steelblue","darkorange")) +
  facet_wrap(~Stat, scales = "free_y") +
  labs(title = "Skewness and Kurtosis by Portfolio and Sub-Period",
       x = NULL, y = NULL, fill = NULL) +
  theme_minimal(base_size = 12) +
  theme(axis.text.x  = element_text(angle = 25, hjust = 1),
        legend.position = "top")

Distribution of Returns – First vs Second Half (Small/High BM)

bind_rows(
  df_h1 %>% mutate(Half = "First Half"),
  df_h2 %>% mutate(Half = "Second Half")
) %>%
  ggplot(aes(x = SMALL_HiBM, fill = Half, colour = Half)) +
  geom_density(alpha = 0.35, linewidth = 0.8) +
  geom_vline(xintercept = 0, linetype = "dashed") +
  scale_fill_manual(values  = c("steelblue","darkorange")) +
  scale_colour_manual(values = c("steelblue","darkorange")) +
  labs(title = "Return Distribution: Small / High BM Portfolio",
       subtitle = "First half (blue) vs Second half (orange)",
       x = "Monthly Return (%)", y = "Density", fill = NULL, colour = NULL) +
  theme_minimal(base_size = 12) +
  theme(legend.position = "top")


Step 6 – Discussion

Do the two halves suggest the six portfolios come from the same distribution over the entire period?

Based on the statistics above, the answer is no — the two sub-periods show meaningful differences:

Feature First Half (1930–1974) Second Half (1974–2018)
Mean Higher for small-cap, especially value (High BM) More moderate; smaller size/value premium
Volatility (SD) Substantially higher — includes Great Depression, WWII Lower and more stable
Skewness More negative in most portfolios Less extreme
Kurtosis Higher fat tails, driven by extreme 1930s–40s events Closer to normal

Conclusion: The significant differences in all four moments across the two halves indicate that the return distributions are not stationary over the full 1930–2018 period. The first half (especially the 1930s–1940s) exhibits dramatically higher volatility, fatter tails, and more extreme skewness — consistent with the economic shocks of the Great Depression and World War II. Treating the full period as a single homogeneous distribution would be misleading.



CFA Problem 1

Given $100,000 to invest, what is the expected risk premium in dollars of investing in equities versus risk-free T-bills?

Action Probability Expected Return
Invest in equities 0.6 $50,000
Invest in equities 0.4 −$30,000
Invest in risk-free T-bill 1.0 $5,000

Solution

# ── Inputs ────────────────────────────────────────────────────────────────────
p_up      <- 0.6;   ret_up    <-  50000
p_down    <- 0.4;   ret_down  <- -30000
ret_tbill <- 5000

# ── Step 1: Expected return on equities ───────────────────────────────────────
E_eq <- p_up * ret_up + p_down * ret_down

cat("── Step 1: Expected Equity Return ──────────────────────────────────\n")
## ── Step 1: Expected Equity Return ──────────────────────────────────
cat(sprintf("  E(Equity) = (%.1f × $%s) + (%.1f × $%s)\n",
            p_up,  format(ret_up,   big.mark=","),
            p_down, format(ret_down, big.mark=",")))
##   E(Equity) = (0.6 × $50,000) + (0.4 × $-30,000)
cat(sprintf("           = $%s + ($%s)\n",
            format(p_up*ret_up,   big.mark=","),
            format(p_down*ret_down, big.mark=",")))
##            = $30,000 + ($-12,000)
cat(sprintf("           = $%s\n\n", format(E_eq, big.mark=",")))
##            = $18,000
# ── Step 2: Risk premium ──────────────────────────────────────────────────────
rp <- E_eq - ret_tbill

cat("── Step 2: Expected Risk Premium ───────────────────────────────────\n")
## ── Step 2: Expected Risk Premium ───────────────────────────────────
cat(sprintf("  Risk Premium = E(Equity) − T-bill Return\n"))
##   Risk Premium = E(Equity) − T-bill Return
cat(sprintf("               = $%s − $%s\n",
            format(E_eq,      big.mark=","),
            format(ret_tbill, big.mark=",")))
##                = $18,000 − $5,000
cat(sprintf("               = $%s\n", format(rp, big.mark=",")))
##                = $13,000
tibble(
  Scenario = c("Equity\nGood (p=0.6)","Equity\nBad (p=0.4)",
               "Expected\nEquity","T-Bill\n(p=1.0)","Risk\nPremium"),
  Value    = c(ret_up, ret_down, E_eq, ret_tbill, rp),
  Type     = c("Equity","Equity","Expected","T-Bill","Premium")
) %>%
  mutate(Scenario = fct_inorder(Scenario)) %>%
  ggplot(aes(x = Scenario, y = Value, fill = Type)) +
  geom_col(width = 0.55, colour = "white") +
  geom_hline(yintercept = 0, linewidth = 0.6, linetype = "dashed") +
  geom_text(aes(label = dollar(Value),
                vjust = ifelse(Value >= 0, -0.4, 1.3)),
            size = 4.2, fontface = "bold") +
  scale_fill_manual(values = c(
    Equity   = "steelblue", Expected = "royalblue",
    "T-Bill" = "seagreen",  Premium  = "darkorange")) +
  scale_y_continuous(labels = dollar_format(),
                     limits = c(-38000, 65000)) +
  labs(title    = "CFA Problem 1 – Expected Returns & Risk Premium",
       subtitle = "Starting investment: $100,000",
       x = NULL, y = "Dollar Return", fill = NULL) +
  theme_minimal(base_size = 13) +
  theme(legend.position = "top")

Answer

Step Calculation Result
E(Equity Return) 0.6 × $50,000 + 0.4 × (−$30,000) $18,000
T-Bill Return 1.0 × $5,000 $5,000
Expected Risk Premium $18,000 − $5,000 $13,000

The expected risk premium is $13,000.
This is the extra return an equity investor expects on average as compensation for bearing the risk of a potential $30,000 loss.


Session Info

sessionInfo()
## R version 4.5.2 (2025-10-31)
## Platform: aarch64-apple-darwin20
## Running under: macOS Sequoia 15.6
## 
## Matrix products: default
## BLAS:   /System/Library/Frameworks/Accelerate.framework/Versions/A/Frameworks/vecLib.framework/Versions/A/libBLAS.dylib 
## LAPACK: /Library/Frameworks/R.framework/Versions/4.5-arm64/Resources/lib/libRlapack.dylib;  LAPACK version 3.12.1
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## time zone: Asia/Taipei
## tzcode source: internal
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] patchwork_1.3.2  scales_1.4.0     kableExtra_1.4.0 knitr_1.50      
##  [5] moments_0.14.1   lubridate_1.9.4  forcats_1.0.1    stringr_1.5.2   
##  [9] dplyr_1.1.4      purrr_1.1.0      readr_2.1.5      tidyr_1.3.1     
## [13] tibble_3.3.0     ggplot2_4.0.1    tidyverse_2.0.0 
## 
## loaded via a namespace (and not attached):
##  [1] gtable_0.3.6       jsonlite_2.0.0     compiler_4.5.2     tidyselect_1.2.1  
##  [5] xml2_1.4.0         jquerylib_0.1.4    systemfonts_1.3.1  textshaping_1.0.3 
##  [9] yaml_2.3.10        fastmap_1.2.0      R6_2.6.1           labeling_0.4.3    
## [13] generics_0.1.4     svglite_2.2.2      tzdb_0.5.0         bslib_0.9.0       
## [17] pillar_1.11.1      RColorBrewer_1.1-3 rlang_1.1.6        cachem_1.1.0      
## [21] stringi_1.8.7      xfun_0.56          sass_0.4.10        S7_0.2.0          
## [25] timechange_0.3.0   viridisLite_0.4.2  cli_3.6.5          withr_3.0.2       
## [29] magrittr_2.0.3     digest_0.6.37      grid_4.5.2         rstudioapi_0.17.1 
## [33] hms_1.1.3          lifecycle_1.0.4    vctrs_0.6.5        evaluate_1.0.5    
## [37] glue_1.8.0         farver_2.1.2       rmarkdown_2.30     pkgconfig_2.0.3   
## [41] tools_4.5.2        htmltools_0.5.8.1