Problem 12 (Chapter 5): Six Portfolios — Size and Book-to-Market

Download & Parse Data

url      <- "https://mba.tuck.dartmouth.edu/pages/faculty/ken.french/ftp/6_Portfolios_2x3_CSV.zip"
temp_zip <- tempfile(fileext = ".zip")
temp_dir <- tempdir()

download.file(url, temp_zip, mode = "wb")
unzip(temp_zip, exdir = temp_dir)

csv_file  <- file.path(temp_dir, "6_Portfolios_2x3.csv")
raw_lines <- readLines(csv_file)

# Data section "Average Value Weighted Returns -- Monthly"
# starts at line 15 (header), line 16 (col names), line 17 (data)
data_start  <- 17
blank_after <- which(trimws(raw_lines[data_start:length(raw_lines)]) == "")[1]
data_end    <- data_start + blank_after - 2

cat("Data from line", data_start, "to line", data_end, "\n")
## Data from line 17 to line 1211
cat("Monthly observations:", data_end - data_start + 1, "\n")
## Monthly observations: 1195
data_block <- raw_lines[data_start:data_end]

portfolio_raw <- read.csv(
  text       = paste(c("Date,SL,SM,SH,BL,BM,BH", data_block), collapse = "\n"),
  header     = TRUE,
  na.strings = c("-99.99", "-999")
)

portfolio_raw$Date <- as.integer(portfolio_raw$Date)

cat("Rows parsed:", nrow(portfolio_raw), "\n")
## Rows parsed: 1195
cat("Full date range:", min(portfolio_raw$Date, na.rm = TRUE),
    "to", max(portfolio_raw$Date, na.rm = TRUE), "\n")
## Full date range: 192607 to 202601
head(portfolio_raw)
##     Date      SL      SM      SH      BL      BM      BH
## 1 192607  1.0866  0.8807 -0.1275  5.5746  1.9060  2.0068
## 2 192608  0.7831  1.4677  5.4422  2.7268  2.7028  5.6834
## 3 192609 -2.8045 -0.0599 -0.4399  1.4777  0.0954 -0.7872
## 4 192610 -4.0289 -4.3615 -2.0128 -3.6327 -2.3451 -4.0040
## 5 192611  3.2971  3.6237  2.0877  3.2120  2.9346  3.1964
## 6 192612  2.5645  1.7773  3.2700  2.9011  2.6210  2.3073
portfolio_data <- portfolio_raw %>%
  filter(Date >= 193001, Date <= 201812) %>%
  drop_na()

cat("Observations (Jan 1930 – Dec 2018):", nrow(portfolio_data), "\n")
## Observations (Jan 1930 – Dec 2018): 1068
cat("Date range:", min(portfolio_data$Date), "to", max(portfolio_data$Date), "\n")
## Date range: 193001 to 201812

Split the Sample in Half

n    <- nrow(portfolio_data)
half <- floor(n / 2)

first_half  <- portfolio_data[1:half, ]
second_half <- portfolio_data[(half + 1):n, ]

cat("First half :", nrow(first_half),  "obs |",
    min(first_half$Date),  "to", max(first_half$Date),  "\n")
## First half : 534 obs | 193001 to 197406
cat("Second half:", nrow(second_half), "obs |",
    min(second_half$Date), "to", max(second_half$Date), "\n")
## Second half: 534 obs | 197407 to 201812

Summary Statistics

port_names  <- c("SL", "SM", "SH", "BL", "BM", "BH")
port_labels <- c(
  SL = "Small / Low BtM",
  SM = "Small / Med BtM",
  SH = "Small / High BtM",
  BL = "Big / Low BtM",
  BM = "Big / Med BtM",
  BH = "Big / High BtM"
)

compute_stats <- function(df, label) {
  map_dfr(port_names, function(p) {
    x <- df[[p]]
    tibble(
      Half      = label,
      Portfolio = port_labels[p],
      Mean      = round(mean(x,     na.rm = TRUE), 4),
      SD        = round(sd(x,       na.rm = TRUE), 4),
      Skewness  = round(skewness(x, na.rm = TRUE), 4),
      Kurtosis  = round(kurtosis(x, na.rm = TRUE), 4)
    )
  })
}

stats_first  <- compute_stats(first_half,  "First Half")
stats_second <- compute_stats(second_half, "Second Half")
all_stats    <- bind_rows(stats_first, stats_second)
stats_first %>%
  select(-Half) %>%
  kable(
    caption = paste0("First Half (", min(first_half$Date), " – ", max(first_half$Date), ")"),
    align   = "lrrrr"
  ) %>%
  kable_styling(
    bootstrap_options = c("striped", "hover", "condensed", "bordered"),
    full_width = FALSE
  )
First Half (193001 – 197406)
Portfolio Mean SD Skewness Kurtosis
Small / Low BtM 0.9713 8.2253 1.1800 12.0716
Small / Med BtM 1.1695 8.4229 1.5797 15.7404
Small / High BtM 1.4844 10.2059 2.2875 20.0760
Big / Low BtM 0.7648 5.7095 0.1783 9.8941
Big / Med BtM 0.8118 6.7341 1.7116 20.5352
Big / High BtM 1.1874 8.9106 1.7694 17.4682
stats_second %>%
  select(-Half) %>%
  kable(
    caption = paste0("Second Half (", min(second_half$Date), " – ", max(second_half$Date), ")"),
    align   = "lrrrr"
  ) %>%
  kable_styling(
    bootstrap_options = c("striped", "hover", "condensed", "bordered"),
    full_width = FALSE
  )
Second Half (197407 – 201812)
Portfolio Mean SD Skewness Kurtosis
Small / Low BtM 0.9959 6.6884 -0.4086 5.1587
Small / Med BtM 1.3548 5.2817 -0.5330 6.4246
Small / High BtM 1.4251 5.4987 -0.4644 7.3053
Big / Low BtM 0.9781 4.6955 -0.3337 4.9925
Big / Med BtM 1.0578 4.3391 -0.4729 5.6534
Big / High BtM 1.1446 4.8871 -0.5172 5.8054

Visual Comparison

all_stats %>%
  ggplot(aes(x = Portfolio, y = Mean, fill = Half)) +
  geom_col(position = "dodge", width = 0.6) +
  scale_fill_manual(values = c("First Half" = "#2c7bb6", "Second Half" = "#d7191c")) +
  labs(
    title = "Average Monthly Return by Portfolio and Sample Half",
    x = "Portfolio", y = "Mean Monthly Return (%)", fill = NULL
  ) +
  theme_minimal(base_size = 13) +
  theme(legend.position = "top")

all_stats %>%
  ggplot(aes(x = Portfolio, y = SD, fill = Half)) +
  geom_col(position = "dodge", width = 0.6) +
  scale_fill_manual(values = c("First Half" = "#2c7bb6", "Second Half" = "#d7191c")) +
  labs(
    title = "Standard Deviation by Portfolio and Sample Half",
    x = "Portfolio", y = "SD of Monthly Return (%)", fill = NULL
  ) +
  theme_minimal(base_size = 13) +
  theme(legend.position = "top")

all_stats %>%
  pivot_longer(cols = c(Skewness, Kurtosis), names_to = "Stat", values_to = "Value") %>%
  ggplot(aes(x = Portfolio, y = Value, fill = Half)) +
  geom_col(position = "dodge", width = 0.6) +
  facet_wrap(~ Stat, scales = "free_y") +
  scale_fill_manual(values = c("First Half" = "#2c7bb6", "Second Half" = "#d7191c")) +
  labs(
    title = "Skewness and Kurtosis by Portfolio and Sample Half",
    x = "Portfolio", y = "Value", fill = NULL
  ) +
  theme_minimal(base_size = 13) +
  theme(legend.position = "top")

Conclusion

full_join(
  stats_first  %>% rename(Mean_H1 = Mean, SD_H1 = SD, Skew_H1 = Skewness, Kurt_H1 = Kurtosis),
  stats_second %>% rename(Mean_H2 = Mean, SD_H2 = SD, Skew_H2 = Skewness, Kurt_H2 = Kurtosis),
  by = "Portfolio"
) %>%
  transmute(
    Portfolio,
    `ΔMean`     = round(Mean_H2 - Mean_H1, 4),
    `ΔSD`       = round(SD_H2   - SD_H1,   4),
    `ΔSkewness` = round(Skew_H2 - Skew_H1, 4),
    `ΔKurtosis` = round(Kurt_H2 - Kurt_H1, 4)
  ) %>%
  kable(caption = "Difference: Second Half minus First Half", align = "lrrrr") %>%
  kable_styling(
    bootstrap_options = c("striped", "hover", "bordered"),
    full_width = FALSE
  )
Difference: Second Half minus First Half
Portfolio ΔMean ΔSD ΔSkewness ΔKurtosis
Small / Low BtM 0.0246 -1.5369 -1.5886 -6.9129
Small / Med BtM 0.1853 -3.1412 -2.1127 -9.3158
Small / High BtM -0.0593 -4.7072 -2.7519 -12.7707
Big / Low BtM 0.2133 -1.0140 -0.5120 -4.9016
Big / Med BtM 0.2460 -2.3950 -2.1845 -14.8818
Big / High BtM -0.0428 -4.0235 -2.2866 -11.6628

Conclusion: The split-half statistics suggest returns do not come from the same distribution over the full period. Mean returns, volatility, skewness, and kurtosis all differ meaningfully between halves — consistent with structural breaks, Great Depression era volatility, and time-varying risk premia across the 20th and 21st centuries.


CFA Problem 1: Expected Risk Premium

Setup

# Given $100,000 to invest
p_up    <- 0.6;  r_up    <-  50000   # Equities: gain $50,000 with prob 0.6
p_down  <- 0.4;  r_down  <- -30000   # Equities: lose $30,000 with prob 0.4
r_tbill <- 5000                       # T-bill: certain return of $5,000

Calculation

E_equity     <- p_up * r_up + p_down * r_down   # Expected return on equities
risk_premium <- E_equity - r_tbill               # Risk premium

cat("E[Equities]  = 0.6 ×", r_up,   "+ 0.4 ×", r_down,  "=", E_equity,     "\n")
## E[Equities]  = 0.6 × 50000 + 0.4 × -30000 = 18000
cat("E[T-bill]    =", r_tbill, "\n")
## E[T-bill]    = 5000
cat("Risk Premium =", E_equity, "-", r_tbill, "=", risk_premium, "\n")
## Risk Premium = 18000 - 5000 = 13000

Results Table

tibble(
  Calculation = c(
    "E[Equities] = 0.6 × $50,000 + 0.4 × (−$30,000)",
    "E[T-bill]   = 1.0 × $5,000",
    "Risk Premium = E[Equities] − E[T-bill]"
  ),
  Result = c(
    paste0("$", format(E_equity,     big.mark = ",")),
    paste0("$", format(r_tbill,      big.mark = ",")),
    paste0("$", format(risk_premium, big.mark = ","))
  )
) %>%
  kable(align = "lr") %>%
  kable_styling(
    bootstrap_options = c("striped", "hover", "bordered"),
    full_width = FALSE
  ) %>%
  row_spec(3, bold = TRUE, background = "#d4edda")
Calculation Result
E[Equities] = 0.6 × $50,000 + 0.4 × (−$30,000) $18,000
E[T-bill] = 1.0 × $5,000 $5,000
Risk Premium = E[Equities] − E[T-bill] $13,000

Answer: The expected risk premium of investing in equities versus risk-free T-bills is $13,000.