Problem 12 (Chapter 5): Six Portfolios — Size and Book-to-Market

Download & Parse Data

url      <- "https://mba.tuck.dartmouth.edu/pages/faculty/ken.french/ftp/6_Portfolios_2x3_CSV.zip"
temp_zip <- tempfile(fileext = ".zip")
temp_dir <- tempdir()

download.file(url, temp_zip, mode = "wb")
unzip(temp_zip, exdir = temp_dir)

csv_file  <- file.path(temp_dir, "6_Portfolios_2x3.csv")
raw_lines <- readLines(csv_file)

# Data section "Average Value Weighted Returns -- Monthly"
# starts at line 15 (header), line 16 (col names), line 17 (data)
data_start  <- 17
blank_after <- which(trimws(raw_lines[data_start:length(raw_lines)]) == "")[1]
data_end    <- data_start + blank_after - 2

cat("Data from line", data_start, "to line", data_end, "\n")

## Data from line 17 to line 1211

cat("Monthly observations:", data_end - data_start + 1, "\n")

## Monthly observations: 1195

data_block <- raw_lines[data_start:data_end]

portfolio_raw <- read.csv(
  text       = paste(c("Date,SL,SM,SH,BL,BM,BH", data_block), collapse = "\n"),
  header     = TRUE,
  na.strings = c("-99.99", "-999")
)

portfolio_raw$Date <- as.integer(portfolio_raw$Date)

cat("Rows parsed:", nrow(portfolio_raw), "\n")

## Rows parsed: 1195

cat("Full date range:", min(portfolio_raw$Date, na.rm = TRUE),
    "to", max(portfolio_raw$Date, na.rm = TRUE), "\n")

## Full date range: 192607 to 202601

head(portfolio_raw)

##     Date      SL      SM      SH      BL      BM      BH
## 1 192607  1.0866  0.8807 -0.1275  5.5746  1.9060  2.0068
## 2 192608  0.7831  1.4677  5.4422  2.7268  2.7028  5.6834
## 3 192609 -2.8045 -0.0599 -0.4399  1.4777  0.0954 -0.7872
## 4 192610 -4.0289 -4.3615 -2.0128 -3.6327 -2.3451 -4.0040
## 5 192611  3.2971  3.6237  2.0877  3.2120  2.9346  3.1964
## 6 192612  2.5645  1.7773  3.2700  2.9011  2.6210  2.3073

portfolio_data <- portfolio_raw %>%
  filter(Date >= 193001, Date <= 201812) %>%
  drop_na()

cat("Observations (Jan 1930 – Dec 2018):", nrow(portfolio_data), "\n")

## Observations (Jan 1930 – Dec 2018): 1068

cat("Date range:", min(portfolio_data$Date), "to", max(portfolio_data$Date), "\n")

## Date range: 193001 to 201812

Split the Sample in Half

n    <- nrow(portfolio_data)
half <- floor(n / 2)

first_half  <- portfolio_data[1:half, ]
second_half <- portfolio_data[(half + 1):n, ]

cat("First half :", nrow(first_half),  "obs |",
    min(first_half$Date),  "to", max(first_half$Date),  "\n")

## First half : 534 obs | 193001 to 197406

cat("Second half:", nrow(second_half), "obs |",
    min(second_half$Date), "to", max(second_half$Date), "\n")

## Second half: 534 obs | 197407 to 201812

Summary Statistics

port_names  <- c("SL", "SM", "SH", "BL", "BM", "BH")
port_labels <- c(
  SL = "Small / Low BtM",
  SM = "Small / Med BtM",
  SH = "Small / High BtM",
  BL = "Big / Low BtM",
  BM = "Big / Med BtM",
  BH = "Big / High BtM"
)

compute_stats <- function(df, label) {
  map_dfr(port_names, function(p) {
    x <- df[[p]]
    tibble(
      Half      = label,
      Portfolio = port_labels[p],
      Mean      = round(mean(x,     na.rm = TRUE), 4),
      SD        = round(sd(x,       na.rm = TRUE), 4),
      Skewness  = round(skewness(x, na.rm = TRUE), 4),
      Kurtosis  = round(kurtosis(x, na.rm = TRUE), 4)
    )
  })
}

stats_first  <- compute_stats(first_half,  "First Half")
stats_second <- compute_stats(second_half, "Second Half")
all_stats    <- bind_rows(stats_first, stats_second)

stats_first %>%
  select(-Half) %>%
  kable(
    caption = paste0("First Half (", min(first_half$Date), " – ", max(first_half$Date), ")"),
    align   = "lrrrr"
  ) %>%
  kable_styling(
    bootstrap_options = c("striped", "hover", "condensed", "bordered"),
    full_width = FALSE
  )

First Half (193001 – 197406)
Portfolio	Mean	SD	Skewness	Kurtosis
Small / Low BtM	0.9713	8.2253	1.1800	12.0716
Small / Med BtM	1.1695	8.4229	1.5797	15.7404
Small / High BtM	1.4844	10.2059	2.2875	20.0760
Big / Low BtM	0.7648	5.7095	0.1783	9.8941
Big / Med BtM	0.8118	6.7341	1.7116	20.5352
Big / High BtM	1.1874	8.9106	1.7694	17.4682

stats_second %>%
  select(-Half) %>%
  kable(
    caption = paste0("Second Half (", min(second_half$Date), " – ", max(second_half$Date), ")"),
    align   = "lrrrr"
  ) %>%
  kable_styling(
    bootstrap_options = c("striped", "hover", "condensed", "bordered"),
    full_width = FALSE
  )

Second Half (197407 – 201812)
Portfolio	Mean	SD	Skewness	Kurtosis
Small / Low BtM	0.9959	6.6884	-0.4086	5.1587
Small / Med BtM	1.3548	5.2817	-0.5330	6.4246
Small / High BtM	1.4251	5.4987	-0.4644	7.3053
Big / Low BtM	0.9781	4.6955	-0.3337	4.9925
Big / Med BtM	1.0578	4.3391	-0.4729	5.6534
Big / High BtM	1.1446	4.8871	-0.5172	5.8054

Visual Comparison

all_stats %>%
  ggplot(aes(x = Portfolio, y = Mean, fill = Half)) +
  geom_col(position = "dodge", width = 0.6) +
  scale_fill_manual(values = c("First Half" = "#2c7bb6", "Second Half" = "#d7191c")) +
  labs(
    title = "Average Monthly Return by Portfolio and Sample Half",
    x = "Portfolio", y = "Mean Monthly Return (%)", fill = NULL
  ) +
  theme_minimal(base_size = 13) +
  theme(legend.position = "top")

all_stats %>%
  ggplot(aes(x = Portfolio, y = SD, fill = Half)) +
  geom_col(position = "dodge", width = 0.6) +
  scale_fill_manual(values = c("First Half" = "#2c7bb6", "Second Half" = "#d7191c")) +
  labs(
    title = "Standard Deviation by Portfolio and Sample Half",
    x = "Portfolio", y = "SD of Monthly Return (%)", fill = NULL
  ) +
  theme_minimal(base_size = 13) +
  theme(legend.position = "top")

all_stats %>%
  pivot_longer(cols = c(Skewness, Kurtosis), names_to = "Stat", values_to = "Value") %>%
  ggplot(aes(x = Portfolio, y = Value, fill = Half)) +
  geom_col(position = "dodge", width = 0.6) +
  facet_wrap(~ Stat, scales = "free_y") +
  scale_fill_manual(values = c("First Half" = "#2c7bb6", "Second Half" = "#d7191c")) +
  labs(
    title = "Skewness and Kurtosis by Portfolio and Sample Half",
    x = "Portfolio", y = "Value", fill = NULL
  ) +
  theme_minimal(base_size = 13) +
  theme(legend.position = "top")

Conclusion

full_join(
  stats_first  %>% rename(Mean_H1 = Mean, SD_H1 = SD, Skew_H1 = Skewness, Kurt_H1 = Kurtosis),
  stats_second %>% rename(Mean_H2 = Mean, SD_H2 = SD, Skew_H2 = Skewness, Kurt_H2 = Kurtosis),
  by = "Portfolio"
) %>%
  transmute(
    Portfolio,
    `ΔMean`     = round(Mean_H2 - Mean_H1, 4),
    `ΔSD`       = round(SD_H2   - SD_H1,   4),
    `ΔSkewness` = round(Skew_H2 - Skew_H1, 4),
    `ΔKurtosis` = round(Kurt_H2 - Kurt_H1, 4)
  ) %>%
  kable(caption = "Difference: Second Half minus First Half", align = "lrrrr") %>%
  kable_styling(
    bootstrap_options = c("striped", "hover", "bordered"),
    full_width = FALSE
  )

Difference: Second Half minus First Half
Portfolio	ΔMean	ΔSD	ΔSkewness	ΔKurtosis
Small / Low BtM	0.0246	-1.5369	-1.5886	-6.9129
Small / Med BtM	0.1853	-3.1412	-2.1127	-9.3158
Small / High BtM	-0.0593	-4.7072	-2.7519	-12.7707
Big / Low BtM	0.2133	-1.0140	-0.5120	-4.9016
Big / Med BtM	0.2460	-2.3950	-2.1845	-14.8818
Big / High BtM	-0.0428	-4.0235	-2.2866	-11.6628

Conclusion: The split-half statistics suggest returns do not come from the same distribution over the full period. Mean returns, volatility, skewness, and kurtosis all differ meaningfully between halves — consistent with structural breaks, Great Depression era volatility, and time-varying risk premia across the 20th and 21st centuries.

CFA Problem 1: Expected Risk Premium

Setup

# Given $100,000 to invest
p_up    <- 0.6;  r_up    <-  50000   # Equities: gain $50,000 with prob 0.6
p_down  <- 0.4;  r_down  <- -30000   # Equities: lose $30,000 with prob 0.4
r_tbill <- 5000                       # T-bill: certain return of $5,000

Calculation

E_equity     <- p_up * r_up + p_down * r_down   # Expected return on equities
risk_premium <- E_equity - r_tbill               # Risk premium

cat("E[Equities]  = 0.6 ×", r_up,   "+ 0.4 ×", r_down,  "=", E_equity,     "\n")

## E[Equities]  = 0.6 × 50000 + 0.4 × -30000 = 18000

cat("E[T-bill]    =", r_tbill, "\n")

## E[T-bill]    = 5000

cat("Risk Premium =", E_equity, "-", r_tbill, "=", risk_premium, "\n")

## Risk Premium = 18000 - 5000 = 13000

Results Table

tibble(
  Calculation = c(
    "E[Equities] = 0.6 × $50,000 + 0.4 × (−$30,000)",
    "E[T-bill]   = 1.0 × $5,000",
    "Risk Premium = E[Equities] − E[T-bill]"
  ),
  Result = c(
    paste0("$", format(E_equity,     big.mark = ",")),
    paste0("$", format(r_tbill,      big.mark = ",")),
    paste0("$", format(risk_premium, big.mark = ","))
  )
) %>%
  kable(align = "lr") %>%
  kable_styling(
    bootstrap_options = c("striped", "hover", "bordered"),
    full_width = FALSE
  ) %>%
  row_spec(3, bold = TRUE, background = "#d4edda")

Calculation	Result
E[Equities] = 0.6 × $50,000 + 0.4 × (−$30,000)	$18,000
E[T-bill] = 1.0 × $5,000	$5,000
Risk Premium = E[Equities] − E[T-bill]	$13,000

Answer: The expected risk premium of investing in equities versus risk-free T-bills is $13,000.

Homework 2

Khongorzul Erkhembayar

2026-03-11

Problem 12 (Chapter 5): Six Portfolios — Size and Book-to-Market

Download & Parse Data

Split the Sample in Half

Summary Statistics

Visual Comparison

Conclusion

CFA Problem 1: Expected Risk Premium

Setup

Calculation

Results Table