Problem 12 (Chapter 5): Six Portfolios — Size and
Book-to-Market
Download & Parse Data
url <- "https://mba.tuck.dartmouth.edu/pages/faculty/ken.french/ftp/6_Portfolios_2x3_CSV.zip"
temp_zip <- tempfile(fileext = ".zip")
temp_dir <- tempdir()
download.file(url, temp_zip, mode = "wb")
unzip(temp_zip, exdir = temp_dir)
csv_file <- file.path(temp_dir, "6_Portfolios_2x3.csv")
raw_lines <- readLines(csv_file)
# Data section "Average Value Weighted Returns -- Monthly"
# starts at line 15 (header), line 16 (col names), line 17 (data)
data_start <- 17
blank_after <- which(trimws(raw_lines[data_start:length(raw_lines)]) == "")[1]
data_end <- data_start + blank_after - 2
cat("Data from line", data_start, "to line", data_end, "\n")
## Data from line 17 to line 1211
cat("Monthly observations:", data_end - data_start + 1, "\n")
## Monthly observations: 1195
data_block <- raw_lines[data_start:data_end]
portfolio_raw <- read.csv(
text = paste(c("Date,SL,SM,SH,BL,BM,BH", data_block), collapse = "\n"),
header = TRUE,
na.strings = c("-99.99", "-999")
)
portfolio_raw$Date <- as.integer(portfolio_raw$Date)
cat("Rows parsed:", nrow(portfolio_raw), "\n")
## Rows parsed: 1195
cat("Full date range:", min(portfolio_raw$Date, na.rm = TRUE),
"to", max(portfolio_raw$Date, na.rm = TRUE), "\n")
## Full date range: 192607 to 202601
head(portfolio_raw)
## Date SL SM SH BL BM BH
## 1 192607 1.0866 0.8807 -0.1275 5.5746 1.9060 2.0068
## 2 192608 0.7831 1.4677 5.4422 2.7268 2.7028 5.6834
## 3 192609 -2.8045 -0.0599 -0.4399 1.4777 0.0954 -0.7872
## 4 192610 -4.0289 -4.3615 -2.0128 -3.6327 -2.3451 -4.0040
## 5 192611 3.2971 3.6237 2.0877 3.2120 2.9346 3.1964
## 6 192612 2.5645 1.7773 3.2700 2.9011 2.6210 2.3073
portfolio_data <- portfolio_raw %>%
filter(Date >= 193001, Date <= 201812) %>%
drop_na()
cat("Observations (Jan 1930 – Dec 2018):", nrow(portfolio_data), "\n")
## Observations (Jan 1930 – Dec 2018): 1068
cat("Date range:", min(portfolio_data$Date), "to", max(portfolio_data$Date), "\n")
## Date range: 193001 to 201812
Split the Sample in Half
n <- nrow(portfolio_data)
half <- floor(n / 2)
first_half <- portfolio_data[1:half, ]
second_half <- portfolio_data[(half + 1):n, ]
cat("First half :", nrow(first_half), "obs |",
min(first_half$Date), "to", max(first_half$Date), "\n")
## First half : 534 obs | 193001 to 197406
cat("Second half:", nrow(second_half), "obs |",
min(second_half$Date), "to", max(second_half$Date), "\n")
## Second half: 534 obs | 197407 to 201812
Summary Statistics
port_names <- c("SL", "SM", "SH", "BL", "BM", "BH")
port_labels <- c(
SL = "Small / Low BtM",
SM = "Small / Med BtM",
SH = "Small / High BtM",
BL = "Big / Low BtM",
BM = "Big / Med BtM",
BH = "Big / High BtM"
)
compute_stats <- function(df, label) {
map_dfr(port_names, function(p) {
x <- df[[p]]
tibble(
Half = label,
Portfolio = port_labels[p],
Mean = round(mean(x, na.rm = TRUE), 4),
SD = round(sd(x, na.rm = TRUE), 4),
Skewness = round(skewness(x, na.rm = TRUE), 4),
Kurtosis = round(kurtosis(x, na.rm = TRUE), 4)
)
})
}
stats_first <- compute_stats(first_half, "First Half")
stats_second <- compute_stats(second_half, "Second Half")
all_stats <- bind_rows(stats_first, stats_second)
stats_first %>%
select(-Half) %>%
kable(
caption = paste0("First Half (", min(first_half$Date), " – ", max(first_half$Date), ")"),
align = "lrrrr"
) %>%
kable_styling(
bootstrap_options = c("striped", "hover", "condensed", "bordered"),
full_width = FALSE
)
First Half (193001 – 197406)
|
Portfolio
|
Mean
|
SD
|
Skewness
|
Kurtosis
|
|
Small / Low BtM
|
0.9713
|
8.2253
|
1.1800
|
12.0716
|
|
Small / Med BtM
|
1.1695
|
8.4229
|
1.5797
|
15.7404
|
|
Small / High BtM
|
1.4844
|
10.2059
|
2.2875
|
20.0760
|
|
Big / Low BtM
|
0.7648
|
5.7095
|
0.1783
|
9.8941
|
|
Big / Med BtM
|
0.8118
|
6.7341
|
1.7116
|
20.5352
|
|
Big / High BtM
|
1.1874
|
8.9106
|
1.7694
|
17.4682
|
stats_second %>%
select(-Half) %>%
kable(
caption = paste0("Second Half (", min(second_half$Date), " – ", max(second_half$Date), ")"),
align = "lrrrr"
) %>%
kable_styling(
bootstrap_options = c("striped", "hover", "condensed", "bordered"),
full_width = FALSE
)
Second Half (197407 – 201812)
|
Portfolio
|
Mean
|
SD
|
Skewness
|
Kurtosis
|
|
Small / Low BtM
|
0.9959
|
6.6884
|
-0.4086
|
5.1587
|
|
Small / Med BtM
|
1.3548
|
5.2817
|
-0.5330
|
6.4246
|
|
Small / High BtM
|
1.4251
|
5.4987
|
-0.4644
|
7.3053
|
|
Big / Low BtM
|
0.9781
|
4.6955
|
-0.3337
|
4.9925
|
|
Big / Med BtM
|
1.0578
|
4.3391
|
-0.4729
|
5.6534
|
|
Big / High BtM
|
1.1446
|
4.8871
|
-0.5172
|
5.8054
|
Visual Comparison
all_stats %>%
ggplot(aes(x = Portfolio, y = Mean, fill = Half)) +
geom_col(position = "dodge", width = 0.6) +
scale_fill_manual(values = c("First Half" = "#2c7bb6", "Second Half" = "#d7191c")) +
labs(
title = "Average Monthly Return by Portfolio and Sample Half",
x = "Portfolio", y = "Mean Monthly Return (%)", fill = NULL
) +
theme_minimal(base_size = 13) +
theme(legend.position = "top")

all_stats %>%
ggplot(aes(x = Portfolio, y = SD, fill = Half)) +
geom_col(position = "dodge", width = 0.6) +
scale_fill_manual(values = c("First Half" = "#2c7bb6", "Second Half" = "#d7191c")) +
labs(
title = "Standard Deviation by Portfolio and Sample Half",
x = "Portfolio", y = "SD of Monthly Return (%)", fill = NULL
) +
theme_minimal(base_size = 13) +
theme(legend.position = "top")

all_stats %>%
pivot_longer(cols = c(Skewness, Kurtosis), names_to = "Stat", values_to = "Value") %>%
ggplot(aes(x = Portfolio, y = Value, fill = Half)) +
geom_col(position = "dodge", width = 0.6) +
facet_wrap(~ Stat, scales = "free_y") +
scale_fill_manual(values = c("First Half" = "#2c7bb6", "Second Half" = "#d7191c")) +
labs(
title = "Skewness and Kurtosis by Portfolio and Sample Half",
x = "Portfolio", y = "Value", fill = NULL
) +
theme_minimal(base_size = 13) +
theme(legend.position = "top")

Conclusion
full_join(
stats_first %>% rename(Mean_H1 = Mean, SD_H1 = SD, Skew_H1 = Skewness, Kurt_H1 = Kurtosis),
stats_second %>% rename(Mean_H2 = Mean, SD_H2 = SD, Skew_H2 = Skewness, Kurt_H2 = Kurtosis),
by = "Portfolio"
) %>%
transmute(
Portfolio,
`ΔMean` = round(Mean_H2 - Mean_H1, 4),
`ΔSD` = round(SD_H2 - SD_H1, 4),
`ΔSkewness` = round(Skew_H2 - Skew_H1, 4),
`ΔKurtosis` = round(Kurt_H2 - Kurt_H1, 4)
) %>%
kable(caption = "Difference: Second Half minus First Half", align = "lrrrr") %>%
kable_styling(
bootstrap_options = c("striped", "hover", "bordered"),
full_width = FALSE
)
Difference: Second Half minus First Half
|
Portfolio
|
ΔMean
|
ΔSD
|
ΔSkewness
|
ΔKurtosis
|
|
Small / Low BtM
|
0.0246
|
-1.5369
|
-1.5886
|
-6.9129
|
|
Small / Med BtM
|
0.1853
|
-3.1412
|
-2.1127
|
-9.3158
|
|
Small / High BtM
|
-0.0593
|
-4.7072
|
-2.7519
|
-12.7707
|
|
Big / Low BtM
|
0.2133
|
-1.0140
|
-0.5120
|
-4.9016
|
|
Big / Med BtM
|
0.2460
|
-2.3950
|
-2.1845
|
-14.8818
|
|
Big / High BtM
|
-0.0428
|
-4.0235
|
-2.2866
|
-11.6628
|
Conclusion: The split-half statistics suggest
returns do not come from the same distribution over the
full period. Mean returns, volatility, skewness, and kurtosis all differ
meaningfully between halves — consistent with structural breaks, Great
Depression era volatility, and time-varying risk premia across the 20th
and 21st centuries.
CFA Problem 1: Expected Risk Premium
Setup
# Given $100,000 to invest
p_up <- 0.6; r_up <- 50000 # Equities: gain $50,000 with prob 0.6
p_down <- 0.4; r_down <- -30000 # Equities: lose $30,000 with prob 0.4
r_tbill <- 5000 # T-bill: certain return of $5,000
Calculation
E_equity <- p_up * r_up + p_down * r_down # Expected return on equities
risk_premium <- E_equity - r_tbill # Risk premium
cat("E[Equities] = 0.6 ×", r_up, "+ 0.4 ×", r_down, "=", E_equity, "\n")
## E[Equities] = 0.6 × 50000 + 0.4 × -30000 = 18000
cat("E[T-bill] =", r_tbill, "\n")
## E[T-bill] = 5000
cat("Risk Premium =", E_equity, "-", r_tbill, "=", risk_premium, "\n")
## Risk Premium = 18000 - 5000 = 13000
Results Table
tibble(
Calculation = c(
"E[Equities] = 0.6 × $50,000 + 0.4 × (−$30,000)",
"E[T-bill] = 1.0 × $5,000",
"Risk Premium = E[Equities] − E[T-bill]"
),
Result = c(
paste0("$", format(E_equity, big.mark = ",")),
paste0("$", format(r_tbill, big.mark = ",")),
paste0("$", format(risk_premium, big.mark = ","))
)
) %>%
kable(align = "lr") %>%
kable_styling(
bootstrap_options = c("striped", "hover", "bordered"),
full_width = FALSE
) %>%
row_spec(3, bold = TRUE, background = "#d4edda")
|
Calculation
|
Result
|
|
E[Equities] = 0.6 × $50,000 + 0.4 × (−$30,000)
|
$18,000
|
|
E[T-bill] = 1.0 × $5,000
|
$5,000
|
|
Risk Premium = E[Equities] − E[T-bill]
|
$13,000
|
Answer: The expected risk premium of investing in
equities versus risk-free T-bills is $13,000.