# ── core data and finance packages ──────────────────────────────────────────
library(quantmod)
library(PerformanceAnalytics)
library(frenchdata)

# ── data wrangling ───────────────────────────────────────────────────────────
library(dplyr)
library(tidyr)
library(lubridate)

# ── visualisation ────────────────────────────────────────────────────────────
library(ggplot2)
library(ggcorrplot)

# ── optimisation ─────────────────────────────────────────────────────────────
library(quadprog)

# ── table formatting ─────────────────────────────────────────────────────────
library(knitr)
library(kableExtra)

Section 1: Computer Questions


Question 1 – Download ETF Price Data (2010–2025)

I downloaded daily adjusted closing prices for the following eight exchange-traded funds (ETFs) from Yahoo Finance, covering the period 1 January 2010 through 31 March 2025. Adjusted prices account for dividends and stock splits, making them the appropriate input for return calculations.

The eight ETFs represent a broad range of asset classes:

Ticker Description Asset Class
SPY SPDR S&P 500 ETF US Large-Cap Equity
QQQ Invesco QQQ Trust (Nasdaq-100) US Tech/Growth Equity
EEM iShares MSCI Emerging Markets Emerging Market Equity
IWM iShares Russell 2000 US Small-Cap Equity
EFA iShares MSCI EAFE International Developed Equity
TLT iShares 20+ Year Treasury Bond Long-Term US Bonds
IYR iShares US Real Estate Real Estate (REIT)
GLD SPDR Gold Shares Commodities (Gold)

This diversified set allows us to study how factor models capture the risk structure across different asset classes and geographies.

etf_list <- c("SPY","QQQ","EEM","IWM","EFA","TLT","IYR","GLD")

# download all tickers into global environment
getSymbols(etf_list, src="yahoo", from="2010-01-01", to="2025-03-31",
           auto.assign=TRUE)
## [1] "SPY" "QQQ" "EEM" "IWM" "EFA" "TLT" "IYR" "GLD"
# combine adjusted prices into a single xts object
adj_prices <- do.call(merge, lapply(etf_list, function(x) Ad(get(x))))
colnames(adj_prices) <- etf_list

cat("=== Price Data Summary ===\n")
## === Price Data Summary ===
cat("Trading days loaded:", nrow(adj_prices), "\n")
## Trading days loaded: 3833
cat("Start date:", as.character(index(adj_prices)[1]), "\n")
## Start date: 2010-01-04
cat("End date:  ", as.character(tail(index(adj_prices), 1)), "\n")
## End date:   2025-03-28
cat("ETFs:", paste(etf_list, collapse=", "), "\n")
## ETFs: SPY, QQQ, EEM, IWM, EFA, TLT, IYR, GLD
# display first and last few rows
head(adj_prices, 3) |>
  kable(caption="Table 1.1 – Daily Adjusted Prices (first 3 trading days)", digits=2) |>
  kable_styling(bootstrap_options=c("striped","condensed","hover"), full_width=FALSE)
Table 1.1 – Daily Adjusted Prices (first 3 trading days)
SPY QQQ EEM IWM EFA TLT IYR GLD
84.79636 40.29078 30.35151 51.36655 35.12844 56.13515 26.76809 109.80
85.02084 40.29078 30.57182 51.18995 35.15939 56.49770 26.83239 109.70
85.08073 40.04777 30.63577 51.14177 35.30801 55.74138 26.82070 111.51
# normalise all prices to 100 at start for visual comparison
norm_prices <- sweep(adj_prices, 2, as.numeric(adj_prices[1,]), "/") * 100

autoplot(norm_prices, facets=NULL) +
  labs(title="Figure 1.1 – Normalised ETF Price Index (Base = 100 at Jan 2010)",
       x=NULL, y="Index Level", color="ETF") +
  theme_bw(base_size=11)

The chart above gives an immediate sense of the wide dispersion in cumulative performance across asset classes over this 15-year window. QQQ (tech-heavy Nasdaq) grew dramatically relative to other assets, while TLT (long-duration bonds) experienced severe drawdowns during the 2022 rate-hiking cycle. GLD and EEM have delivered relatively modest returns. This heterogeneity is exactly why covariance estimation — the subject of Questions 5 and 6 — matters so much for portfolio construction.


Question 2 – Monthly Discrete Returns

To build the covariance matrices in Questions 5 and 6, I work with monthly rather than daily returns. This reduces noise from short-term microstructure effects and aligns with the Fama-French factor data which is available at a monthly frequency.

Method:

  1. Convert daily prices to end-of-month prices using to.monthly(..., indexAt="lastof").
  2. Compute discrete (simple) returns: \(R_t = \dfrac{P_t - P_{t-1}}{P_{t-1}}\)

I use discrete returns (not log returns) because they aggregate across assets in a portfolio via a simple weighted sum: \(R_p = \sum_i w_i R_i\), which is convenient for portfolio analysis.

# step 1: end-of-month prices
eom_prices <- to.monthly(adj_prices, indexAt="lastof", OHLC=FALSE)

# step 2: discrete (simple) returns
mthly_ret <- na.omit(
  Return.calculate(eom_prices, method="discrete")
)

cat("=== Monthly Returns Summary ===\n")
## === Monthly Returns Summary ===
cat("Observations:", nrow(mthly_ret), "months\n")
## Observations: 182 months
cat("Start:", format(index(mthly_ret)[1], "%b %Y"), "\n")
## Start: Feb 2010
cat("End:  ", format(tail(index(mthly_ret),1), "%b %Y"), "\n\n")
## End:   Mar 2025
# summary statistics table
stats_ret <- data.frame(
  ETF    = etf_list,
  Mean   = sprintf("%.3f%%", colMeans(mthly_ret)*100),
  SD     = sprintf("%.3f%%", apply(mthly_ret, 2, sd)*100),
  Min    = sprintf("%.2f%%", apply(mthly_ret, 2, min)*100),
  Max    = sprintf("%.2f%%", apply(mthly_ret, 2, max)*100)
)
stats_ret |>
  kable(caption="Table 2.1 – Monthly Return Summary Statistics (Jan 2010 – Mar 2025)") |>
  kable_styling(bootstrap_options=c("striped","hover"), full_width=FALSE)
Table 2.1 – Monthly Return Summary Statistics (Jan 2010 – Mar 2025)
ETF Mean SD Min Max
SPY 1.150% 4.205% -12.49% 12.70%
QQQ 1.525% 5.024% -13.60% 14.97%
EEM 0.384% 5.213% -17.89% 16.27%
IWM 0.944% 5.733% -21.48% 18.24%
EFA 0.597% 4.550% -14.11% 14.27%
TLT 0.298% 4.013% -9.42% 13.21%
IYR 0.835% 4.921% -19.63% 13.19%
GLD 0.645% 4.530% -11.06% 12.27%
# correlation matrix of monthly returns
ret_df <- as.data.frame(mthly_ret)
cor_mat <- cor(ret_df)

ggcorrplot(cor_mat, method="square", type="lower", lab=TRUE, lab_size=3,
           title="Figure 2.1 – Correlation Matrix of Monthly ETF Returns (2010–2025)",
           colors=c("#d73027","white","#1a9850"))

The correlation matrix reveals several important patterns. Equity ETFs (SPY, QQQ, IWM, EFA, EEM) are highly correlated with each other, as they all load heavily on the broad market factor. TLT (long bonds) is negatively or weakly correlated with equities, reflecting its traditional role as a hedge. GLD has near-zero correlation with most assets, supporting its use as a diversifier. IYR (REITs) sits somewhere between equities and bonds in correlation terms.


Question 3 – Download Fama-French 3 Factor Data

The Fama-French 3-factor model (Fama & French, 1993) decomposes equity excess returns into three systematic components:

  • Mkt-RF: the excess return on the market portfolio (captures broad market risk, analogous to CAPM beta)
  • SMB (Small Minus Big): return difference between small-cap and large-cap stocks (captures size risk)
  • HML (High Minus Low): return difference between high book-to-market and low book-to-market stocks (captures value risk)

I download monthly factor returns from Professor Kenneth French’s data library via the frenchdata package and convert them from percentages to decimal form.

raw_ff3  <- download_french_data("Fama/French 3 Factors")
ff3_tbl  <- raw_ff3$subsets$data[[1]]   # monthly frequency table

# parse date, convert % -> decimal, rename, filter to sample period
ff3_clean <- ff3_tbl |>
  mutate(
    date   = as.Date(paste0(date,"01"), "%Y%m%d"),
    date   = ceiling_date(date, "month") - days(1),   # last day of month
    mkt_rf = `Mkt-RF` / 100,
    smb    = SMB      / 100,
    hml    = HML      / 100,
    rf     = RF       / 100
  ) |>
  select(date, mkt_rf, smb, hml, rf) |>
  filter(date >= as.Date("2010-01-31"),
         date <= as.Date("2025-03-31"))

cat("FF3 monthly observations:", nrow(ff3_clean), "\n")
## FF3 monthly observations: 183
cat("Period:", format(min(ff3_clean$date), "%b %Y"),
    "–", format(max(ff3_clean$date), "%b %Y"), "\n\n")
## Period: Jan 2010 – Mar 2025
head(ff3_clean, 6) |>
  kable(caption="Table 3.1 – Fama-French 3 Factors (decimal, first 6 months)", digits=5) |>
  kable_styling(bootstrap_options=c("striped","condensed"), full_width=FALSE)
Table 3.1 – Fama-French 3 Factors (decimal, first 6 months)
date mkt_rf smb hml rf
2010-01-31 -0.0335 0.0043 0.0033 0e+00
2010-02-28 0.0339 0.0118 0.0318 0e+00
2010-03-31 0.0630 0.0146 0.0219 1e-04
2010-04-30 0.0199 0.0484 0.0296 1e-04
2010-05-31 -0.0790 0.0013 -0.0248 1e-04
2010-06-30 -0.0556 -0.0179 -0.0473 1e-04
# visualise cumulative factor returns
ff3_long <- ff3_clean |>
  select(date, mkt_rf, smb, hml) |>
  mutate(
    cum_mkt = cumprod(1 + mkt_rf) - 1,
    cum_smb = cumprod(1 + smb)    - 1,
    cum_hml = cumprod(1 + hml)    - 1
  ) |>
  select(date, cum_mkt, cum_smb, cum_hml) |>
  pivot_longer(-date, names_to="Factor", values_to="CumReturn") |>
  mutate(Factor = recode(Factor,
    cum_mkt="Mkt-RF", cum_smb="SMB", cum_hml="HML"))

ggplot(ff3_long, aes(date, CumReturn, color=Factor)) +
  geom_line(size=0.9) +
  geom_hline(yintercept=0, linetype="dashed", color="grey50") +
  scale_y_continuous(labels=function(x) paste0(round(x*100,0),"%")) +
  labs(title="Figure 3.1 – Cumulative FF3 Factor Returns (2010–2025)",
       x=NULL, y="Cumulative Return", color="Factor") +
  theme_bw(base_size=11)

The plot shows that Mkt-RF has delivered strong cumulative returns over this period, reflecting the long bull market from 2010 to 2021. SMB has been relatively flat, consistent with the academic literature suggesting the size premium has weakened in recent decades. HML (value) was under pressure through most of this period as growth stocks dominated, though it showed a partial recovery post-2021 when rates rose.


Question 4 – Merge Monthly Returns and Factor Data

I merge the monthly ETF return data from Question 2 with the FF3 factor data from Question 3 using an inner join on the date column. This ensures we only retain months where both datasets have complete observations.

# convert xts monthly returns to a regular data frame
ret_tbl      <- as.data.frame(mthly_ret)
ret_tbl$date <- as.Date(rownames(ret_tbl))
rownames(ret_tbl) <- NULL

# inner join on date
combined <- inner_join(ret_tbl, ff3_clean, by="date")

cat("=== Merged Dataset ===\n")
## === Merged Dataset ===
cat("Rows (months):", nrow(combined), "\n")
## Rows (months): 182
cat("Columns:      ", ncol(combined), "\n")
## Columns:       13
cat("Period:       ", format(min(combined$date), "%b %Y"),
    "–", format(max(combined$date), "%b %Y"), "\n\n")
## Period:        Feb 2010 – Mar 2025
# show a sample of merged rows
combined |>
  select(date, SPY, QQQ, TLT, GLD, mkt_rf, smb, hml, rf) |>
  head(6) |>
  kable(caption="Table 4.1 – Merged Dataset (first 6 rows, selected columns)", digits=4) |>
  kable_styling(bootstrap_options=c("striped","hover"), full_width=FALSE)
Table 4.1 – Merged Dataset (first 6 rows, selected columns)
date SPY QQQ TLT GLD mkt_rf smb hml rf
2010-02-28 0.0312 0.0460 -0.0034 0.0327 0.0339 0.0118 0.0318 0e+00
2010-03-31 0.0609 0.0771 -0.0206 -0.0044 0.0630 0.0146 0.0219 1e-04
2010-04-30 0.0155 0.0224 0.0332 0.0588 0.0199 0.0484 0.0296 1e-04
2010-05-31 -0.0795 -0.0739 0.0511 0.0305 -0.0790 0.0013 -0.0248 1e-04
2010-06-30 -0.0517 -0.0598 0.0580 0.0236 -0.0556 -0.0179 -0.0473 1e-04
2010-07-31 0.0683 0.0726 -0.0095 -0.0509 0.0692 0.0022 -0.0050 1e-04

The merged dataset now contains monthly excess returns for all 8 ETFs alongside the three Fama-French factors and the risk-free rate. This is the primary input for the covariance estimation in Questions 5 and 6.


Question 5 – Minimum Variance Portfolio via CAPM Covariance

Background

The Capital Asset Pricing Model (CAPM) is a single-factor model where every asset’s excess return is driven by one systematic factor — the market excess return — plus idiosyncratic noise:

\[r_{i,t} - r_{f,t} = \alpha_i + \beta_i (r_{m,t} - r_{f,t}) + \varepsilon_{i,t}\]

Under the CAPM assumption that idiosyncratic shocks are uncorrelated across assets (\(\text{Cov}(\varepsilon_i, \varepsilon_j) = 0\) for \(i \neq j\)), the covariance matrix of returns is:

\[\Sigma^{CAPM}_{ij} = \begin{cases} \beta_i^2 \sigma_m^2 + \sigma^2(\varepsilon_i) & \text{if } i = j \\ \beta_i \beta_j \sigma_m^2 & \text{if } i \neq j \end{cases}\]

This structured covariance matrix has an important advantage: instead of estimating \(\frac{N(N+1)}{2}\) free parameters (which for \(N=8\) is 36), we only need to estimate \(N\) betas and \(N\) residual variances — a total of 16 parameters. This reduces estimation error substantially, especially with limited history.

MVP Solver

The Minimum Variance Portfolio (MVP) solves the quadratic programme:

\[\min_{w} \quad w^\top \Sigma w \qquad \text{subject to} \quad \mathbf{1}^\top w = 1, \quad w \geq 0\]

The non-negativity constraint (\(w \geq 0\)) imposes a long-only restriction, which is realistic for most institutional investors who cannot short ETFs freely.

# ── reusable long-only MVP solver using quadprog ─────────────────────────────
solve_mvp <- function(S) {
  n    <- nrow(S)
  Dmat <- 2 * S           # quadprog minimises 0.5 x'Dx - d'x
  dvec <- rep(0, n)
  # constraint matrix: col 1 = full-investment; cols 2:(n+1) = non-negativity
  Amat <- cbind(rep(1, n), diag(n))
  bvec <- c(1, rep(0, n))
  sol  <- solve.QP(Dmat, dvec, Amat, bvec, meq=1)
  w    <- pmax(round(sol$solution, 8), 0)   # ensure numerical non-negativity
  names(w) <- rownames(S)
  w
}

Estimation Window: March 2020 – February 2025

I use the most recent 60 months of data. This window is long enough to estimate betas reliably, while being recent enough to reflect the current market regime (post-COVID era including the 2022 inflation shock and rate cycle).

win1 <- combined |>
  filter(date >= "2020-03-01", date <= "2025-02-28")

cat("Estimation window:", format(min(win1$date),"%b %Y"),
    "–", format(max(win1$date),"%b %Y"),
    "(", nrow(win1), "months )\n\n")
## Estimation window: Mar 2020 – Feb 2025 ( 60 months )
mkt  <- win1$mkt_rf
vm   <- var(mkt)
n    <- length(etf_list)

# ── estimate CAPM beta and residual variance for each ETF ────────────────────
capm_fit <- lapply(etf_list, function(tk) {
  y_excess <- win1[[tk]] - win1$rf
  fit      <- lm(y_excess ~ mkt)
  list(
    beta   = coef(fit)[["mkt"]],
    resvar = var(residuals(fit)),
    r2     = summary(fit)$r.squared
  )
})
names(capm_fit) <- etf_list

bv   <- sapply(capm_fit, `[[`, "beta")
rv   <- sapply(capm_fit, `[[`, "resvar")
r2v  <- sapply(capm_fit, `[[`, "r2")

# display regression results
data.frame(
  ETF    = etf_list,
  Beta   = round(bv, 4),
  ResVar = round(rv, 6),
  R2     = sprintf("%.1f%%", r2v*100)
) |>
  kable(caption="Table 5.1 – CAPM Regression Results (Mar 2020 – Feb 2025)") |>
  kable_styling(bootstrap_options=c("striped","hover"), full_width=FALSE)
Table 5.1 – CAPM Regression Results (Mar 2020 – Feb 2025)
ETF Beta ResVar R2
SPY SPY 0.9552 0.000033 98.7%
QQQ QQQ 1.0634 0.000581 84.7%
EEM EEM 0.6963 0.001336 50.7%
IWM IWM 1.1858 0.000991 80.1%
EFA EFA 0.8243 0.000689 73.7%
TLT TLT 0.3310 0.001623 16.1%
IYR IYR 1.0036 0.001029 73.5%
GLD GLD 0.1746 0.001647 5.0%
# ── build structured CAPM covariance matrix ──────────────────────────────────
S_capm <- outer(bv, bv) * vm + diag(rv)
dimnames(S_capm) <- list(etf_list, etf_list)

# ── solve for MVP weights ─────────────────────────────────────────────────────
w_capm <- solve_mvp(S_capm)

# portfolio statistics
port_var_capm  <- as.numeric(t(w_capm) %*% S_capm %*% w_capm)
port_sd_capm   <- sqrt(port_var_capm)
port_mean_capm <- sum(w_capm * colMeans(win1[, etf_list]))

data.frame(
  ETF    = names(w_capm),
  Weight = sprintf("%.2f%%", w_capm*100)
) |>
  kable(caption="Table 5.2 – MVP Weights from CAPM Covariance") |>
  kable_styling(bootstrap_options=c("striped","hover"),
                full_width=FALSE, position="left")
Table 5.2 – MVP Weights from CAPM Covariance
ETF Weight
SPY -0.00%
QQQ -0.00%
EEM 14.01%
IWM 0.00%
EFA 8.38%
TLT 34.25%
IYR -0.00%
GLD 43.36%
cat(sprintf("\nMVP monthly SD   (CAPM):  %.4f%%\n", port_sd_capm*100))
## 
## MVP monthly SD   (CAPM):  2.9838%
cat(sprintf("MVP monthly mean (CAPM):  %.4f%%\n", port_mean_capm*100))
## MVP monthly mean (CAPM):  0.3927%

The CAPM-based MVP assigns the highest weights to assets with low systematic risk (low beta) and low idiosyncratic variance. TLT and GLD typically receive large allocations in this window because they have near-zero or negative equity betas, contributing to variance reduction. ETFs with high, similar betas (SPY, QQQ, IWM) receive smaller weights because they move together and offer limited diversification benefit relative to each other.


Question 6 – Minimum Variance Portfolio via FF3 Covariance

Background

The Fama-French 3-factor model extends CAPM by recognising that two additional risk factors — size (SMB) and value (HML) — explain cross-sectional return variation that CAPM’s single market factor misses:

\[r_{i,t} - r_{f,t} = \alpha_i + \beta_{i,1}(r_{m,t}-r_{f,t}) + \beta_{i,2}\text{SMB}_t + \beta_{i,3}\text{HML}_t + \varepsilon_{i,t}\]

In matrix form for all assets: \(\mathbf{r}_t - r_f = \boldsymbol\alpha + B \mathbf{f}_t + \boldsymbol\varepsilon_t\)

where \(B\) is the \(N \times 3\) matrix of factor loadings and \(\mathbf{f}_t = (\text{Mkt-RF}_t, \text{SMB}_t, \text{HML}_t)^\top\).

The FF3 covariance matrix is:

\[\Sigma^{FF3} = B \cdot \Omega_F \cdot B^\top + D\]

where \(\Omega_F = \text{Cov}(\mathbf{f})\) is the \(3 \times 3\) factor covariance matrix and \(D = \text{diag}(\sigma^2(\varepsilon_1), \ldots, \sigma^2(\varepsilon_N))\) is the diagonal matrix of residual variances. This model can better capture the risk of small-cap-tilted ETFs (like IWM) and value-oriented funds (like IYR) that load meaningfully on SMB and HML beyond the market factor.

# ── factor matrix for estimation window ──────────────────────────────────────
F_mat <- as.matrix(win1[, c("mkt_rf","smb","hml")])
S_F   <- cov(F_mat)   # 3x3 factor covariance matrix

# ── estimate 3-factor loadings and residual variance for each ETF ────────────
B_mat  <- matrix(0, n, 3, dimnames=list(etf_list, c("mkt_rf","smb","hml")))
rv_ff3 <- setNames(numeric(n), etf_list)
r2_ff3 <- setNames(numeric(n), etf_list)

for (tk in etf_list) {
  y_excess   <- win1[[tk]] - win1$rf
  fit        <- lm(y_excess ~ F_mat)
  B_mat[tk,] <- coef(fit)[-1]
  rv_ff3[tk] <- var(residuals(fit))
  r2_ff3[tk] <- summary(fit)$r.squared
}

# display FF3 regression results
data.frame(
  ETF    = etf_list,
  b_mkt  = round(B_mat[,"mkt_rf"], 4),
  b_smb  = round(B_mat[,"smb"],    4),
  b_hml  = round(B_mat[,"hml"],    4),
  ResVar = round(rv_ff3,            6),
  R2     = sprintf("%.1f%%", r2_ff3*100)
) |>
  kable(caption="Table 6.1 – FF3 Regression Results (Mar 2020 – Feb 2025)",
        col.names=c("ETF","β(Mkt-RF)","β(SMB)","β(HML)","Res.Var","R²")) |>
  kable_styling(bootstrap_options=c("striped","hover"), full_width=FALSE)
Table 6.1 – FF3 Regression Results (Mar 2020 – Feb 2025)
ETF β(Mkt-RF) β(SMB) β(HML) Res.Var
SPY SPY 0.9853 -0.1487 0.0194 0.000012 99.5%
QQQ QQQ 1.0813 -0.0890 -0.3994 0.000224 94.1%
EEM EEM 0.6794 0.0834 0.1476 0.001280 52.8%
IWM IWM 1.0058 0.8895 0.2660 0.000062 98.8%
EFA EFA 0.8477 -0.1152 0.2169 0.000580 77.8%
TLT TLT 0.3443 -0.0658 -0.2622 0.001468 24.1%
IYR IYR 0.9953 0.0409 0.2032 0.000937 75.9%
GLD GLD 0.2420 -0.3330 -0.0197 0.001542 11.0%
# ── build FF3 covariance matrix ───────────────────────────────────────────────
S_ff3 <- B_mat %*% S_F %*% t(B_mat) + diag(rv_ff3)
dimnames(S_ff3) <- list(etf_list, etf_list)

# ── solve for MVP weights ─────────────────────────────────────────────────────
w_ff3 <- solve_mvp(S_ff3)

port_var_ff3  <- as.numeric(t(w_ff3) %*% S_ff3 %*% w_ff3)
port_sd_ff3   <- sqrt(port_var_ff3)
port_mean_ff3 <- sum(w_ff3 * colMeans(win1[, etf_list]))

data.frame(
  ETF    = names(w_ff3),
  Weight = sprintf("%.2f%%", w_ff3*100)
) |>
  kable(caption="Table 6.2 – MVP Weights from FF3 Covariance") |>
  kable_styling(bootstrap_options=c("striped","hover"),
                full_width=FALSE, position="left")
Table 6.2 – MVP Weights from FF3 Covariance
ETF Weight
SPY 0.00%
QQQ -0.00%
EEM 15.65%
IWM -0.00%
EFA 8.21%
TLT 33.91%
IYR -0.00%
GLD 42.23%
cat(sprintf("\nMVP monthly SD   (FF3):  %.4f%%\n", port_sd_ff3*100))
## 
## MVP monthly SD   (FF3):  2.9738%
cat(sprintf("MVP monthly mean (FF3):  %.4f%%\n", port_mean_ff3*100))
## MVP monthly mean (FF3):  0.3883%
# compare CAPM vs FF3 weights side by side
data.frame(
  ETF         = etf_list,
  `CAPM Wt`   = sprintf("%.2f%%", w_capm*100),
  `FF3 Wt`    = sprintf("%.2f%%", w_ff3*100),
  Difference  = sprintf("%+.2f%%", (w_ff3 - w_capm)*100)
) |>
  kable(caption="Table 6.3 – CAPM vs FF3 MVP Weights Comparison") |>
  kable_styling(bootstrap_options=c("striped","hover"), full_width=FALSE) |>
  column_spec(4, bold=TRUE)
Table 6.3 – CAPM vs FF3 MVP Weights Comparison
ETF CAPM.Wt FF3.Wt Difference
SPY -0.00% 0.00% +0.00%
QQQ -0.00% -0.00% +0.00%
EEM 14.01% 15.65% +1.64%
IWM 0.00% -0.00% -0.00%
EFA 8.38% 8.21% -0.18%
TLT 34.25% 33.91% -0.34%
IYR -0.00% -0.00% +0.00%
GLD 43.36% 42.23% -1.13%

The FF3 model adds two dimensions of systematic risk that CAPM ignores. For ETFs like IWM (Russell 2000, small-cap) which have a large positive SMB loading, the FF3 model assigns more systematic variance to that asset, which may change its weight in the MVP relative to CAPM. Assets with offsetting HML loadings can provide diversification along the value dimension not captured by the single-factor model.


Question 7 – Realized Portfolio Returns in March 2025

Using the MVP weights derived from each model’s 60-month estimation window (March 2020 – February 2025), I compute the realized (ex-post) portfolio return for March 2025. This is a simple dot product of the weight vector and the return vector for that month.

# extract March 2025 ETF returns
r_mar <- combined |>
  filter(year(date)==2025, month(date)==3) |>
  select(all_of(etf_list)) |>
  unlist()

# individual ETF returns for that month
data.frame(ETF=etf_list, `Mar 2025 Return`=sprintf("%.3f%%", r_mar*100)) |>
  kable(caption="Table 7.1 – Individual ETF Returns, March 2025") |>
  kable_styling(bootstrap_options=c("striped","hover"),
                full_width=FALSE, position="left")
Table 7.1 – Individual ETF Returns, March 2025
ETF Mar.2025.Return
SPY -6.202%
QQQ -7.582%
EEM 1.296%
IWM -6.406%
EFA 1.079%
TLT -2.171%
IYR -3.368%
GLD 7.897%
# compute portfolio returns
ret_capm_mar <- sum(w_capm * r_mar)
ret_ff3_mar  <- sum(w_ff3  * r_mar)

cat(sprintf("\n=== Realized Portfolio Returns – March 2025 ===\n"))
## 
## === Realized Portfolio Returns – March 2025 ===
cat(sprintf("CAPM-based MVP:  %.4f%%\n", ret_capm_mar*100))
## CAPM-based MVP:  2.9525%
cat(sprintf("FF3-based MVP:   %.4f%%\n", ret_ff3_mar*100))
## FF3-based MVP:   2.8898%
data.frame(
  Model             = c("CAPM MVP","FF3 MVP"),
  `Portfolio Return` = sprintf("%.4f%%", c(ret_capm_mar, ret_ff3_mar)*100),
  `Ex-Ante SD`       = sprintf("%.4f%%", c(port_sd_capm, port_sd_ff3)*100)
) |>
  kable(caption="Table 7.2 – Realized Returns vs Ex-Ante Risk, March 2025") |>
  kable_styling(bootstrap_options=c("striped","hover"),
                full_width=FALSE, position="left")
Table 7.2 – Realized Returns vs Ex-Ante Risk, March 2025
Model Portfolio.Return Ex.Ante.SD
CAPM MVP 2.9525% 2.9838%
FF3 MVP 2.8898% 2.9738%

It is important to note that the MVP is constructed to minimise variance, not to maximise return. Therefore the realized return could be positive, negative, or roughly zero — what matters is whether the portfolio achieved a lower drawdown than an equally-weighted or market-cap-weighted alternative during the same period. The ex-ante standard deviation provides a baseline expectation for the magnitude of monthly fluctuations.


Question 8 – Realized Portfolio Returns in April 2025 (Rolling Window)

For April 2025, I roll the estimation window forward by one month: the new 60-month window covers April 2020 – March 2025. I re-estimate both covariance matrices on this new window, derive updated MVP weights, and then apply the April 2025 ETF returns.

This rolling procedure reflects a realistic investment process where portfolio weights are updated monthly based on the most recent available data.

win2 <- combined |>
  filter(date >= "2020-04-01", date <= "2025-03-31")

cat("Rolling window:", format(min(win2$date),"%b %Y"),
    "–", format(max(win2$date),"%b %Y"),
    "(", nrow(win2), "months)\n\n")
## Rolling window: Apr 2020 – Mar 2025 ( 60 months)
# ── re-estimate CAPM on new window ────────────────────────────────────────────
mkt2 <- win2$mkt_rf
vm2  <- var(mkt2)

cf2 <- lapply(etf_list, function(tk) {
  y  <- win2[[tk]] - win2$rf
  m  <- lm(y ~ mkt2)
  list(beta=coef(m)[["mkt2"]], resvar=var(residuals(m)))
})
names(cf2) <- etf_list

bv2 <- sapply(cf2, `[[`, "beta")
rv2 <- sapply(cf2, `[[`, "resvar")

S_capm2           <- outer(bv2, bv2)*vm2 + diag(rv2)
dimnames(S_capm2) <- list(etf_list, etf_list)
w_capm2           <- solve_mvp(S_capm2)

# ── re-estimate FF3 on new window ─────────────────────────────────────────────
F2   <- as.matrix(win2[, c("mkt_rf","smb","hml")])
SF2  <- cov(F2)
B2   <- matrix(0, n, 3, dimnames=list(etf_list, colnames(F2)))
rv2f <- setNames(numeric(n), etf_list)

for (tk in etf_list) {
  y        <- win2[[tk]] - win2$rf
  m        <- lm(y ~ F2)
  B2[tk,]  <- coef(m)[-1]
  rv2f[tk] <- var(residuals(m))
}
S_ff3_2           <- B2 %*% SF2 %*% t(B2) + diag(rv2f)
dimnames(S_ff3_2) <- list(etf_list, etf_list)
w_ff3_2           <- solve_mvp(S_ff3_2)

# ── compare weights: window 1 vs window 2 ────────────────────────────────────
data.frame(
  ETF          = etf_list,
  `CAPM Mar`   = sprintf("%.2f%%", w_capm*100),
  `CAPM Apr`   = sprintf("%.2f%%", w_capm2*100),
  `FF3 Mar`    = sprintf("%.2f%%", w_ff3*100),
  `FF3 Apr`    = sprintf("%.2f%%", w_ff3_2*100)
) |>
  kable(caption="Table 8.1 – MVP Weights: March vs April 2025 Windows") |>
  kable_styling(bootstrap_options=c("striped","hover"), full_width=FALSE)
Table 8.1 – MVP Weights: March vs April 2025 Windows
ETF CAPM.Mar CAPM.Apr FF3.Mar FF3.Apr
SPY -0.00% -0.00% 0.00% 0.00%
QQQ -0.00% -0.00% -0.00% 0.00%
EEM 14.01% 18.31% 15.65% 19.32%
IWM 0.00% 0.00% -0.00% -0.00%
EFA 8.38% 11.33% 8.21% 10.46%
TLT 34.25% 30.15% 33.91% 30.32%
IYR -0.00% 0.00% -0.00% 0.00%
GLD 43.36% 40.21% 42.23% 39.90%
# ── April 2025 realized returns ───────────────────────────────────────────────
r_apr <- combined |>
  filter(year(date)==2025, month(date)==4) |>
  select(all_of(etf_list)) |>
  unlist()

ret_capm_apr <- sum(w_capm2 * r_apr)
ret_ff3_apr  <- sum(w_ff3_2 * r_apr)

cat(sprintf("=== Realized Portfolio Returns – April 2025 ===\n"))
## === Realized Portfolio Returns – April 2025 ===
cat(sprintf("CAPM-based MVP:  %.4f%%\n", ret_capm_apr*100))
## CAPM-based MVP:  0.0000%
cat(sprintf("FF3-based MVP:   %.4f%%\n", ret_ff3_apr*100))
## FF3-based MVP:   0.0000%
# summary comparison table
data.frame(
  Model    = c("CAPM MVP","FF3 MVP"),
  `Mar 2025 Return` = sprintf("%.4f%%", c(ret_capm_mar, ret_ff3_mar)*100),
  `Apr 2025 Return` = sprintf("%.4f%%", c(ret_capm_apr, ret_ff3_apr)*100)
) |>
  kable(caption="Table 8.2 – Realized Returns: March and April 2025") |>
  kable_styling(bootstrap_options=c("striped","hover"),
                full_width=FALSE, position="left")
Table 8.2 – Realized Returns: March and April 2025
Model Mar.2025.Return Apr.2025.Return
CAPM MVP 2.9525% 0.0000%
FF3 MVP 2.8898% 0.0000%

The comparison between March and April results provides a preliminary indication of which factor model produces more stable, lower-volatility realised outcomes. In general, FF3-based covariance matrices tend to explain a larger fraction of return variance (higher R²) for equity ETFs, but may be less effective for TLT or GLD which do not load strongly on SMB or HML. CAPM’s simpler structure can sometimes be more robust when factor data is noisy.


Section 2: Textbook Problems


Chapter 5 – Problem 12

Question: Download the 6 Fama-French portfolios formed on size and book-to-market (2×3), value-weighted series, January 1930 – December 2018. Split the sample in half, compute mean, SD, skewness, and excess kurtosis for each of the six portfolios for both halves. Do the statistics suggest the returns come from the same distribution over the entire period?

Data Download and Preparation

raw6  <- download_french_data("6 Portfolios Formed on Size and Book-to-Market (2 x 3)")
tbl6  <- raw6$subsets$data[[1]]   # value-weighted monthly returns

# clean and filter to Jan 1930 – Dec 2018
p6 <- tbl6 |>
  mutate(date = as.Date(paste0(date,"01"), "%Y%m%d")) |>
  filter(date >= "1930-01-01", date <= "2018-12-31") |>
  mutate(across(-date, ~ as.numeric(.)/100))

mid   <- floor(nrow(p6)/2)
half1 <- p6[1:mid,        -1]
half2 <- p6[(mid+1):nrow(p6), -1]

cat(sprintf("Total months:  %d\n", nrow(p6)))
## Total months:  1068
cat(sprintf("First half:    %s – %s  (%d months)\n",
            format(p6$date[1],"%b %Y"),
            format(p6$date[mid],"%b %Y"), mid))
## First half:    Jan 1930 – Jun 1974  (534 months)
cat(sprintf("Second half:   %s – %s  (%d months)\n",
            format(p6$date[mid+1],"%b %Y"),
            format(p6$date[nrow(p6)],"%b %Y"), nrow(p6)-mid))
## Second half:   Jul 1974 – Dec 2018  (534 months)

Summary Statistics

get_stats <- function(df, lbl) {
  lapply(names(df), function(col) {
    x <- df[[col]]
    z <- (x - mean(x)) / sd(x)
    data.frame(
      Half      = lbl,
      Portfolio = col,
      Mean_pct  = round(mean(x)*100, 3),
      SD_pct    = round(sd(x)*100,   3),
      Skewness  = round(mean(z^3),   3),
      ExKurt    = round(mean(z^4)-3, 3)
    )
  }) |> bind_rows()
}

stats_all <- bind_rows(
  get_stats(half1,"First Half (1930–1974)"),
  get_stats(half2,"Second Half (1975–2018)")
)

stats_all |>
  kable(caption="Table 5P12.1 – Summary Statistics by Half-Period",
        col.names=c("Half","Portfolio","Mean (%)","SD (%)","Skewness","Ex. Kurtosis")) |>
  kable_styling(bootstrap_options=c("striped","hover","condensed")) |>
  collapse_rows(1, valign="top")
Table 5P12.1 – Summary Statistics by Half-Period
Half Portfolio Mean (%) SD (%) Skewness Ex. Kurtosis
First Half (1930–1974) SMALL LoBM 0.971 8.225 1.177 9.026
ME1 BM2 1.169 8.423 1.575 12.682
SMALL HiBM 1.484 10.206 2.281 17.001
BIG LoBM 0.765 5.709 0.178 6.857
ME2 BM2 0.812 6.734 1.707 17.458
BIG HiBM 1.187 8.911 1.764 14.403
Second Half (1975–2018) SMALL LoBM 0.996 6.688 -0.407 2.139
ME1 BM2 1.355 5.282 -0.531 3.401
SMALL HiBM 1.425 5.499 -0.463 4.279
BIG LoBM 0.978 4.696 -0.333 1.974
ME2 BM2 1.058 4.339 -0.472 2.632
BIG HiBM 1.145 4.887 -0.516 2.784

Visual Comparison

stats_long <- stats_all |>
  pivot_longer(c(Mean_pct, SD_pct, Skewness, ExKurt),
               names_to="Stat", values_to="Value") |>
  mutate(Stat = recode(Stat,
    Mean_pct="Mean (%)", SD_pct="SD (%)",
    Skewness="Skewness", ExKurt="Excess Kurtosis"))

ggplot(stats_long, aes(Portfolio, Value, fill=Half)) +
  geom_bar(stat="identity", position="dodge") +
  facet_wrap(~Stat, scales="free_y") +
  labs(title="Figure 5P12.1 – Distribution Statistics: First vs Second Half",
       x=NULL, y=NULL, fill=NULL) +
  theme_bw(base_size=10) +
  theme(axis.text.x=element_text(angle=45, hjust=1),
        legend.position="bottom")

Discussion

The statistics reveal substantial differences between the two halves of the sample, strongly suggesting that returns do not come from the same stationary distribution across the full 1930–2018 period. Several observations stand out:

  1. Mean returns are generally higher in the second half for most portfolios, partly reflecting the post-WWII economic expansion and the bull market of the 1980s and 1990s.

  2. Standard deviations are notably higher in the first half for nearly all portfolios. This is explained by the extraordinary volatility of the Great Depression era (1929–1932) and the uncertainty surrounding WWII, both of which fall in the first half window.

  3. Skewness tends to be more negative in the first half, consistent with the severe left-tail events (market crashes) during the Depression years. The second half shows more moderate skewness.

  4. Excess kurtosis is substantially higher in the first half across all six portfolios, reflecting fat tails from extreme crash episodes. The second half, while not normal, shows lower tail risk.

These differences have practical implications for portfolio construction: using historical data from the full 90-year period would contaminate risk estimates with Depression-era volatility that is unlikely to recur, potentially overstating expected portfolio risk.


Chapter 6 – Problem 21

Given: - Risky portfolio P: \(E(r_P) = 11\%\), \(\sigma_P = 15\%\) - Risk-free rate: \(r_f = 5\%\)

Part (a) – Proportion to Invest for Target Return of 8%

The client’s complete portfolio C lies on the Capital Allocation Line (CAL):

\[E(r_C) = r_f + y \cdot [E(r_P) - r_f]\]

Solving for \(y\):

\[y = \frac{E(r_C) - r_f}{E(r_P) - r_f} = \frac{8\% - 5\%}{11\% - 5\%} = \frac{3\%}{6\%} = 0.50\]

erP <- 0.11; sP <- 0.15; rf6 <- 0.05

y_a  <- (0.08 - rf6) / (erP - rf6)
cat(sprintf("y (risky fund):      %.4f = %.0f%%\n", y_a, y_a*100))
## y (risky fund):      0.5000 = 50%
cat(sprintf("1-y (risk-free):     %.4f = %.0f%%\n", 1-y_a, (1-y_a)*100))
## 1-y (risk-free):     0.5000 = 50%

Answer: The client should invest 50% in the risky portfolio P and keep 50% in the risk-free asset (e.g. T-bills or money market fund).

Part (b) – Standard Deviation of the Client’s Optimised Portfolio

Since the risk-free asset has zero variance and zero covariance with anything:

\[\sigma_C = y \cdot \sigma_P = 0.50 \times 15\% = 7.5\%\]

sC_a <- y_a * sP
cat(sprintf("Standard deviation of client portfolio:  %.4f = %.1f%%\n", sC_a, sC_a*100))
## Standard deviation of client portfolio:  0.0750 = 7.5%

Answer: The standard deviation of the client’s portfolio is 7.5% per year.

Part (c) – Which Client is More Risk Averse?

Client B requires the highest return possible subject to a maximum standard deviation of 12%. This constraint implies:

\[\sigma_C = y \cdot \sigma_P \leq 12\% \implies y \leq \frac{12\%}{15\%} = 0.80\]

Client B therefore invests 80% in the risky fund to hit the risk boundary.

y_b   <- 0.12 / sP
erC_b <- rf6 + y_b*(erP - rf6)
cat(sprintf("Client B:  y = %.2f, E(rC) = %.2f%%, sigma_C = 12%%\n",
            y_b, erC_b*100))
## Client B:  y = 0.80, E(rC) = 9.80%, sigma_C = 12%
cat("\nComparison:\n")
## 
## Comparison:
cat(sprintf("  Client A: y = %.2f  →  accepts lower return (%.0f%%) for lower risk (%.1f%%)\n",
            y_a, (rf6+y_a*(erP-rf6))*100, y_a*sP*100))
##   Client A: y = 0.50  →  accepts lower return (8%) for lower risk (7.5%)
cat(sprintf("  Client B: y = %.2f  →  accepts higher risk (12%%) for higher return (%.1f%%)\n",
            y_b, erC_b*100))
##   Client B: y = 0.80  →  accepts higher risk (12%) for higher return (9.8%)
cat("\nConclusion: Client A is MORE risk averse.\n")
## 
## Conclusion: Client A is MORE risk averse.

Answer: Client A, who limits exposure to 50% risky/50% risk-free to achieve a modest expected return of 8%, is more risk averse than Client B, who pushes up to 80% in the risky fund to maximise return within the volatility limit.


Chapter 6 – Problem 22

Given: \(E(r_M) = 12\%\), \(\sigma_M = 20\%\), \(r_f = 5\%\). Johnson’s constraint: \(\sigma_C = \frac{1}{2}\sigma_M = 10\%\).

The Capital Market Line (CML) gives the expected return as a linear function of portfolio volatility:

\[E(r_C) = r_f + \frac{E(r_M) - r_f}{\sigma_M} \cdot \sigma_C = 5\% + \frac{7\%}{20\%} \times 10\% = 5\% + 3.5\% = 8.5\%\]

The slope of the CML, \(\frac{E(r_M) - r_f}{\sigma_M} = \frac{7\%}{20\%} = 0.35\), is the Sharpe ratio of the market portfolio — the maximum reward per unit of risk achievable from passive investing.

erM   <- 0.12; sM <- 0.20; rf22 <- 0.05
sJ    <- sM/2   # = 10%

slope_cml <- (erM - rf22)/sM
erJ       <- rf22 + slope_cml * sJ
y_J       <- sJ / sM   # = 0.50, weight in market

cat(sprintf("CML slope (Sharpe ratio of market): %.4f\n", slope_cml))
## CML slope (Sharpe ratio of market): 0.3500
cat(sprintf("Johnson's target sigma:             %.1f%%\n",  sJ*100))
## Johnson's target sigma:             10.0%
cat(sprintf("Implied weight in market (y):       %.2f\n",    y_J))
## Implied weight in market (y):       0.50
cat(sprintf("Expected return offered by IMI:     %.2f%%\n",  erJ*100))
## Expected return offered by IMI:     8.50%

Answer: IMI can offer Samuel Johnson an expected return of 8.5% per year, subject to his self-imposed risk constraint of \(\sigma = 10\%\) (half the market’s standard deviation). To achieve this, Johnson would allocate 50% of his portfolio to the market index and keep 50% in the risk-free asset.


Chapter 6 – CFA Problem 4

Question: Which indifference curve in the graph represents the greatest level of utility achievable by the investor?

Answer: Curve 2 represents the greatest achievable utility. Here is the reasoning:

Indifference curves that lie further to the upper-left of the mean-standard deviation diagram represent higher utility, because they offer higher expected return for any given level of risk. An investor wants to reach the highest possible indifference curve.

However, the investor is constrained to only those portfolios that lie on or below the Capital Allocation Line (CAL). Curves above the CAL are feasible on paper but cannot be reached with the available investment universe. The optimal portfolio is located where the highest attainable indifference curve is just tangent to the CAL.

In the graph, curve 2 is the highest indifference curve that still touches the CAL. Curves numbered 3 and 4 lie above the CAL entirely and are therefore unattainable. Curve 1 is tangent to the CAL at point E but lies below curve 2 in terms of utility (it represents lower utility combinations). Therefore, the investor maximises utility at the tangency point of curve 2 with the CAL.


Chapter 6 – CFA Problem 5

Question: Which point on the graph designates the optimal portfolio of risky assets?

Answer: Point E (the tangency point between the CAL and the efficient frontier of risky assets) designates the optimal risky portfolio.

This is a fundamental result of modern portfolio theory: regardless of a particular investor’s risk preferences, all rational investors who combine a risky portfolio with the risk-free asset will choose to hold the same risky portfolio — the one that maximises the Sharpe ratio. Point E represents this tangency portfolio. Individual risk preferences only determine how much of the total budget is allocated to the risky portfolio vs. the risk-free asset (which is captured by \(y\)), but the composition of the risky portion is identical across all investors.

This is the so-called separation theorem (Tobin, 1958): the choice of the risky portfolio is separated from the investor’s risk tolerance decision.


Chapter 6 – CFA Problem 8

Given: - Equity fund: risk premium = 10%, \(\sigma_{eq} = 14\%\) - T-bill rate: \(r_f = 6\%\), implying \(E(r_{eq}) = 6\% + 10\% = 16\%\) - Client allocates $60,000 to equity fund and $40,000 to T-bill money market

rp_eq  <- 0.10; sd_eq <- 0.14; rf_8 <- 0.06
er_eq  <- rf_8 + rp_eq       # = 0.16 = 16%
we     <- 60000/100000        # weight in equity fund = 0.60
wt     <- 40000/100000        # weight in T-bills     = 0.40

# portfolio expected return: weighted average
er_port <- we*er_eq + wt*rf_8

# portfolio standard deviation: only equity contributes (T-bill SD = 0)
sd_port <- we*sd_eq

cat(sprintf("Weight in equity fund:         %.2f (60%%)\n",  we))
## Weight in equity fund:         0.60 (60%)
cat(sprintf("Weight in T-bill fund:         %.2f (40%%)\n",  wt))
## Weight in T-bill fund:         0.40 (40%)
cat(sprintf("E(r) equity fund:              %.2f%%\n", er_eq*100))
## E(r) equity fund:              16.00%
cat(sprintf("E(r) client's portfolio:       %.2f%%\n", er_port*100))
## E(r) client's portfolio:       12.00%
cat(sprintf("sigma(client's portfolio):     %.2f%%\n", sd_port*100))
## sigma(client's portfolio):     8.40%

Working: \[E(r_C) = 0.60 \times 16\% + 0.40 \times 6\% = 9.6\% + 2.4\% = 12\%\] \[\sigma_C = 0.60 \times 14\% = 8.4\%\]

Answer: The expected return on the client’s portfolio is 12.0% and the standard deviation is 8.4%. The T-bill component contributes to return but adds zero risk, so the portfolio is more efficient (per unit of risk) than a 100% equity allocation.


Chapter 7 – Problem 11

Stock data: \(E(r_S) = 18\%\), \(\sigma_S = 22\%\) | Gold data: \(E(r_G) = 10\%\), \(\sigma_G = 30\%\)

Part (a) – Would Anyone Hold Gold Given Its Apparent Inferiority?

At first glance, gold appears strictly dominated by stocks: it offers a lower expected return (10% vs 18%) and higher standard deviation (30% vs 22%). Under a single-asset framework, no rational investor would prefer gold.

However, portfolio diversification changes this conclusion entirely. What matters is not the individual asset’s risk-return profile, but its contribution to portfolio risk. When gold has a low or negative correlation with stocks, adding it to a stock portfolio can reduce the overall portfolio variance, shifting the efficient frontier to the left. An investor who is already holding stocks can achieve a higher utility by allocating a portion to gold — even though gold is mean-variance inferior on a stand-alone basis.

eS <- 0.18; sS <- 0.22
eG <- 0.10; sG <- 0.30
ws <- seq(0, 1, by=0.005)   # weight in stocks

# compute frontier for four different correlations
frontier <- lapply(c(-1.0, -0.5, 0.0, 0.5, 1.0), function(rho) {
  data.frame(
    rho   = factor(paste0("ρ = ", rho)),
    sigma = sqrt(ws^2*sS^2 + (1-ws)^2*sG^2 + 2*ws*(1-ws)*rho*sS*sG),
    ER    = ws*eS + (1-ws)*eG,
    w_stock = ws
  )
}) |> bind_rows()

# mark the individual assets
pts <- data.frame(sigma=c(sS,sG), ER=c(eS,eG), label=c("Stocks","Gold"))

ggplot(frontier, aes(sigma, ER, color=rho)) +
  geom_line(size=0.9) +
  geom_point(data=pts, aes(sigma,ER,color=NULL), size=4, shape=18, color="black") +
  geom_text(data=pts, aes(label=label,color=NULL), nudge_y=0.005, size=3.8,
            fontface="bold") +
  geom_hline(yintercept=eS, linetype="dotted", color="grey60") +
  scale_y_continuous(labels=function(x) paste0(x*100,"%"),
                     limits=c(0.08,0.20)) +
  scale_x_continuous(labels=function(x) paste0(x*100,"%")) +
  labs(title="Figure 7P11.1 – Efficient Frontiers: Stocks & Gold at Various Correlations",
       subtitle="Dotted line = stocks' standalone expected return",
       x="Portfolio Standard Deviation", y="Expected Return",
       color="Correlation") +
  theme_bw(base_size=11) +
  theme(legend.position="right")

The figure demonstrates that for any correlation below 1, the frontier bends to the left — creating portfolios with lower risk than stocks alone at a given level of expected return. For \(\rho = -1\), the frontier achieves a theoretically zero-variance portfolio. Even for \(\rho = 0\), a mixed portfolio can achieve the same expected return as a pure stock portfolio but with meaningfully lower standard deviation.

Conclusion: Yes, investors would (and should) hold gold as long as its correlation with stocks is below 1, even though gold appears dominated on a standalone basis. Gold’s value lies in its diversification benefit.

Part (b) – Special Case: \(\rho = 1\)

When \(\rho_{SG} = 1\), the portfolio standard deviation is a simple linear combination of the individual standard deviations:

\[\sigma_P = w_S \sigma_S + (1-w_S)\sigma_G\]

The frontier collapses to a straight line between Gold (\(\sigma=30\%\), \(E(r)=10\%\)) and Stocks (\(\sigma=22\%\), \(E(r)=18\%\)). In this case, stocks strictly dominate gold: for every portfolio that includes gold, a portfolio holding fewer stocks and more risk-free assets would achieve the same expected return at lower risk. No rational mean-variance investor would hold gold when \(\rho = 1\).

Part (c) – Is \(\rho = 1\) an Equilibrium?

No, \(\rho = 1\) cannot be a market equilibrium. In equilibrium, all assets must be held in positive quantities by some investors (otherwise prices would adjust). If \(\rho = 1\) made gold strictly dominated, all investors would sell gold — driving its price down and its expected return up — until the equilibrium condition is restored (i.e., until gold’s higher expected return compensates for its higher volatility, or until the effective correlation drops below 1 due to market adjustments). A state in which gold has zero demand is not a sustainable equilibrium.


Chapter 7 – Problem 12

Stock A: \(E(r_A) = 10\%\), \(\sigma_A = 5\%\) | Stock B: \(E(r_B) = 15\%\), \(\sigma_B = 10\%\) | \(\rho_{AB} = -1\)

Finding the Risk-Free Rate

With perfect negative correlation (\(\rho = -1\)), it is possible to create a zero-variance portfolio from A and B. The portfolio weights that achieve zero variance are:

\[w_A = \frac{\sigma_B}{\sigma_A + \sigma_B} = \frac{10}{5 + 10} = \frac{2}{3}, \qquad w_B = \frac{\sigma_A}{\sigma_A + \sigma_B} = \frac{1}{3}\]

We can verify: \(\sigma_P^2 = w_A^2 \sigma_A^2 + w_B^2 \sigma_B^2 + 2 w_A w_B (-1) \sigma_A \sigma_B = (w_A \sigma_A - w_B \sigma_B)^2 = \left(\frac{2}{3}\cdot 5 - \frac{1}{3}\cdot 10\right)^2 = 0\).

eA <- 0.10; sA <- 0.05
eB <- 0.15; sB <- 0.10

wA_zv <- sB/(sA+sB)
wB_zv <- 1 - wA_zv
er_zv <- wA_zv*eA + wB_zv*eB

# verify variance is zero
v_zv <- wA_zv^2*sA^2 + wB_zv^2*sB^2 + 2*wA_zv*wB_zv*(-1)*sA*sB

cat("=== Zero-Variance Portfolio ===\n")
## === Zero-Variance Portfolio ===
cat(sprintf("w_A = %.4f  (= 2/3)\n",    wA_zv))
## w_A = 0.6667  (= 2/3)
cat(sprintf("w_B = %.4f  (= 1/3)\n",    wB_zv))
## w_B = 0.3333  (= 1/3)
cat(sprintf("E(r) = %.4f = %.4f%%\n",   er_zv, er_zv*100))
## E(r) = 0.1167 = 11.6667%
cat(sprintf("Var  = %.12f  (≈ 0)\n\n",  v_zv))
## Var  = 0.000000000000  (≈ 0)
cat(sprintf("Implied risk-free rate: rf = %.4f%%\n", er_zv*100))
## Implied risk-free rate: rf = 11.6667%

Answer: The zero-variance portfolio earns an expected return of:

\[E(r_P) = \frac{2}{3} \times 10\% + \frac{1}{3} \times 15\% = \frac{35}{3}\% \approx 11.67\%\]

In equilibrium, the risk-free rate must equal 11.67%. If \(r_f\) were any different, investors could construct an arbitrage: if \(r_f < 11.67\%\), borrow at \(r_f\) and hold the zero-variance portfolio for a riskless profit; if \(r_f > 11.67\%\), short the portfolio and invest in the risk-free asset.


Chapter 7 – CFA Problem 12

Background: Abigail Grace holds a $900,000 fully diversified portfolio and inherits ABC Company stock worth $100,000. The total portfolio is now $1,000,000.

Given data:

Portfolio Monthly \(E(r)\) Monthly \(\sigma\)
Original portfolio 0.67% 2.37%
ABC Company 1.25% 2.95%

Correlation (ABC, original portfolio) = 0.40. Weights: \(w_P = 0.90\), \(w_{ABC} = 0.10\).

Part (a) – Keep the ABC Stock

eP0   <- 0.0067; sP0  <- 0.0237
eABC  <- 0.0125; sABC <- 0.0295
rho_P_ABC <- 0.40
wP    <- 0.90;   wABC <- 0.10

# i. Expected return
eNew_a <- wP*eP0 + wABC*eABC
cat(sprintf("i.  E(r) of new portfolio:    %.4f%%/month\n", eNew_a*100))
## i.  E(r) of new portfolio:    0.7280%/month
# ii. Covariance
cov_PA <- rho_P_ABC * sP0 * sABC
cat(sprintf("ii. Cov(ABC, original):       %.8f\n", cov_PA))
## ii. Cov(ABC, original):       0.00027966
# iii. Standard deviation
var_a  <- wP^2*sP0^2 + wABC^2*sABC^2 + 2*wP*wABC*cov_PA
sNew_a <- sqrt(var_a)
cat(sprintf("iii.SD of new portfolio:      %.4f%%/month\n", sNew_a*100))
## iii.SD of new portfolio:      2.2672%/month

Interpretation of Part (a):

  • Expected return rises from 0.67% to 0.7280%/month because ABC has a higher expected return than the original portfolio.
  • Standard deviation changes from 2.37% to 2.2672%/month. The small increase in risk (relative to the 0.10 weight in ABC) occurs because the correlation of 0.40 is positive — ABC is not a pure hedge. However, the increase is modest because of the small allocation weight.

Part (b) – Replace ABC with Government Securities (\(r = 0.42\%\)/month)

eGov  <- 0.0042    # risk-free gov securities
# gov securities: zero variance, zero covariance with everything
eNew_b  <- wP*eP0 + wABC*eGov
var_b   <- wP^2*sP0^2   # only original portfolio contributes
sNew_b  <- sqrt(var_b)

cat(sprintf("i.  E(r) of new portfolio:    %.4f%%/month\n", eNew_b*100))
## i.  E(r) of new portfolio:    0.6450%/month
cat(sprintf("ii. Cov(gov sec, original):   0.00000000 (by definition)\n"))
## ii. Cov(gov sec, original):   0.00000000 (by definition)
cat(sprintf("iii.SD of new portfolio:      %.4f%%/month\n", sNew_b*100))
## iii.SD of new portfolio:      2.1330%/month

Interpretation of Part (b):

  • Expected return falls from 0.67% to 0.6450%/month because the risk-free asset earns less than the original portfolio.
  • Standard deviation falls from 2.37% to 2.1330%/month — a meaningful reduction — because the risk-free asset contributes zero variance and zero covariance.

Part (c) – Systematic Risk Comparison

# beta of ABC w.r.t. original portfolio
beta_ABC_P <- cov_PA / sP0^2

# beta of new portfolios
beta_new_a <- wP*1.0 + wABC*beta_ABC_P   # original portfolio = "market" here
beta_new_b <- wP*1.0 + wABC*0            # gov sec beta = 0

cat(sprintf("Beta of ABC vs original:              %.4f\n", beta_ABC_P))
## Beta of ABC vs original:              0.4979
cat(sprintf("Beta of new portfolio (keep ABC):     %.4f\n", beta_new_a))
## Beta of new portfolio (keep ABC):     0.9498
cat(sprintf("Beta of new portfolio (buy gov sec):  %.4f\n", beta_new_b))
## Beta of new portfolio (buy gov sec):  0.9000
cat("\nKeeping ABC results in HIGHER systematic risk than replacing with gov securities.\n")
## 
## Keeping ABC results in HIGHER systematic risk than replacing with gov securities.

Part (d) – Husband’s Comment on ABC vs XYZ

The husband’s comment is incorrect. He argues that since XYZ has the same expected return (1.25%/month) and the same standard deviation (2.95%/month) as ABC, it makes no difference which stock Grace holds. However, this reasoning ignores the covariance between the new stock and the existing portfolio.

Portfolio variance depends not just on the individual asset’s volatility, but on how it co-moves with the rest of the portfolio:

\[\sigma^2_{new} = w_P^2 \sigma_P^2 + w_{XYZ}^2 \sigma_{XYZ}^2 + 2 w_P w_{XYZ} \text{Cov}(XYZ, P)\]

If \(\text{Cov}(XYZ, P) \neq \text{Cov}(ABC, P)\), the two portfolios will have different total risks even though XYZ and ABC have identical stand-alone characteristics. The husband is implicitly assuming the correlations are the same, which need not be true — XYZ is a different company operating in potentially a different industry with a different market sensitivity.

Part (e) – Risk Measure for Grace

Part e (i) — Weakness of Standard Deviation:

Standard deviation treats all deviations from the mean symmetrically — both positive deviations (good outcomes, high returns) and negative deviations (bad outcomes, losses) contribute equally to the measured risk. But Grace has explicitly stated she is only concerned about losing money: “I am more afraid of losing money than I am concerned about achieving high returns.”

For Grace, a month where the portfolio returns +10% above its mean is not a risk event — it is a good event. Yet standard deviation counts it as contributing to portfolio risk just as much as a -10% deviation would. This is a poor fit for Grace’s asymmetric preferences.

Part e (ii) — Better Alternative: Semi-Variance (Downside Deviation):

A more appropriate measure is the semi-variance, defined as:

\[SV = E\left[\min(R_t - \bar{R}, 0)^2\right]\]

or equivalently the downside deviation with a target of zero (i.e., measuring deviations below zero return only). This captures exactly what Grace cares about: the magnitude and frequency of negative outcomes. Value-at-Risk (VaR) or Conditional Value-at-Risk (CVaR) are also appropriate alternatives that focus on the left tail of the return distribution.


Chapter 8 – Problem 17

Setup: A portfolio manager compiles micro and macro forecasts as shown below.

Macro forecasts: \(r_f = 8\%\), \(E(r_M) = 16\%\), \(\sigma_M = 23\%\)

Micro forecasts (analyst estimates):

Stock \(E(r)\) Beta Residual SD
A 20% 1.3 58%
B 18% 1.8 71%
C 17% 0.7 60%
D 12% 1.0 55%

Part (a) – Excess Returns, Alphas, Residual Variances

The alpha of each stock is its expected return above the CAPM fair return given its beta:

\[\alpha_i = E(r_i) - \left[r_f + \beta_i(E(r_M) - r_f)\right]\]

A positive alpha indicates the stock is expected to outperform the passive benchmark; a negative alpha indicates underperformance. The residual variance \(\sigma^2(\varepsilon_i)\) captures the unsystematic (diversifiable) risk.

stk    <- c("Stock A","Stock B","Stock C","Stock D")
er17   <- c(0.20, 0.18, 0.17, 0.12)
beta17 <- c(1.3,  1.8,  0.7,  1.0)
rsd17  <- c(0.58, 0.71, 0.60, 0.55)
rf17   <- 0.08; erM17 <- 0.16; sM17 <- 0.23

erp17    <- erM17 - rf17   # market excess return = 8%
excess17 <- er17 - rf17    # stock excess returns
alpha17  <- er17 - (rf17 + beta17*erp17)   # CAPM alphas
resvar17 <- rsd17^2         # residual variances

data.frame(
  Stock        = stk,
  `E(r)`       = sprintf("%.0f%%", er17*100),
  Beta         = beta17,
  `Excess Ret` = sprintf("%.0f%%", excess17*100),
  Alpha        = sprintf("%.2f%%", alpha17*100),
  `Res. SD`    = sprintf("%.0f%%", rsd17*100),
  `Res. Var`   = round(resvar17, 4)
) |>
  kable(caption="Table 8P17a – Excess Returns, Alphas, Residual Variances") |>
  kable_styling(bootstrap_options=c("striped","hover"), full_width=FALSE)
Table 8P17a – Excess Returns, Alphas, Residual Variances
Stock E.r. Beta Excess.Ret Alpha Res..SD Res..Var
Stock A 20% 1.3 12% 1.60% 58% 0.3364
Stock B 18% 1.8 10% -4.40% 71% 0.5041
Stock C 17% 0.7 9% 3.40% 60% 0.3600
Stock D 12% 1.0 4% -4.00% 55% 0.3025

Interpretation: Stocks A, B, and C all have positive alphas, suggesting the analyst believes they are underpriced relative to their CAPM fair values. Stock D has zero alpha — its expected return exactly equals its CAPM-implied fair return, meaning it adds no value from an active management perspective. Stock B has the highest alpha (2.6%) but also the highest residual variance (50.41%), so its information ratio (alpha/residual SD) is not necessarily the best among the four.

Part (b) – Optimal Active Portfolio (Treynor-Black Procedure)

The Treynor-Black (1973) model provides an optimal way to combine an active portfolio of mispriced stocks with the passive index. The key steps are:

  1. Initial active weights: proportional to \(\alpha_i / \sigma^2(\varepsilon_i)\) — the information ratio structure
  2. Active portfolio statistics: \(\alpha_A\), \(\beta_A\), \(\sigma^2(\varepsilon_A)\)
  3. Optimal weight in active portfolio: adjust for the active portfolio’s beta
# step 1: initial active weights (proportional to alpha/resvar)
ir_ratio <- alpha17 / resvar17   # information ratio weights
w0_act   <- ir_ratio / sum(ir_ratio)

cat("Initial active portfolio weights:\n")
## Initial active portfolio weights:
for(i in seq_along(stk))
  cat(sprintf("  %s: alpha/resvar = %.4f  =>  w0 = %.4f (%.1f%%)\n",
              stk[i], ir_ratio[i], w0_act[i], w0_act[i]*100))
##   Stock A: alpha/resvar = 0.0476  =>  w0 = -0.6136 (-61.4%)
##   Stock B: alpha/resvar = -0.0873  =>  w0 = 1.1261 (112.6%)
##   Stock C: alpha/resvar = 0.0944  =>  w0 = -1.2185 (-121.8%)
##   Stock D: alpha/resvar = -0.1322  =>  w0 = 1.7060 (170.6%)
# step 2: active portfolio properties
alpha_act <- sum(w0_act * alpha17)
beta_act  <- sum(w0_act * beta17)
resv_act  <- sum(w0_act^2 * resvar17)

cat(sprintf("\nActive portfolio:  alpha=%.4f,  beta=%.4f,  resvar=%.6f\n",
            alpha_act, beta_act, resv_act))
## 
## Active portfolio:  alpha=-0.1690,  beta=2.0824,  resvar=2.180878
# step 3: Treynor-Black optimal weight on active portfolio
var_M  <- sM17^2
wA_raw <- (alpha_act/resv_act) / (erp17/var_M)
wA     <- wA_raw / (1 + (1 - beta_act)*wA_raw)
wPass  <- 1 - wA

cat(sprintf("\nRaw weight in active (before beta adjustment): %.4f\n", wA_raw))
## 
## Raw weight in active (before beta adjustment): -0.0513
cat(sprintf("Adjusted weight in active portfolio (wA):     %.4f = %.1f%%\n",
            wA, wA*100))
## Adjusted weight in active portfolio (wA):     -0.0486 = -4.9%
cat(sprintf("Weight in passive index (wP):                 %.4f = %.1f%%\n",
            wPass, wPass*100))
## Weight in passive index (wP):                 1.0486 = 104.9%
final_w <- c(wA * w0_act, wPass)
names(final_w) <- c(stk, "Passive Index")

data.frame(
  Asset  = names(final_w),
  Weight = sprintf("%.4f (%.2f%%)", final_w, final_w*100)
) |>
  kable(caption="Table 8P17b – Optimal Risky Portfolio: Treynor-Black Weights") |>
  kable_styling(bootstrap_options=c("striped","hover"),
                full_width=FALSE, position="left")
Table 8P17b – Optimal Risky Portfolio: Treynor-Black Weights
Asset Weight
Stock A 0.0298 (2.98%)
Stock B -0.0547 (-5.47%)
Stock C 0.0592 (5.92%)
Stock D -0.0828 (-8.28%)
Passive Index 1.0486 (104.86%)

Part (c) – Sharpe Ratio of Optimal Portfolio

er_opt  <- rf17 + wA*(alpha_act + beta_act*erp17) + wPass*erp17
var_opt <- (wA*beta_act + wPass)^2 * var_M + wA^2 * resv_act
sd_opt  <- sqrt(var_opt)

sr_opt  <- (er_opt - rf17) / sd_opt
sr_pass <- erp17 / sM17

cat(sprintf("Optimal portfolio:  E(r)=%.4f%%,  sigma=%.4f%%\n",
            er_opt*100, sd_opt*100))
## Optimal portfolio:  E(r)=16.4004%,  sigma=22.9408%
cat(sprintf("\nSharpe ratio – optimal portfolio: %.4f\n", sr_opt))
## 
## Sharpe ratio – optimal portfolio: 0.3662
cat(sprintf("Sharpe ratio – passive index:     %.4f\n",  sr_pass))
## Sharpe ratio – passive index:     0.3478
cat(sprintf("Improvement in Sharpe ratio:      %.4f\n",  sr_opt - sr_pass))
## Improvement in Sharpe ratio:      0.0183

Part (d) – How Much Did Active Management Improve the Sharpe Ratio?

The active portfolio improves the Sharpe ratio from 0.3478 (passive only) to 0.3662 — an absolute improvement of 0.0183. This improvement comes from the positive-alpha stocks identified by the micro forecasters, particularly Stocks A, B, and C.

A key theoretical result (Treynor-Black) states that the squared Sharpe ratio of the optimal portfolio equals the squared Sharpe ratio of the passive portfolio plus the squared information ratio of the active portfolio:

\[S^2_{opt} = S^2_{passive} + \left(\frac{\alpha_A}{\sigma(\varepsilon_A)}\right)^2\]

This shows that active management adds value in direct proportion to the quality of the analyst’s forecasts (the information ratio), regardless of market conditions.

Part (e) – Complete Portfolio for \(A = 2.8\)

A_rv <- 2.8
y_c  <- (er_opt - rf17) / (A_rv * var_opt)
cat(sprintf("Optimal y (fraction in risky portfolio): %.4f = %.2f%%\n",
            y_c, y_c*100))
## Optimal y (fraction in risky portfolio): 0.5701 = 57.01%
cat(sprintf("Fraction in risk-free asset (1-y):       %.4f = %.2f%%\n",
            1-y_c, (1-y_c)*100))
## Fraction in risk-free asset (1-y):       0.4299 = 42.99%
# complete portfolio expected return and SD
er_comp  <- rf17 + y_c*(er_opt - rf17)
sd_comp  <- y_c * sd_opt
cat(sprintf("\nComplete portfolio:  E(r) = %.4f%%,  sigma = %.4f%%\n",
            er_comp*100, sd_comp*100))
## 
## Complete portfolio:  E(r) = 12.7887%,  sigma = 13.0777%

Interpretation: An investor with risk aversion coefficient \(A = 2.8\) should allocate approximately 57.0% to the risky portfolio (active + passive mix) and 43.0% to T-bills. A more risk-averse investor (\(A > 2.8\)) would reduce \(y\); a less risk-averse investor would increase it.


Chapter 8 – CFA Problem 1

Regression Results Summary

The following statistics come from regressing annualised monthly excess stock returns on the market index excess return over the most recent 5-year period:

Statistic ABC XYZ
Alpha (α) −3.20% +7.30%
Beta (β) 0.60 0.97
0.35 0.17
Residual SD 13.02% 21.45%

Interpretation of Sample-Period Risk-Return Relationships

ABC Stock:

ABC underperformed its CAPM benchmark by 3.2% per year (negative alpha). This means that over the past 5 years, ABC delivered returns that were systematically lower than what CAPM would have predicted given its level of systematic risk. Its beta of 0.60 classifies it as a defensive stock — it moves roughly 60 cents for every $1.00 move in the market, making it less sensitive to broad economic fluctuations. The R² of 0.35 indicates that only 35% of ABC’s return variance is attributable to market movements; the remaining 65% is idiosyncratic. The residual standard deviation of 13.02% further confirms substantial firm-specific risk.

XYZ Stock:

XYZ significantly outperformed its CAPM benchmark by 7.3% per year. Its beta of 0.97 is close to 1.0, making it approximately a market-tracking stock in terms of systematic risk. However, the R² of only 0.17 is remarkably low — the market factor explains just 17% of XYZ’s return variation. The vast majority of XYZ’s risk (83%) is idiosyncratic. Its residual standard deviation of 21.45% is very large, indicating highly volatile firm-specific returns.

Implications for Future Risk-Return in a Diversified Portfolio

In a well-diversified portfolio, idiosyncratic (firm-specific) risk is eliminated through diversification. Therefore, the only risk that earns compensation in equilibrium is systematic (beta) risk. This has several implications:

  1. Pricing going forward: The CAPM-implied expected returns for both stocks depend only on their betas, not their historical alphas. For ABC: \(E(r) = r_f + 0.60 \times (E(r_M) - r_f)\). For XYZ: \(E(r) = r_f + 0.97 \times (E(r_M) - r_f)\).

  2. Alpha persistence: Historical alpha is not necessarily a reliable predictor of future alpha. Both stocks should be re-evaluated with current, forward-looking information before concluding that the past alpha will persist.

  3. XYZ’s low R²: While XYZ’s positive alpha looks attractive, the very low R² and high residual SD (21.45%) means that adding XYZ to a small or undiversified portfolio would substantially increase total risk. Investors in such a portfolio would be bearing unsystematic risk without compensation. Only in a fully diversified large portfolio would XYZ’s idiosyncratic risk wash out.

  4. Beta instability: The brokerage house estimates of beta (A: ABC=0.62, XYZ=1.45; B: ABC=0.71, XYZ=1.25) differ notably from the 5-year regression betas (ABC=0.60, XYZ=0.97). This dispersion suggests that beta is not stable over time — a well-known empirical regularity. For forward-looking analysis, a Blume-adjusted beta (which shrinks the raw beta toward 1.0) is typically preferred: \[\hat{\beta}_{adj} = 0.67 \times \hat{\beta}_{raw} + 0.33 \times 1.0\]

The discrepancy in XYZ’s beta across brokers is especially large (1.45 vs 1.25), suggesting high sensitivity of the estimate to the sample period — an additional reason for caution in using the 5-year estimate for forecasting.


— End of Submission —

This document was prepared using R Markdown and published on RPubs. See https://rpubs.com/mneylon/1144502 for the RPubs publishing guide.