1. Introduction and Investment Question

This project examines whether the small-cap premium still exists by comparing small-cap equity ETFs with large-cap or broad-market benchmark ETFs across three regions: Europe, the United States, and emerging markets. The analysis is conducted from the perspective of a European investor using monthly data, a world equity benchmark, and the €STR risk-free rate.

2. Data and Methodology

The raw data are stored in the Excel workbook Final R sheet.xlsx. In this RMarkdown file, monthly returns are calculated directly in R from the monthly price series to ensure reproducibility. The pre-calculated return sheet in the workbook is used only as a validation check.

# Main workbook
file_path <- "Final R sheet.xlsx"

# Sheet names
price_sheet <- "Monthly_Prices"
rf_sheet <- "Risk_Free_Rate"
validation_sheet <- "Analysis_Input_R"

# Asset columns
asset_cols <- c(
  "EU_Large_XMEU_DE",
  "EU_Small_SMC_PA",
  "US_Large_SXR8_DE",
  "US_Small_ZPRR_DE",
  "EM_Large_SPYM",
  "EM_Small_SPYX"
)

benchmark_col <- "World_Benchmark_IUSQ"

rf_col_raw <- "RF_ESTR_Monthly_Return"

rf_col <- "RF_ESTR"

risky_cols <- c(asset_cols, benchmark_col)

output_dir <- "step1_outputs"

if (!dir.exists(output_dir)) {
  dir.create(output_dir)
}

asset_labels <- c(
  "EU Large",
  "EU Small",
  "US Large",
  "US Small",
  "EM Large",
  "EM Small"
)

names(asset_labels) <- asset_cols

portfolio_labels <- c(
  "MVP_Unconstrained" = "MVP unconstrained",
  "MVP_Long_Only" = "MVP long-only",
  "Tangency_Unconstrained" = "Tangency unconstrained",
  "Tangency_Long_Only" = "Tangency long-only"
)
convert_excel_date <- function(x) {
  if (inherits(x, "Date")) {
    return(x)
  }
  
  if (inherits(x, "POSIXct") | inherits(x, "POSIXt")) {
    return(as.Date(x))
  }
  
  if (is.numeric(x)) {
    return(as.Date(x, origin = "1899-12-30"))
  }
  
  return(as.Date(x))
}


fmt_pct <- function(x, digits = 2) {
  percent(x, accuracy = 10^(-digits))
}


portfolio_performance <- function(weights, mu, Sigma) {
  port_return <- as.numeric(t(weights) %*% mu)
  port_sd <- as.numeric(sqrt(t(weights) %*% Sigma %*% weights))
  port_sharpe <- port_return / port_sd
  
  tibble(
    Mean_Excess_Return = port_return,
    Standard_Deviation = port_sd,
    Sharpe_Ratio = port_sharpe
  )
}

3. Required Analysis

3.1 Step 1: Risk and Return Profile

3.1.1 Data import and return calculation

prices_raw <- read_excel(file_path, sheet = price_sheet)

prices <- prices_raw %>%
  mutate(Month_End = convert_excel_date(Month_End)) %>%
  arrange(Month_End) %>%
  mutate(across(all_of(risky_cols), as.numeric))

glimpse(prices)
## Rows: 71
## Columns: 8
## $ Month_End            <date> 2020-01-31, 2020-02-29, 2020-03-31, 2020-04-30, …
## $ EU_Large_XMEU_DE     <dbl> 63.500, 58.000, 49.745, 52.850, 54.360, 56.130, 5…
## $ EU_Small_SMC_PA      <dbl> 251.50, 231.45, 182.34, 202.50, 212.45, 215.85, 2…
## $ US_Large_SXR8_DE     <dbl> 290.95, 264.50, 239.62, 265.96, 271.24, 274.05, 2…
## $ US_Small_ZPRR_DE     <dbl> 40.065, 36.630, 29.055, 33.445, 34.190, 35.110, 3…
## $ EM_Large_SPYM        <dbl> 49.690, 47.390, 41.157, 44.342, 43.765, 46.880, 4…
## $ EM_Small_SPYX        <dbl> 68.12, 63.51, 50.31, 56.46, 57.35, 61.65, 63.34, …
## $ World_Benchmark_IUSQ <dbl> 49.085, 44.945, 39.890, 43.635, 44.470, 45.530, 4…
head(prices)
## # A tibble: 6 × 8
##   Month_End  EU_Large_XMEU_DE EU_Small_SMC_PA US_Large_SXR8_DE US_Small_ZPRR_DE
##   <date>                <dbl>           <dbl>            <dbl>            <dbl>
## 1 2020-01-31             63.5            252.             291.             40.1
## 2 2020-02-29             58              231.             264.             36.6
## 3 2020-03-31             49.7            182.             240.             29.1
## 4 2020-04-30             52.8            202.             266.             33.4
## 5 2020-05-31             54.4            212.             271.             34.2
## 6 2020-06-30             56.1            216.             274.             35.1
## # ℹ 3 more variables: EM_Large_SPYM <dbl>, EM_Small_SPYX <dbl>,
## #   World_Benchmark_IUSQ <dbl>
tail(prices)
## # A tibble: 6 × 8
##   Month_End  EU_Large_XMEU_DE EU_Small_SMC_PA US_Large_SXR8_DE US_Small_ZPRR_DE
##   <date>                <dbl>           <dbl>            <dbl>            <dbl>
## 1 2025-06-30             97.5            344.             562.             53.6
## 2 2025-07-31             98.3            349.             596.             56.2
## 3 2025-08-31             99.3            346.             589.             58.5
## 4 2025-09-30            101.             347.             606.             59.7
## 5 2025-10-31            104.             351.             634.             62.0
## 6 2025-11-30            102.             335              612.             59.3
## # ℹ 3 more variables: EM_Large_SPYM <dbl>, EM_Small_SPYX <dbl>,
## #   World_Benchmark_IUSQ <dbl>
# Simple monthly returns from prices:
# r_t = P_t / P_(t-1) - 1
# Calculating simple monthly returns from price levels.
# Simple returns are used because portfolio returns are weighted averages of individual asset returns.

monthly_returns <- prices %>%
  arrange(Month_End) %>%
  mutate(across(all_of(risky_cols), ~ .x / lag(.x) - 1)) %>%
  filter(!is.na(.data[[asset_cols[1]]])) %>%
  select(Month_End, all_of(risky_cols))

# Risk-free rate
rf_raw <- read_excel(file_path, sheet = rf_sheet)

rf <- rf_raw %>%
  mutate(
    Month_End = convert_excel_date(Month_End),
    Month_ID = format(Month_End, "%Y-%m"),
    RF_ESTR = as.numeric(.data[[rf_col_raw]])
  ) %>%
  arrange(Month_End) %>%
  select(Month_ID, RF_ESTR)

# Joining returns and risk-free rate by year-month, not exact date because this avoids problems when ETF prices use calendar month-end but €STR uses the last business day of the month.
returns_data <- monthly_returns %>%
  mutate(Month_ID = format(Month_End, "%Y-%m")) %>%
  left_join(rf, by = "Month_ID") %>%
  select(-Month_ID) %>%
  arrange(Month_End)

glimpse(returns_data)
## Rows: 70
## Columns: 9
## $ Month_End            <date> 2020-02-29, 2020-03-31, 2020-04-30, 2020-05-31, …
## $ EU_Large_XMEU_DE     <dbl> -0.086614173, -0.142327586, 0.062418334, 0.028571…
## $ EU_Small_SMC_PA      <dbl> -0.079721670, -0.212184057, 0.110562685, 0.049135…
## $ US_Large_SXR8_DE     <dbl> -0.090909091, -0.094064272, 0.109924046, 0.019852…
## $ US_Small_ZPRR_DE     <dbl> -0.0857356795, -0.2067977068, 0.1510927551, 0.022…
## $ EM_Large_SPYM        <dbl> -0.046286979, -0.131525638, 0.077386593, -0.01301…
## $ EM_Small_SPYX        <dbl> -0.067674692, -0.207841285, 0.122242099, 0.015763…
## $ World_Benchmark_IUSQ <dbl> -0.0843434858, -0.1124707976, 0.0938831787, 0.019…
## $ RF_ESTR              <dbl> -0.0004485833, -0.0004448485, -0.0004471250, -0.0…
head(returns_data)
## # A tibble: 6 × 9
##   Month_End  EU_Large_XMEU_DE EU_Small_SMC_PA US_Large_SXR8_DE US_Small_ZPRR_DE
##   <date>                <dbl>           <dbl>            <dbl>            <dbl>
## 1 2020-02-29          -0.0866        -0.0797          -0.0909           -0.0857
## 2 2020-03-31          -0.142         -0.212           -0.0941           -0.207 
## 3 2020-04-30           0.0624         0.111            0.110             0.151 
## 4 2020-05-31           0.0286         0.0491           0.0199            0.0223
## 5 2020-06-30           0.0326         0.0160           0.0104            0.0269
## 6 2020-07-31          -0.0166         0.00834          0.00427          -0.0282
## # ℹ 4 more variables: EM_Large_SPYM <dbl>, EM_Small_SPYX <dbl>,
## #   World_Benchmark_IUSQ <dbl>, RF_ESTR <dbl>
tail(returns_data)
## # A tibble: 6 × 9
##   Month_End  EU_Large_XMEU_DE EU_Small_SMC_PA US_Large_SXR8_DE US_Small_ZPRR_DE
##   <date>                <dbl>           <dbl>            <dbl>            <dbl>
## 1 2025-06-30         -0.0146         0.00894            0.0150           0.0190
## 2 2025-07-31          0.00852        0.0141             0.0609           0.0494
## 3 2025-08-31          0.0104        -0.00773           -0.0111           0.0402
## 4 2025-09-30          0.0154         0.000433           0.0281           0.0205
## 5 2025-10-31          0.0276         0.0130             0.0474           0.0390
## 6 2025-11-30         -0.0152        -0.0459            -0.0353          -0.0442
## # ℹ 4 more variables: EM_Large_SPYM <dbl>, EM_Small_SPYX <dbl>,
## #   World_Benchmark_IUSQ <dbl>, RF_ESTR <dbl>
data_checks <- tibble(
  Check = c(
    "Number of monthly price observations",
    "Number of monthly return observations",
    "Price start date",
    "Price end date",
    "Return start date",
    "Return end date",
    "Missing values in calculated asset returns",
    "Missing values in calculated world benchmark returns",
    "Missing values in risk-free rate"
  ),
  Result = c(
    nrow(prices),
    nrow(returns_data),
    as.character(min(prices$Month_End)),
    as.character(max(prices$Month_End)),
    as.character(min(returns_data$Month_End)),
    as.character(max(returns_data$Month_End)),
    sum(is.na(returns_data[asset_cols])),
    sum(is.na(returns_data[[benchmark_col]])),
    sum(is.na(returns_data[[rf_col]]))
  )
)

kable(data_checks, caption = "Data quality checks") %>%
  kable_styling(full_width = FALSE)
Data quality checks
Check Result
Number of monthly price observations 71
Number of monthly return observations 70
Price start date 2020-01-31
Price end date 2025-11-30
Return start date 2020-02-29
Return end date 2025-11-30
Missing values in calculated asset returns 0
Missing values in calculated world benchmark returns 0
Missing values in risk-free rate 0

The data quality checks confirm that the cleaned workbook provides 71 monthly price observations from January 2020 to November 2025 and 70 monthly return observations from February 2020 to November 2025. The monthly return sample contains no missing ETF returns, benchmark returns, or risk-free-rate observations.

# Validating with the pre-calculated Analysis_Input_R sheet 

analysis_input <- read_excel(file_path, sheet = "Analysis_Input_R") %>%
  mutate(
    Month_End = convert_excel_date(Month_End),
    across(-Month_End, as.numeric)
  ) %>%
  arrange(Month_End)

validation_summary <- tibble(
  Check = c(
    "Number of observations",
    "First month",
    "Last month",
    "Missing values",
    "Duplicate Month_End dates"
  ),
  Result = c(
    nrow(analysis_input),
    as.character(min(analysis_input$Month_End)),
    as.character(max(analysis_input$Month_End)),
    sum(is.na(analysis_input)),
    sum(duplicated(analysis_input$Month_End))
  )
)

knitr::kable(
  validation_summary,
  caption = "Validation of the final R input sheet"
) %>%
  kableExtra::kable_styling(full_width = FALSE)
Validation of the final R input sheet
Check Result
Number of observations 70
First month 2020-02-29
Last month 2025-11-30
Missing values 0
Duplicate Month_End dates 0

The validation table confirms that the final Analysis_Input_R sheet contains a complete and clean monthly sample for the analysis. The dataset includes 70 monthly observations from February 2020 to November 2025, with no missing values and no duplicate month-end dates. This confirms that the ETF returns, world benchmark return, and €STR risk-free rate are aligned by month before the empirical analysis begins.

# Monthly excess returns for the six ETF assets
excess_returns <- returns_data %>%
  mutate(across(all_of(asset_cols), ~ .x - RF_ESTR)) %>%
  select(Month_End, all_of(asset_cols))

# World benchmark excess return for later use
benchmark_excess <- returns_data %>%
  transmute(
    Month_End,
    World_Benchmark_Excess = .data[[benchmark_col]] - RF_ESTR
  )

excess_returns <- excess_returns %>%
  drop_na()

benchmark_excess <- benchmark_excess %>%
  drop_na()

head(excess_returns)
## # A tibble: 6 × 7
##   Month_End  EU_Large_XMEU_DE EU_Small_SMC_PA US_Large_SXR8_DE US_Small_ZPRR_DE
##   <date>                <dbl>           <dbl>            <dbl>            <dbl>
## 1 2020-02-29          -0.0862        -0.0793          -0.0905           -0.0853
## 2 2020-03-31          -0.142         -0.212           -0.0936           -0.206 
## 3 2020-04-30           0.0629         0.111            0.110             0.152 
## 4 2020-05-31           0.0290         0.0496           0.0203            0.0227
## 5 2020-06-30           0.0330         0.0165           0.0108            0.0274
## 6 2020-07-31          -0.0161         0.00880          0.00473          -0.0277
## # ℹ 2 more variables: EM_Large_SPYM <dbl>, EM_Small_SPYX <dbl>

3.1.2 Annualized risk-return statistics

# Matrix of monthly excess returns
R_excess <- excess_returns %>%
  select(all_of(asset_cols)) %>%
  as.matrix()

T_obs <- nrow(R_excess)

# Monthly statistics
mu_monthly <- colMeans(R_excess, na.rm = TRUE)
sd_monthly <- apply(R_excess, 2, sd, na.rm = TRUE)
sharpe_monthly <- mu_monthly / sd_monthly

# Annualized monthly mean excess returns and standard deviations.
# Mean returns are multiplied by 12, while volatility is multiplied by sqrt(12).
mu_annual <- mu_monthly * 12
sd_annual <- sd_monthly * sqrt(12)
sharpe_annual <- sharpe_monthly * sqrt(12)

# Estimate the sampling uncertainty of the Sharpe ratio.
# The standard error is computed at the monthly frequency and then annualized.
sharpe_se_monthly <- sqrt((1 + 0.5 * sharpe_monthly^2) / T_obs)
sharpe_se_annual <- sharpe_se_monthly * sqrt(12)

risk_return_table <- tibble(
  Asset = asset_cols,
  Mean_Excess_Return = as.numeric(mu_annual),
  Standard_Deviation = as.numeric(sd_annual),
  Sharpe_Ratio = as.numeric(sharpe_annual),
  Sharpe_SE = as.numeric(sharpe_se_annual),
  Sharpe_CI_Lower = Sharpe_Ratio - 1.96 * Sharpe_SE,
  Sharpe_CI_Upper = Sharpe_Ratio + 1.96 * Sharpe_SE
)

risk_return_table_display <- risk_return_table %>%
  mutate(
    Asset = asset_labels[Asset],
    Mean_Excess_Return = fmt_pct(Mean_Excess_Return),
    Standard_Deviation = fmt_pct(Standard_Deviation),
    Sharpe_Ratio = round(Sharpe_Ratio, 3),
    Sharpe_SE = round(Sharpe_SE, 3),
    Sharpe_CI_Lower = round(Sharpe_CI_Lower, 3),
    Sharpe_CI_Upper = round(Sharpe_CI_Upper, 3)
  ) %>%
  rename(
    `Asset` = Asset,
    `Mean excess return` = Mean_Excess_Return,
    `Volatility` = Standard_Deviation,
    `Sharpe` = Sharpe_Ratio,
    `Sharpe SE` = Sharpe_SE,
    `CI lower` = Sharpe_CI_Lower,
    `CI upper` = Sharpe_CI_Upper
  )

kable(
  risk_return_table_display,
  caption = "Annualized risk-return profile of the ETF universe"
) %>%
  kable_styling(
    full_width = FALSE,
    bootstrap_options = c("striped", "hover", "condensed")
  )
Annualized risk-return profile of the ETF universe
Asset Mean excess return Volatility Sharpe Sharpe SE CI lower CI upper
EU Large 7.95% 15.02% 0.529 0.416 -0.287 1.345
EU Small 5.45% 19.15% 0.285 0.415 -0.528 1.098
US Large 12.71% 15.81% 0.804 0.420 -0.018 1.626
US Small 7.96% 22.68% 0.351 0.415 -0.463 1.165
EM Large 5.92% 13.85% 0.428 0.416 -0.387 1.242
EM Small 9.72% 16.06% 0.605 0.417 -0.213 1.423

The risk-return table shows that the U.S. large-cap ETF has the strongest standalone performance, with the highest annualized excess return and Sharpe ratio. Among small-cap ETFs, the emerging-market small-cap ETF performs best, while the European and U.S. small-cap ETFs have lower Sharpe ratios than their large-cap counterparts. This suggests that small-cap risk was not consistently compensated by higher risk-adjusted returns in this sample. The wide Sharpe ratio confidence intervals mean that these differences should still be interpreted cautiously.

write.csv(
  risk_return_table,
  file = file.path(output_dir, "step1_risk_return_table.csv"),
  row.names = FALSE
)
risk_return_plot_data <- risk_return_table %>%
  mutate(
    Asset_Label = asset_labels[Asset],
    Region = case_when(
      grepl("^EU", Asset) ~ "Europe",
      grepl("^US", Asset) ~ "United States",
      grepl("^EM", Asset) ~ "Emerging markets",
      TRUE ~ "Other"
    ),
    Size = case_when(
      grepl("Small", Asset) ~ "Small-cap",
      grepl("Large", Asset) ~ "Large-cap",
      TRUE ~ "Other"
    )
  )

risk_return_plot <- ggplot(
  risk_return_plot_data,
  aes(
    x = Standard_Deviation,
    y = Mean_Excess_Return,
    color = Size,
    label = Asset_Label
  )
) +
  geom_hline(yintercept = 0, linewidth = 0.4, color = "grey70") +
  geom_point(size = 4, alpha = 0.9) +
  ggrepel::geom_label_repel(
    size = 3.5,
    label.size = 0.2,
    label.padding = unit(0.15, "lines"),
    box.padding = 0.45,
    point.padding = 0.35,
    min.segment.length = 0,
    max.overlaps = Inf,
    show.legend = FALSE
  ) +
  scale_color_manual(
    values = c(
      "Large-cap" = "#1F77B4",
      "Small-cap" = "#D62728"
    )
  ) +
  scale_x_continuous(
    labels = percent_format(accuracy = 1),
    expand = expansion(mult = c(0.08, 0.12))
  ) +
  scale_y_continuous(
    labels = percent_format(accuracy = 1),
    expand = expansion(mult = c(0.12, 0.15))
  ) +
  labs(
    title = "Risk-return profile of the ETF universe",
    subtitle = "Annualized excess return and volatility",
    x = "Annualized volatility",
    y = "Annualized mean excess return",
    color = NULL
  ) +
  theme_report() +
  theme(
    legend.position = "bottom",
    plot.margin = margin(10, 20, 10, 10)
  )

risk_return_plot

ggsave(
  filename = file.path(output_dir, "step1_risk_return_scatter.png"),
  plot = risk_return_plot,
  width = 9,
  height = 6,
  dpi = 300
)

The scatterplot shows that the U.S. large-cap ETF achieved the strongest risk-return profile, with the highest annualized excess return and a relatively moderate level of volatility. The small-cap ETFs generally appear further to the right, meaning they carried higher volatility. However, this higher risk was not consistently rewarded with higher excess returns, especially for the European small-cap ETF. Overall, the figure suggests that, in this ETF universe and sample period, the small-cap premium is not clearly visible on a simple standalone risk-return basis.

3.1.3 Variance-covariance and correlation matrices

# The covariance matrix is annualized because it is used together with annualized expected excess returns in the portfolio optimization.
# The correlation matrix is left unchanged because correlations are scale-free.
cov_matrix_annual <- cov(R_excess, use = "complete.obs") * 12

# Correlation matrix
corr_matrix <- cor(R_excess, use = "complete.obs")

cov_table <- as.data.frame(cov_matrix_annual) %>%
  rownames_to_column("Asset")

cov_table_display <- cov_table %>%
  mutate(
    Asset = asset_labels[Asset],
    across(-Asset, ~ round(.x, 5))
  )

colnames(cov_table_display) <- c(
  "Asset",
  asset_labels[colnames(cov_table)[-1]]
)

kable(
  cov_table_display,
  caption = "Annualised variance-covariance matrix of monthly excess returns"
) %>%
  kable_styling(
    full_width = FALSE,
    bootstrap_options = c("striped", "hover", "condensed")
  ) %>%
  scroll_box(width = "100%")
Annualised variance-covariance matrix of monthly excess returns
Asset EU Large EU Small US Large US Small EM Large EM Small
EU Large 0.02256 0.02692 0.01705 0.02319 0.01282 0.01788
EU Small 0.02692 0.03669 0.02132 0.03200 0.01612 0.02402
US Large 0.01705 0.02132 0.02500 0.02963 0.01253 0.01880
US Small 0.02319 0.03200 0.02963 0.05145 0.01890 0.02690
EM Large 0.01282 0.01612 0.01253 0.01890 0.01917 0.01830
EM Small 0.01788 0.02402 0.01880 0.02690 0.01830 0.02580

The variance-covariance matrix is annualized and provides the risk input used for the mean-variance optimization. The diagonal values represent each ETF’s annualized variance, while the off-diagonal values show how pairs of ETF excess returns move together. The U.S. small-cap ETF has the highest variance, which is consistent with its high volatility in the risk-return table.

corr_table <- as.data.frame(corr_matrix) %>%
  rownames_to_column("Asset")

corr_table_display <- corr_table %>%
  mutate(
    Asset = asset_labels[Asset],
    across(-Asset, ~ round(.x, 3))
  )

colnames(corr_table_display) <- c(
  "Asset",
  asset_labels[colnames(corr_table)[-1]]
)

kable(
  corr_table_display,
  caption = "Correlation matrix of monthly excess returns"
) %>%
  kable_styling(
    full_width = FALSE,
    bootstrap_options = c("striped", "hover", "condensed")
  ) %>%
  scroll_box(width = "100%")
Correlation matrix of monthly excess returns
Asset EU Large EU Small US Large US Small EM Large EM Small
EU Large 1.000 0.936 0.718 0.681 0.616 0.741
EU Small 0.936 1.000 0.704 0.737 0.608 0.781
US Large 0.718 0.704 1.000 0.826 0.572 0.740
US Small 0.681 0.737 0.826 1.000 0.602 0.738
EM Large 0.616 0.608 0.572 0.602 1.000 0.823
EM Small 0.741 0.781 0.740 0.738 0.823 1.000

The correlation matrix shows that most ETFs are positively correlated, especially within the same region or size category. Emerging-market ETFs have somewhat lower correlations with European and U.S. ETFs than the Europe-U.S. pairs, but the relationships are still positive. This means that emerging-market exposure may provide some diversification benefit, but not a strong negative-correlation hedge.

corr_table_clean <- corr_table %>%
  mutate(Asset = asset_labels[Asset])

colnames(corr_table_clean) <- c(
  "Asset",
  asset_labels[colnames(corr_table)[-1]]
)

corr_long <- corr_table_clean %>%
  pivot_longer(-Asset, names_to = "Asset_2", values_to = "Correlation") %>%
  mutate(
    Asset = factor(Asset, levels = asset_labels),
    Asset_2 = factor(Asset_2, levels = asset_labels)
  )

corr_heatmap <- ggplot(corr_long, aes(x = Asset_2, y = Asset, fill = Correlation)) +
  geom_tile(color = "white", linewidth = 0.6) +
  geom_text(aes(label = round(Correlation, 2)), size = 3.3) +
  scale_fill_gradient2(
    limits = c(-1, 1),
    midpoint = 0,
    low = "#B2182B",
    mid = "white",
    high = "#2166AC"
  ) +
  coord_fixed() +
  labs(
    title = "Correlation matrix of ETF excess returns",
    subtitle = "Monthly excess returns",
    x = NULL,
    y = NULL,
    fill = "Correlation"
  ) +
  theme_report() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    panel.grid = element_blank(),
    legend.position = "right",
    legend.title = element_text(size = 10)
  )

corr_heatmap

ggsave(
  filename = file.path(output_dir, "step1_correlation_heatmap.png"),
  plot = corr_heatmap,
  width = 8,
  height = 6,
  dpi = 300
)

The correlation heatmap shows strong co-movement between the European large-cap and small-cap ETFs, with a correlation of 0.94. The U.S. large-cap and small-cap ETFs are also highly correlated at 0.82, indicating that size segments within the same developed region provide limited diversification. The emerging-market ETFs have somewhat lower correlations with the European and U.S. ETFs, mostly between around 0.57 and 0.78, but these correlations are still positive and moderate to high.

3.1.4 Minimum-variance and tangency portfolios

# Inputs for portfolio optimization
mu <- as.numeric(mu_annual)
names(mu) <- asset_cols

Sigma <- cov_matrix_annual
n_assets <- length(asset_cols)

ones <- rep(1, n_assets)
# Unconstrained minimum-variance portfolio
Sigma_inv <- solve(Sigma)

w_mvp_unconstrained <- as.numeric(
  Sigma_inv %*% ones / as.numeric(t(ones) %*% Sigma_inv %*% ones)
)
names(w_mvp_unconstrained) <- asset_cols


# Unconstrained tangency portfolio
w_tangency_unconstrained_raw <- as.numeric(Sigma_inv %*% mu)

if (abs(sum(w_tangency_unconstrained_raw)) < 1e-8) {
  stop("Unconstrained tangency weights are unstable because the denominator is close to zero.")
}

w_tangency_unconstrained <- w_tangency_unconstrained_raw / sum(w_tangency_unconstrained_raw)
names(w_tangency_unconstrained) <- asset_cols
# Long-only minimum-variance portfolio
# Constraints:
# sum(w) = 1
# w_i >= 0

Dmat <- 2 * Sigma
dvec <- rep(0, n_assets)

Amat_mvp <- cbind(
  ones,
  diag(n_assets)
)

bvec_mvp <- c(
  1,
  rep(0, n_assets)
)

qp_mvp_long <- solve.QP(
  Dmat = Dmat,
  dvec = dvec,
  Amat = Amat_mvp,
  bvec = bvec_mvp,
  meq = 1
)

w_mvp_long <- qp_mvp_long$solution
names(w_mvp_long) <- asset_cols


# For each target expected excess return, this function finds the minimum-variance portfolio.
# If long_only = TRUE, all weights must be non-negative.
# If long_only = FALSE, short-selling is allowed.

solve_target_portfolio <- function(target_return, mu, Sigma, long_only = TRUE) {
  n <- length(mu)
  
  Dmat <- 2 * Sigma
  dvec <- rep(0, n)
  
  if (long_only) {
    Amat <- cbind(
      rep(1, n),
      mu,
      diag(n)
    )
    
    bvec <- c(
      1,
      target_return,
      rep(0, n)
    )
    
    meq <- 2
  } else {
    Amat <- cbind(
      rep(1, n),
      mu
    )
    
    bvec <- c(
      1,
      target_return
    )
    
    meq <- 2
  }
  
  result <- tryCatch(
    solve.QP(
      Dmat = Dmat,
      dvec = dvec,
      Amat = Amat,
      bvec = bvec,
      meq = meq
    ),
    error = function(e) NULL
  )
  
  if (is.null(result)) {
    return(NULL)
  }
  
  weights <- as.numeric(result$solution)
  names(weights) <- names(mu)
  
  perf <- portfolio_performance(weights, mu, Sigma)
  
  tibble(
    Target_Return = target_return,
    Mean_Excess_Return = perf$Mean_Excess_Return,
    Standard_Deviation = perf$Standard_Deviation,
    Sharpe_Ratio = perf$Sharpe_Ratio,
    Weights = list(weights)
  )
}


# Long-only efficient frontier
# A long-only portfolio cannot have an expected return above the highest individual asset return.
# Therefore, the target-return grid runs from the long-only MVP return to max(mu).

mvp_long_return <- as.numeric(t(w_mvp_long) %*% mu)

target_grid_long <- seq(
  from = mvp_long_return,
  to = max(mu),
  length.out = 300
)

frontier_long <- purrr::map_dfr(
  target_grid_long,
  ~ solve_target_portfolio(.x, mu, Sigma, long_only = TRUE)
) %>%
  tidyr::drop_na()

if (nrow(frontier_long) == 0) {
  stop("frontier_long is empty. Check the long-only target-return grid.")
}


# Long-only tangency portfolio
best_long_index <- which.max(frontier_long$Sharpe_Ratio)

w_tangency_long <- frontier_long$Weights[[best_long_index]]
names(w_tangency_long) <- asset_cols


# Unconstrained efficient frontier
mvp_unconstrained_return <- as.numeric(t(w_mvp_unconstrained) %*% mu)

tangency_unconstrained_return <- as.numeric(t(w_tangency_unconstrained) %*% mu)

target_grid_unconstrained <- seq(
  from = mvp_unconstrained_return,
  to = max(max(mu), tangency_unconstrained_return) * 1.05,
  length.out = 500
)

frontier_unconstrained <- purrr::map_dfr(
  target_grid_unconstrained,
  ~ solve_target_portfolio(.x, mu, Sigma, long_only = FALSE)
) %>%
  tidyr::drop_na()

if (nrow(frontier_unconstrained) == 0) {
  stop("frontier_unconstrained is empty. Check the unconstrained target-return grid.")
}


# Quick internal check for the frontier objects
frontier_check <- tibble(
  Frontier = c("Long-only", "Unconstrained"),
  Rows = c(nrow(frontier_long), nrow(frontier_unconstrained)),
  Min_Return = c(
    min(frontier_long$Mean_Excess_Return),
    min(frontier_unconstrained$Mean_Excess_Return)
  ),
  Max_Return = c(
    max(frontier_long$Mean_Excess_Return),
    max(frontier_unconstrained$Mean_Excess_Return)
  )
)

knitr::kable(
  frontier_check,
  caption = "Efficient frontier construction check"
) %>%
  kableExtra::kable_styling(full_width = FALSE)
Efficient frontier construction check
Frontier Rows Min_Return Max_Return
Long-only 300 0.0780535 0.1271368
Unconstrained 500 0.0983941 0.1896888
nrow(frontier_long)
## [1] 300
nrow(frontier_unconstrained)
## [1] 500
summary(frontier_long$Mean_Excess_Return)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## 0.07805 0.09032 0.10260 0.10260 0.11487 0.12714
summary(frontier_unconstrained$Mean_Excess_Return)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## 0.09839 0.12122 0.14404 0.14404 0.16687 0.18969
# Combining portfolio weights
weights_table <- tibble(
  Asset = asset_cols,
  MVP_Unconstrained = w_mvp_unconstrained,
  MVP_Long_Only = w_mvp_long,
  Tangency_Unconstrained = w_tangency_unconstrained,
  Tangency_Long_Only = w_tangency_long
)

# Portfolio performance
portfolio_summary <- bind_rows(
  portfolio_performance(w_mvp_unconstrained, mu, Sigma) %>%
    mutate(Portfolio = "MVP_Unconstrained"),
  portfolio_performance(w_mvp_long, mu, Sigma) %>%
    mutate(Portfolio = "MVP_Long_Only"),
  portfolio_performance(w_tangency_unconstrained, mu, Sigma) %>%
    mutate(Portfolio = "Tangency_Unconstrained"),
  portfolio_performance(w_tangency_long, mu, Sigma) %>%
    mutate(Portfolio = "Tangency_Long_Only")
) %>%
  select(Portfolio, Mean_Excess_Return, Standard_Deviation, Sharpe_Ratio)

weights_table_display <- weights_table %>%
  mutate(across(-Asset, ~ fmt_pct(.x, digits = 2)))

portfolio_summary_display <- portfolio_summary %>%
  mutate(
    Mean_Excess_Return = fmt_pct(Mean_Excess_Return),
    Standard_Deviation = fmt_pct(Standard_Deviation),
    Sharpe_Ratio = round(Sharpe_Ratio, 3)
  )

kable(weights_table_display,
      caption = "Portfolio weights: unconstrained and long-only solutions") %>%
  kable_styling(full_width = FALSE)
Portfolio weights: unconstrained and long-only solutions
Asset MVP_Unconstrained MVP_Long_Only Tangency_Unconstrained Tangency_Long_Only
EU_Large_XMEU_DE 80.85% 25.97% 137.46% 0.00%
EU_Small_SMC_PA -49.10% 0.00% -140.47% 0.00%
US_Large_SXR8_DE 41.29% 19.99% 95.78% 97.26%
US_Small_ZPRR_DE -20.72% 0.00% -38.41% 0.00%
EM_Large_SPYM 56.39% 54.04% -32.95% 0.00%
EM_Small_SPYX -8.70% 0.00% 78.60% 2.74%
kable(portfolio_summary_display,
      caption = "Portfolio performance: annualized excess return, volatility and Sharpe ratio") %>%
  kable_styling(full_width = FALSE)
Portfolio performance: annualized excess return, volatility and Sharpe ratio
Portfolio Mean_Excess_Return Standard_Deviation Sharpe_Ratio
MVP_Unconstrained 9.84% 11.37% 0.866
MVP_Long_Only 7.81% 12.73% 0.613
Tangency_Unconstrained 18.07% 15.40% 1.173
Tangency_Long_Only 12.63% 15.71% 0.804

The portfolio tables compare unconstrained and long-only mean-variance solutions. The unconstrained portfolios represent the theoretical optimum when short-selling is allowed, while the long-only portfolios are more realistic for many ETF investors. In this sample, the long-only constraint reduces the tangency portfolio Sharpe ratio from 1.173 to 0.804, showing that short-selling flexibility improves the theoretical risk-return trade-off. However, the long-only tangency portfolio is highly concentrated, with most of the weight allocated to the U.S. large-cap ETF, so it is more implementable but still poorly diversified.

write.csv(weights_table, file.path(output_dir, "step1_portfolio_weights.csv"), row.names = FALSE)
write.csv(portfolio_summary, file.path(output_dir, "step1_portfolio_summary.csv"), row.names = FALSE)

3.1.5 Efficient frontier

# Only the efficient upper branch is plotted, starting from the MVP return upward.
# Preparing individual asset points
asset_points_clean <- risk_return_table %>%
  transmute(
    Name = Asset,
    Label = asset_labels[Asset],
    Mean_Excess_Return,
    Standard_Deviation,
    Size = case_when(
      grepl("Small", Asset) ~ "Small-cap ETF",
      grepl("Large", Asset) ~ "Large-cap ETF",
      TRUE ~ "ETF"
    )
  )

# Preparing optimal portfolio points
portfolio_points_clean <- portfolio_summary %>%
  mutate(
    Label = portfolio_labels[Portfolio],
    Portfolio_Type = case_when(
      grepl("MVP", Portfolio) ~ "Minimum-variance portfolio",
      grepl("Tangency", Portfolio) ~ "Tangency portfolio",
      TRUE ~ "Optimal portfolio"
    )
  )

# The small tolerance avoids accidentally removing the MVP due to numerical rounding.
mvp_unconstrained_return <- portfolio_summary %>%
  filter(Portfolio == "MVP_Unconstrained") %>%
  pull(Mean_Excess_Return)

mvp_long_return <- portfolio_summary %>%
  filter(Portfolio == "MVP_Long_Only") %>%
  pull(Mean_Excess_Return)

# Keep only the efficient branch of the unconstrained frontier
frontier_unconstrained_plot_data <- frontier_unconstrained %>%
  filter(Mean_Excess_Return >= mvp_unconstrained_return - 1e-8) %>%
  arrange(Mean_Excess_Return) %>%
  mutate(Constraint = "Unconstrained")

# Keep only the efficient branch of the long-only frontier
frontier_long_plot_data <- frontier_long %>%
  filter(Mean_Excess_Return >= mvp_long_return - 1e-8) %>%
  arrange(Mean_Excess_Return) %>%
  mutate(Constraint = "Long-only")

# Safety check
if (nrow(frontier_unconstrained_plot_data) == 0) {
  stop("No unconstrained frontier data available for plotting.")
}

if (nrow(frontier_long_plot_data) == 0) {
  stop("No long-only frontier data available for plotting.")
}

# The plot highlights the optimal portfolios while keeping individual ETFs in the legend to avoid overcrowding the figure.
efficient_frontier_plot <- ggplot() +
  geom_path(
    data = frontier_unconstrained_plot_data,
    aes(
      x = Standard_Deviation,
      y = Mean_Excess_Return,
      linetype = Constraint,
      group = Constraint
    ),
    linewidth = 1.1,
    color = "grey25"
  ) +
  geom_path(
    data = frontier_long_plot_data,
    aes(
      x = Standard_Deviation,
      y = Mean_Excess_Return,
      linetype = Constraint,
      group = Constraint
    ),
    linewidth = 1.1,
    color = "grey55"
  ) +
  geom_point(
    data = asset_points_clean,
    aes(
      x = Standard_Deviation,
      y = Mean_Excess_Return,
      color = Size
    ),
    size = 3.2,
    alpha = 0.8
  ) +
  geom_point(
    data = portfolio_points_clean,
    aes(
      x = Standard_Deviation,
      y = Mean_Excess_Return,
      shape = Portfolio_Type
    ),
    size = 4.5,
    color = "black"
  ) +
  ggrepel::geom_label_repel(
    data = portfolio_points_clean,
    aes(
      x = Standard_Deviation,
      y = Mean_Excess_Return,
      label = Label
    ),
    size = 3.3,
    fontface = "bold",
    label.size = 0.25,
    label.padding = unit(0.18, "lines"),
    box.padding = 0.7,
    point.padding = 0.5,
    min.segment.length = 0,
    max.overlaps = Inf,
    show.legend = FALSE
  ) +
  scale_color_manual(
    values = c(
      "Large-cap ETF" = "#1F77B4",
      "Small-cap ETF" = "#D62728"
    )
  ) +
  scale_shape_manual(
    values = c(
      "Minimum-variance portfolio" = 15,
      "Tangency portfolio" = 17
    )
  ) +
  scale_linetype_manual(
    values = c(
      "Unconstrained" = "solid",
      "Long-only" = "dashed"
    )
  ) +
  scale_x_continuous(
    labels = percent_format(accuracy = 1),
    expand = expansion(mult = c(0.08, 0.18))
  ) +
  scale_y_continuous(
    labels = percent_format(accuracy = 1),
    expand = expansion(mult = c(0.12, 0.18))
  ) +
  coord_cartesian(clip = "off") +
  labs(
    title = "Efficient frontier of the ETF universe",
    subtitle = "Only the efficient upper branch is shown; MVP and tangency portfolios are marked",
    x = "Annualized volatility",
    y = "Annualized mean excess return",
    color = NULL,
    shape = NULL,
    linetype = NULL
  ) +
  theme_report() +
  theme(
    legend.position = "bottom",
    legend.box = "vertical",
    plot.margin = margin(10, 35, 10, 10)
  )

efficient_frontier_plot

ggsave(
  filename = file.path(output_dir, "step1_efficient_frontier.png"),
  plot = efficient_frontier_plot,
  width = 10.5,
  height = 7,
  dpi = 300
)

The efficient frontier shows the best attainable risk-return combinations for the selected ETF universe. The unconstrained frontier expands the feasible opportunity set relative to the long-only frontier. However, the long-only frontier is more realistic for an ETF investor. The tangency portfolio also has a higher Sharpe ratio when short-selling is allowed, while the long-only tangency portfolio represents the more implementable maximum-Sharpe solution.

3.1.6 Interpretation

# Important values
best_sharpe_asset <- risk_return_table %>%
  arrange(desc(Sharpe_Ratio)) %>%
  slice(1)

worst_sharpe_asset <- risk_return_table %>%
  arrange(Sharpe_Ratio) %>%
  slice(1)

highest_return_asset <- risk_return_table %>%
  arrange(desc(Mean_Excess_Return)) %>%
  slice(1)

lowest_vol_asset <- risk_return_table %>%
  arrange(Standard_Deviation) %>%
  slice(1)

interpretation_summary <- tibble(
  Item = c(
    "Best individual Sharpe ratio",
    "Worst individual Sharpe ratio",
    "Highest annualised mean excess return",
    "Lowest annualised volatility",
    "Unconstrained tangency Sharpe ratio",
    "Long-only tangency Sharpe ratio",
    "Unconstrained MVP volatility",
    "Long-only MVP volatility"
  ),
  Result = c(
    paste0(best_sharpe_asset$Asset, " / Sharpe = ", round(best_sharpe_asset$Sharpe_Ratio, 3)),
    paste0(worst_sharpe_asset$Asset, " / Sharpe = ", round(worst_sharpe_asset$Sharpe_Ratio, 3)),
    paste0(highest_return_asset$Asset, " / Return = ", fmt_pct(highest_return_asset$Mean_Excess_Return)),
    paste0(lowest_vol_asset$Asset, " / Volatility = ", fmt_pct(lowest_vol_asset$Standard_Deviation)),
    round(portfolio_summary$Sharpe_Ratio[portfolio_summary$Portfolio == "Tangency_Unconstrained"], 3),
    round(portfolio_summary$Sharpe_Ratio[portfolio_summary$Portfolio == "Tangency_Long_Only"], 3),
    fmt_pct(portfolio_summary$Standard_Deviation[portfolio_summary$Portfolio == "MVP_Unconstrained"]),
    fmt_pct(portfolio_summary$Standard_Deviation[portfolio_summary$Portfolio == "MVP_Long_Only"])
  )
)

kable(interpretation_summary,
      caption = "Key values") %>%
  kable_styling(full_width = FALSE)
Key values
Item Result
Best individual Sharpe ratio US_Large_SXR8_DE / Sharpe = 0.804
Worst individual Sharpe ratio EU_Small_SMC_PA / Sharpe = 0.285
Highest annualised mean excess return US_Large_SXR8_DE / Return = 12.71%
Lowest annualised volatility EM_Large_SPYM / Volatility = 13.85%
Unconstrained tangency Sharpe ratio 1.173
Long-only tangency Sharpe ratio 0.804
Unconstrained MVP volatility 11.37%
Long-only MVP volatility 12.73%
pair_comparison <- tibble(
  Region = c("Europe", "United States", "Emerging markets"),
  Large_Cap = c("EU_Large_XMEU_DE", "US_Large_SXR8_DE", "EM_Large_SPYM"),
  Small_Cap = c("EU_Small_SMC_PA", "US_Small_ZPRR_DE", "EM_Small_SPYX")
) %>%
  rowwise() %>%
  mutate(
    Large_Return = risk_return_table$Mean_Excess_Return[risk_return_table$Asset == Large_Cap],
    Small_Return = risk_return_table$Mean_Excess_Return[risk_return_table$Asset == Small_Cap],
    Return_Difference = Small_Return - Large_Return,
    
    Large_Volatility = risk_return_table$Standard_Deviation[risk_return_table$Asset == Large_Cap],
    Small_Volatility = risk_return_table$Standard_Deviation[risk_return_table$Asset == Small_Cap],
    Volatility_Difference = Small_Volatility - Large_Volatility,
    
    Large_Sharpe = risk_return_table$Sharpe_Ratio[risk_return_table$Asset == Large_Cap],
    Small_Sharpe = risk_return_table$Sharpe_Ratio[risk_return_table$Asset == Small_Cap],
    Sharpe_Difference = Small_Sharpe - Large_Sharpe
  ) %>%
  ungroup()

pair_comparison_display <- pair_comparison %>%
  transmute(
    Region,
    `Large return` = fmt_pct(Large_Return),
    `Small return` = fmt_pct(Small_Return),
    `Return diff.` = fmt_pct(Return_Difference),
    `Large vol.` = fmt_pct(Large_Volatility),
    `Small vol.` = fmt_pct(Small_Volatility),
    `Vol. diff.` = fmt_pct(Volatility_Difference),
    `Large Sharpe` = round(Large_Sharpe, 3),
    `Small Sharpe` = round(Small_Sharpe, 3),
    `Sharpe diff.` = round(Sharpe_Difference, 3)
  )

kable(
  pair_comparison_display,
  caption = "Direct comparison of small-cap and large-cap ETFs by region"
) %>%
  kable_styling(
    full_width = FALSE,
    bootstrap_options = c("striped", "hover", "condensed")
  )
Direct comparison of small-cap and large-cap ETFs by region
Region Large return Small return Return diff. Large vol.  Small vol.  Vol. diff. Large Sharpe Small Sharpe Sharpe diff.
Europe 7.95% 5.45% -2.49% 15.02% 19.15% 4.14% 0.529 0.285 -0.244
United States 12.71% 7.96% -4.75% 15.81% 22.68% 6.87% 0.804 0.351 -0.453
Emerging markets 5.92% 9.72% 3.80% 13.85% 16.06% 2.22% 0.428 0.605 0.177

The evidence for a small-cap premium is mixed. Small-cap ETFs do not outperform their large-cap counterparts in Europe or the United States, either in terms of annualized excess return or Sharpe ratio. However, the emerging-market small-cap ETF performed better than the emerging-market large-cap ETF in this sample. Overall, the results do not support a broad or consistent small-cap premium across all regions during the sample period.

write.csv(risk_return_table, file.path(output_dir, "step1_risk_return_table.csv"), row.names = FALSE)
write.csv(pair_comparison, file.path(output_dir, "step1_pair_comparison.csv"), row.names = FALSE)
write.csv(cov_table, file.path(output_dir, "step1_covariance_matrix_annual.csv"), row.names = FALSE)
write.csv(corr_table, file.path(output_dir, "step1_correlation_matrix.csv"), row.names = FALSE)
write.csv(weights_table, file.path(output_dir, "step1_portfolio_weights.csv"), row.names = FALSE)
write.csv(portfolio_summary, file.path(output_dir, "step1_portfolio_summary.csv"), row.names = FALSE)

3.2 Step 2: Index Model and Risk Decomposition

# This section estimates a single-index model for each ETF:
# asset excess return = alpha + beta * market excess return + residual
# The benchmark is the global equity benchmark already used in the data set.

library(dplyr)
library(tidyr)
library(purrr)
library(broom)
library(lmtest)
library(sandwich)
library(knitr)
library(kableExtra)

# Setting the Newey-West lag length for monthly data
nw_lag <- 4

3.2.1 Prepare data for the single-index regressions

# The excess return and benchmark series have already been aligned before Task 3.1.
benchmark_excess_clean <- benchmark_excess %>%
  rename(Market_Excess = World_Benchmark_Excess)

index_model_data <- excess_returns %>%
  left_join(benchmark_excess_clean, by = "Month_End") %>%
  drop_na()

n_obs_index_model <- nrow(index_model_data)

index_model_data_overview <- tibble(
  Item = c(
    "Number of monthly observations",
    "Start date",
    "End date",
    "Market benchmark",
    "Risk-free rate",
    "Newey-West lag length",
    "Alignment adjustment"
  ),
  Value = c(
    n_obs_index_model,
    as.character(min(index_model_data$Month_End)),
    as.character(max(index_model_data$Month_End)),
    "World_Benchmark_IUSQ",
    "RF_ESTR",
    nw_lag,
    "None: all series are aligned by Month_End Excel workbook"
  )
)

kable(
  index_model_data_overview,
  caption = "Single-index model data overview",
  align = c("l", "l")
) %>%
  kable_styling(
    full_width = FALSE,
    bootstrap_options = c("striped", "hover", "condensed")
  )
Single-index model data overview
Item Value
Number of monthly observations 70
Start date 2020-02-29
End date 2025-11-30
Market benchmark World_Benchmark_IUSQ
Risk-free rate RF_ESTR
Newey-West lag length 4
Alignment adjustment None: all series are aligned by Month_End Excel workbook
str(index_model_data$Market_Excess)
##  num [1:70] -0.0839 -0.112 0.0943 0.0196 0.0243 ...
# Identifying the asset excess return columns
asset_cols <- index_model_data %>%
  select(-any_of(c("Date", "date", "Month_End", "Market_Excess"))) %>%
  names()

asset_cols
## [1] "EU_Large_XMEU_DE" "EU_Small_SMC_PA"  "US_Large_SXR8_DE" "US_Small_ZPRR_DE"
## [5] "EM_Large_SPYM"    "EM_Small_SPYX"

3.2.2 Estimate single-index regressions with Newey-West standard errors

# Diagnostic check: correlations with market excess return
correlation_check <- index_model_data %>%
  select(all_of(asset_cols), Market_Excess) %>%
  cor(use = "complete.obs") %>%
  as.data.frame() %>%
  rownames_to_column("Asset") %>%
  select(Asset, Market_Excess) %>%
  filter(Asset != "Market_Excess") %>%
  mutate(
    Market_Excess = round(Market_Excess, 3)
  )

kable(
  correlation_check,
  caption = "Correlation of each ETF excess return with the market benchmark excess return",
  align = c("l", "r")
) %>%
  kable_styling(
    full_width = FALSE,
    bootstrap_options = c("striped", "hover", "condensed")
  )
Correlation of each ETF excess return with the market benchmark excess return
Asset Market_Excess
EU_Large_XMEU_DE 0.841
EU_Small_SMC_PA 0.817
US_Large_SXR8_DE 0.960
US_Small_ZPRR_DE 0.852
EM_Large_SPYM 0.687
EM_Small_SPYX 0.823
library(scales)

# Function to estimate one single-index model and extract the required statistics
estimate_index_model <- function(asset_name) {
  
  # Regression formula: asset excess return = alpha + beta * market excess return
  formula_i <- as.formula(paste(asset_name, "~ Market_Excess"))
  
  # Estimate OLS regression
  model_i <- lm(formula_i, data = index_model_data)
  
  # Newey-West HAC covariance matrix with lag 4
  nw_cov_i <- NeweyWest(model_i, lag = nw_lag, prewhite = FALSE, adjust = TRUE)
  
  # Coefficient table using Newey-West standard errors
  nw_test_i <- coeftest(model_i, vcov. = nw_cov_i)
  
  # Extract coefficients and Newey-West standard errors
  alpha_monthly <- nw_test_i["(Intercept)", "Estimate"]
  beta <- nw_test_i["Market_Excess", "Estimate"]
  
  alpha_se_monthly <- nw_test_i["(Intercept)", "Std. Error"]
  beta_se <- nw_test_i["Market_Excess", "Std. Error"]
  
  alpha_t <- nw_test_i["(Intercept)", "t value"]
  alpha_p <- nw_test_i["(Intercept)", "Pr(>|t|)"]
  
  # Residual standard deviation and R-squared
  residual_sd_monthly <- sigma(model_i)
  r_squared <- summary(model_i)$r.squared
  
  # Annualized alpha and residual standard deviation for easier interpretation
  alpha_annualised <- alpha_monthly * 12
  alpha_se_annualised <- alpha_se_monthly * 12
  residual_sd_annualised <- residual_sd_monthly * sqrt(12)
  
  tibble(
    Asset = asset_name,
    Alpha_Annualised = alpha_annualised,
    Beta = beta,
    NW_SE_Alpha_Annualised = alpha_se_annualised,
    NW_SE_Beta = beta_se,
    Alpha_t_stat = alpha_t,
    Alpha_p_value = alpha_p,
    Residual_SD_Annualised = residual_sd_annualised,
    R_squared = r_squared
  )
}

index_model_results <- map_dfr(asset_cols, estimate_index_model)

index_model_results_table <- index_model_results %>%
  mutate(
    Alpha_Annualised = percent(Alpha_Annualised, accuracy = 0.01),
    Beta = round(Beta, 3),
    NW_SE_Alpha_Annualised = percent(NW_SE_Alpha_Annualised, accuracy = 0.01),
    NW_SE_Beta = round(NW_SE_Beta, 3),
    Alpha_t_stat = round(Alpha_t_stat, 3),
    Alpha_p_value = round(Alpha_p_value, 3),
    Residual_SD_Annualised = percent(Residual_SD_Annualised, accuracy = 0.01),
    R_squared = percent(R_squared, accuracy = 0.1)
  )

index_model_results_table <- index_model_results %>%
  transmute(
    Asset,
    `Alpha (ann.)` = percent(Alpha_Annualised, accuracy = 0.01),
    Beta = round(Beta, 3),
    `NW SE alpha` = percent(NW_SE_Alpha_Annualised, accuracy = 0.01),
    `NW SE beta` = round(NW_SE_Beta, 3),
    `Alpha t-stat` = round(Alpha_t_stat, 3),
    `Alpha p-value` = round(Alpha_p_value, 3),
    `Residual SD (ann.)` = percent(Residual_SD_Annualised, accuracy = 0.01),
    `R-squared` = percent(R_squared, accuracy = 0.1)
  )

kable(
  index_model_results_table,
  caption = "Single-index model regression results with Newey-West standard errors",
  align = c("l", rep("r", 8))
) %>%
  kable_styling(
    full_width = FALSE,
    bootstrap_options = c("striped", "hover", "condensed"),
    font_size = 12
  ) %>%
  scroll_box(width = "100%")
Single-index model regression results with Newey-West standard errors
Asset Alpha (ann.) Beta NW SE alpha NW SE beta Alpha t-stat Alpha p-value Residual SD (ann.) R-squared
EU_Large_XMEU_DE -1.77% 0.918 3.65% 0.082 -0.484 0.630 8.18% 70.8%
EU_Small_SMC_PA -6.57% 1.136 5.39% 0.135 -1.219 0.227 11.13% 66.7%
US_Large_SXR8_DE 1.05% 1.102 1.80% 0.047 0.582 0.562 4.47% 92.1%
US_Small_ZPRR_DE -6.90% 1.404 4.47% 0.079 -1.545 0.127 11.94% 72.7%
EM_Large_SPYM -1.39% 0.691 3.53% 0.090 -0.395 0.694 10.13% 47.2%
EM_Small_SPYX -0.44% 0.960 3.80% 0.144 -0.117 0.907 9.20% 67.7%

The single-index regressions show that market exposure differs substantially across the ETFs. The U.S. small-cap ETF has the highest beta of 1.404, indicating the strongest sensitivity to movements in the world equity benchmark. The emerging-market large-cap ETF has the lowest beta of 0.691, making it the most defensive asset in this regression framework. The U.S. large-cap ETF has the highest R-squared at 92.1%, meaning that its excess returns are most strongly explained by the benchmark. In contrast, the emerging-market large-cap ETF has the lowest R-squared at 47.2%, suggesting a larger idiosyncratic component. None of the alpha p-values is below 0.05, so there is no statistically significant evidence of abnormal excess returns at the 5% level.

3.2.3 Risk decomposition: systematic and idiosyncratic variance

# Function to estimate risk decomposition for one asset
estimate_risk_decomposition <- function(asset_name) {
  
  # Regression formula
  formula_i <- as.formula(paste(asset_name, "~ Market_Excess"))
  
  # Estimate single-index model
  model_i <- lm(formula_i, data = index_model_data)
  
  # Extract beta and residuals
  beta_i <- coef(model_i)["Market_Excess"]
  residuals_i <- residuals(model_i)
  
  # Monthly market variance
  market_variance_monthly <- var(index_model_data$Market_Excess, na.rm = TRUE)
  
  # Monthly variance decomposition
  systematic_variance_monthly <- beta_i^2 * market_variance_monthly
  idiosyncratic_variance_monthly <- var(residuals_i, na.rm = TRUE)
  total_variance_monthly <- systematic_variance_monthly + idiosyncratic_variance_monthly
  
  # Shares of total model-implied variance
  systematic_share <- systematic_variance_monthly / total_variance_monthly
  idiosyncratic_share <- idiosyncratic_variance_monthly / total_variance_monthly
  
  # Annualize variance measures
  systematic_variance_annualised <- systematic_variance_monthly * 12
  idiosyncratic_variance_annualised <- idiosyncratic_variance_monthly * 12
  total_variance_annualised <- total_variance_monthly * 12
  
  tibble(
    Asset = asset_name,
    Total_Variance_Annualised = total_variance_annualised,
    Systematic_Variance_Annualised = systematic_variance_annualised,
    Idiosyncratic_Variance_Annualised = idiosyncratic_variance_annualised,
    Systematic_Share = systematic_share,
    Idiosyncratic_Share = idiosyncratic_share
  )
}

risk_decomposition_results <- map_dfr(asset_cols, estimate_risk_decomposition)

risk_decomposition_table <- risk_decomposition_results %>%
  transmute(
    Asset,
    `Total var. (ann.)` = percent(Total_Variance_Annualised, accuracy = 0.01),
    `Systematic var. (ann.)` = percent(Systematic_Variance_Annualised, accuracy = 0.01),
    `Idiosyncratic var. (ann.)` = percent(Idiosyncratic_Variance_Annualised, accuracy = 0.01),
    `Systematic share` = percent(Systematic_Share, accuracy = 0.1),
    `Idiosyncratic share` = percent(Idiosyncratic_Share, accuracy = 0.1)
  )

kable(
  risk_decomposition_table,
  caption = "Variance decomposition from the single-index model",
  align = c("l", rep("r", 5))
) %>%
  kable_styling(
    full_width = FALSE,
    bootstrap_options = c("striped", "hover", "condensed"),
    font_size = 12
  )
Variance decomposition from the single-index model
Asset Total var. (ann.) Systematic var. (ann.) Idiosyncratic var. (ann.) Systematic share Idiosyncratic share
EU_Large_XMEU_DE 2.26% 1.60% 0.66% 70.8% 29.2%
EU_Small_SMC_PA 3.67% 2.45% 1.22% 66.7% 33.3%
US_Large_SXR8_DE 2.50% 2.30% 0.20% 92.1% 7.9%
US_Small_ZPRR_DE 5.14% 3.74% 1.41% 72.7% 27.3%
EM_Large_SPYM 1.92% 0.91% 1.01% 47.2% 52.8%
EM_Small_SPYX 2.58% 1.75% 0.83% 67.7% 32.3%
# Plotting systematic and idiosyncratic risk shares
risk_decomposition_plot_data <- risk_decomposition_results %>%
  select(Asset, Systematic_Share, Idiosyncratic_Share) %>%
  pivot_longer(
    cols = c(Systematic_Share, Idiosyncratic_Share),
    names_to = "Risk_Component",
    values_to = "Share"
  ) %>%
  mutate(
    Risk_Component = recode(
      Risk_Component,
      Systematic_Share = "Systematic risk",
      Idiosyncratic_Share = "Idiosyncratic risk"
    )
  )

ggplot(risk_decomposition_plot_data, aes(x = Asset, y = Share, fill = Risk_Component)) +
  geom_col(position = "stack") +
  scale_y_continuous(labels = percent_format(accuracy = 1)) +
  labs(
    title = "Systematic and idiosyncratic risk shares",
    x = "Asset",
    y = "Share of total model-implied variance",
    fill = "Risk component"
  ) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1)
  )

The variance decomposition shows that most ETFs are primarily systematic-risk vehicles, because the majority of their model-implied variance is explained by exposure to the world equity benchmark. This is especially clear for the U.S. large-cap ETF, where systematic risk accounts for 92.1% of total variance. The emerging-market large-cap ETF is the main exception: only 47.2% of its variance is systematic, while 52.8% is idiosyncratic. This suggests that its returns are less closely explained by the global benchmark and are more influenced by regional, style, liquidity, currency, or other residual factors not captured by the world benchmark.

3.2.4 Beta confidence intervals and defensive/aggressive classification

# The 95% confidence interval for beta is calculated using the Newey-West standard error of beta from the single-index regression.
beta_classification_table <- index_model_results %>%
  mutate(
    Beta_CI_Lower = Beta - 1.96 * NW_SE_Beta,
    Beta_CI_Upper = Beta + 1.96 * NW_SE_Beta,
    Classification = case_when(
      Beta > 1 ~ "Aggressive",
      Beta < 1 ~ "Defensive",
      TRUE ~ "Market-like"
    ),
    Precision_Comment = case_when(
      Beta_CI_Lower > 1 ~ "Clearly above 1",
      Beta_CI_Upper < 1 ~ "Clearly below 1",
      TRUE ~ "CI includes 1"
    )
  ) %>%
  transmute(
    Asset,
    Beta = round(Beta, 3),
    `95% CI lower` = round(Beta_CI_Lower, 3),
    `95% CI upper` = round(Beta_CI_Upper, 3),
    Classification,
    `Precision comment` = Precision_Comment
  )

kable(
  beta_classification_table,
  caption = "Beta confidence intervals and defensive/aggressive classification",
  align = c("l", "r", "r", "r", "l", "l")
) %>%
  kable_styling(
    full_width = FALSE,
    bootstrap_options = c("striped", "hover", "condensed"),
    font_size = 12
  )
Beta confidence intervals and defensive/aggressive classification
Asset Beta 95% CI lower 95% CI upper Classification Precision comment
EU_Large_XMEU_DE 0.918 0.756 1.079 Defensive CI includes 1
EU_Small_SMC_PA 1.136 0.871 1.402 Aggressive CI includes 1
US_Large_SXR8_DE 1.102 1.011 1.193 Aggressive Clearly above 1
US_Small_ZPRR_DE 1.404 1.250 1.559 Aggressive Clearly above 1
EM_Large_SPYM 0.691 0.514 0.868 Defensive Clearly below 1
EM_Small_SPYX 0.960 0.677 1.243 Defensive CI includes 1

The beta estimates show that the U.S. small-cap ETF is the most aggressive asset, with a beta of 1.404 and a 95% confidence interval from 1.250 to 1.559. Since the full interval is above 1, its market sensitivity is clearly higher than the benchmark. The U.S. large-cap ETF is also aggressive, with a beta of 1.102 and a confidence interval from 1.011 to 1.193, which is slightly but fully above 1.

The emerging-market large-cap ETF is the most defensive asset, with a beta of 0.691 and a confidence interval from 0.514 to 0.868, which is clearly below 1. The Europe large-cap ETF and emerging-market small-cap ETF have beta point estimates below 1, but their confidence intervals include 1, so they should be interpreted as only weakly defensive. Similarly, the Europe small-cap ETF has a beta above 1, but its confidence interval includes 1, so its aggressive classification is not statistically precise. Overall, only the U.S. large-cap, U.S. small-cap, and emerging-market large-cap ETFs have beta estimates that are clearly different from 1 at the 95% confidence level.

3.3 Asset Pricing and Market Efficiency

# This section compares realized ETF excess returns with CAPM-implied excess returns.
market_mean_monthly <- mean(index_model_data$Market_Excess, na.rm = TRUE)
market_mean_annual <- market_mean_monthly * 12

T_obs <- nrow(index_model_data)

capm_table <- index_model_results %>%
  select(Asset, Beta) %>%
  left_join(
    risk_return_table %>%
      select(Asset, Mean_Excess_Return, Standard_Deviation),
    by = "Asset"
  ) %>%
  mutate(
    CAPM_Expected_Excess_Return = Beta * market_mean_annual,
    CAPM_Gap = Mean_Excess_Return - CAPM_Expected_Excess_Return,
    Mean_SE = sqrt(12) * Standard_Deviation / sqrt(T_obs),
    Gap_to_SE = CAPM_Gap / Mean_SE,
    Gap_to_Volatility = CAPM_Gap / Standard_Deviation
  )
capm_table_display <- capm_table %>%
  transmute(
    Asset,
    Beta = round(Beta, 3),
    `Realised mean excess return` = percent(Mean_Excess_Return, accuracy = 0.01),
    `CAPM-implied excess return` = percent(CAPM_Expected_Excess_Return, accuracy = 0.01),
    `CAPM gap` = percent(CAPM_Gap, accuracy = 0.01),
    `SE of realised mean` = percent(Mean_SE, accuracy = 0.01),
    `Gap / SE` = round(Gap_to_SE, 2),
    `Gap / volatility` = round(Gap_to_Volatility, 2)
  )

kable(
  capm_table_display,
  caption = "CAPM-implied excess returns versus realised excess returns",
  align = c("l", rep("r", 7))
) %>%
  kable_styling(
    full_width = FALSE,
    bootstrap_options = c("striped", "hover", "condensed"),
    font_size = 12
  ) %>%
  scroll_box(width = "100%")
CAPM-implied excess returns versus realised excess returns
Asset Beta Realised mean excess return CAPM-implied excess return CAPM gap SE of realised mean Gap / SE Gap / volatility
EU_Large_XMEU_DE 0.918 7.95% 9.71% -1.77% 6.22% -0.28 -0.12
EU_Small_SMC_PA 1.136 5.45% 12.03% -6.57% 7.93% -0.83 -0.34
US_Large_SXR8_DE 1.102 12.71% 11.66% 1.05% 6.55% 0.16 0.07
US_Small_ZPRR_DE 1.404 7.96% 14.86% -6.90% 9.39% -0.74 -0.30
EM_Large_SPYM 0.691 5.92% 7.31% -1.39% 5.73% -0.24 -0.10
EM_Small_SPYX 0.960 9.72% 10.16% -0.44% 6.65% -0.07 -0.03

The CAPM comparison shows that the EU and U.S. small-cap ETFs realised lower excess returns than implied by their market betas. The shortfalls are economically visible, at -6.57 and -6.90 percentage points, but they are small relative to the sampling uncertainty of realized mean returns, with Gap / SE values of only -0.83 and -0.74. Therefore, the evidence is not strong enough to conclude that these ETFs significantly underperformed CAPM in a statistical sense.

3.3.1 Security Market Line

capm_plot_data <- capm_table %>%
  mutate(
    Label = c("EU Large", "EU Small", "US Large", "US Small", "EM Large", "EM Small"),
    Size_Group = c("Large Cap", "Small Cap", "Large Cap", "Small Cap", "Large Cap", "Small Cap")
  )

sml_data <- tibble(
  Beta = seq(0, max(capm_plot_data$Beta) + 0.2, length.out = 100)
) %>%
  mutate(
    CAPM_Expected_Excess_Return = Beta * market_mean_annual
  )

ggplot() +
  geom_line(
    data = sml_data,
    aes(x = Beta, y = CAPM_Expected_Excess_Return),
    linewidth = 1,
    linetype = "dashed",
    color = "grey30"
  ) +
  geom_point(
    data = capm_plot_data,
    aes(x = Beta, y = Mean_Excess_Return, color = Size_Group, shape = Size_Group),
    size = 3.5
  ) +
  geom_text_repel(
    data = capm_plot_data,
    aes(x = Beta, y = Mean_Excess_Return, label = Label, color = Size_Group),
    size = 4,
    box.padding = 0.35,
    point.padding = 0.3,
    segment.color = "grey70",
    show.legend = FALSE
  ) +
  scale_color_manual(values = c("Large Cap" = "#3775B7", "Small Cap" = "#D94B45")) +
  scale_shape_manual(values = c("Large Cap" = 16, "Small Cap" = 17)) +
  scale_y_continuous(labels = scales::percent_format(accuracy = 1)) +
  scale_x_continuous(expand = expansion(mult = c(0.02, 0.08))) +
  labs(
    title = "Security Market Line",
    subtitle = "Realised annualised excess returns versus CAPM-implied excess returns",
    x = "Beta",
    y = "Annualised excess return",
    color = "ETF type",
    shape = "ETF type",
    caption = "Dashed line = CAPM-implied excess return. Points above the line outperformed CAPM; points below the line underperformed CAPM."
  ) +
  theme_minimal(base_size = 14) +
  theme(
    plot.title = element_text(face = "bold", size = 18),
    plot.subtitle = element_text(size = 12, color = "grey30"),
    axis.title = element_text(face = "bold"),
    legend.position = "top",
    legend.title = element_text(face = "bold"),
    panel.grid.minor = element_blank(),
    plot.caption = element_text(size = 10, color = "grey40")
  )

In this sample, the U.S. large-cap ETF lies above the Security Market Line, while the EU and U.S. small-cap ETFs lie visually below it. This means that the two small-cap ETFs earned lower realized excess returns than their CAPM betas would imply. The EM small-cap ETF is close to the SML, suggesting that its realized return was broadly consistent with CAPM. Overall, the graph does not provide strong support for a broad small-cap premium, although the deviations should be interpreted cautiously because realized mean returns are estimated with substantial sampling uncertainty.

3.4 Out-of-Sample Robustness and Estimation Error

We use an expanding-window scheme with an initial estimation window of 36 months, because it uses only information available at each point in time while still generating a sequence of out-of-sample portfolio returns. This provides a more informative robustness check than a single split between an estimation period and an evaluation period. The equally weighted 1/N portfolio is included as a simple benchmark that does not rely on estimated expected returns or covariances.

3.4.1 Methodological Choice: Expanding-Window Evaluation

# We use the long-only tangency portfolio because it is more practically implementable than the unconstrained portfolio with short positions.
R_oos <- excess_returns %>%
  arrange(Month_End) %>%
  select(Month_End, all_of(asset_cols)) %>%
  drop_na()

R_oos_matrix <- R_oos %>%
  select(all_of(asset_cols)) %>%
  as.matrix()

dates_oos <- R_oos$Month_End

initial_window <- 36
n_oos <- nrow(R_oos_matrix)
n_assets <- length(asset_cols)

# Function to compute long-only tangency weights using the same frontier approach as in Step 1
estimate_long_only_tangency <- function(train_returns, grid_length = 100) {
  
  mu_hat <- colMeans(train_returns) * 12
  Sigma_hat <- cov(train_returns) * 12
  
  names(mu_hat) <- colnames(train_returns)
  
  target_grid <- seq(
    from = min(mu_hat),
    to = max(mu_hat),
    length.out = grid_length
  )
  
  frontier_estimates <- map_dfr(
    target_grid,
    ~ solve_target_portfolio(
      target_return = .x,
      mu = mu_hat,
      Sigma = Sigma_hat,
      long_only = TRUE
    )
  )
  
  best_portfolio <- frontier_estimates %>%
    filter(Sharpe_Ratio == max(Sharpe_Ratio, na.rm = TRUE)) %>%
    slice(1)
  
  weights <- best_portfolio$Weights[[1]]
  names(weights) <- colnames(train_returns)
  
  list(
    weights = weights,
    in_sample_sharpe = best_portfolio$Sharpe_Ratio
  )
}
# T/N ratio: how reliably can the sample support N-asset portfolio optimization?
T_sample <- nrow(R_oos_matrix)
N_assets <- ncol(R_oos_matrix)
TN_ratio <- T_sample / N_assets
TN_initial <- initial_window / N_assets

tn_table <- tibble(
  Metric = c(
    "Full sample length (T)",
    "Number of assets (N)",
    "T/N ratio, full sample",
    "Initial estimation window",
    "T/N ratio, initial window",
    "Literature benchmark",
    "Assessment"
  ),
  Value = c(
    as.character(T_sample),
    as.character(N_assets),
    as.character(round(TN_ratio, 1)),
    as.character(initial_window),
    as.character(round(TN_initial, 1)),
    "High T/N often required for MV to reliably beat 1/N",
    if_else(
      TN_initial < 40,
      "Low T/N — estimation error is likely important; optimized weights should be interpreted cautiously",
      "Relatively high T/N"
    )
  )
)

kable(
  tn_table,
  caption = "Estimation error: sample length relative to number of assets",
  align = c("l", "l")
) %>%
  kable_styling(
    full_width = FALSE,
    bootstrap_options = c("striped", "hover", "condensed")
  )
Estimation error: sample length relative to number of assets
Metric Value
Full sample length (T) 70
Number of assets (N) 6
T/N ratio, full sample 11.7
Initial estimation window 36
T/N ratio, initial window 6
Literature benchmark High T/N often required for MV to reliably beat 1/N
Assessment Low T/N — estimation error is likely important; optimized weights should be interpreted cautiously

The T/N ratio highlights the estimation-error problem in the out-of-sample exercise. Although the full sample contains 70 monthly observations for six risky assets, the first expanding-window estimate uses only 36 months, giving an initial T/N ratio of 6.0. This is low for mean-variance optimization, especially because expected returns and covariances must be estimated from the same limited sample. Therefore, the optimized tangency portfolio weights should be interpreted cautiously, and the comparison with the naive 1/N portfolio is important.

3.4.2 Expanding-Window Out-of-Sample Portfolio Construction

# Running the expanding-window out-of-sample test
oos_results <- map_dfr(
  initial_window:(n_oos - 1),
  function(t) {
    
    train_returns <- R_oos_matrix[1:t, , drop = FALSE]
    next_return <- R_oos_matrix[t + 1, ]
    
    tangency_estimate <- estimate_long_only_tangency(train_returns)
    
    w_tangency <- tangency_estimate$weights
    w_equal <- rep(1 / n_assets, n_assets)
    names(w_equal) <- asset_cols
    
    tangency_oos_return <- sum(w_tangency * next_return)
    equal_oos_return <- sum(w_equal * next_return)
    
    tibble(
      Month_End = dates_oos[t + 1],
      In_Sample_Sharpe_Tangency = tangency_estimate$in_sample_sharpe,
      Tangency_OOS_Return = tangency_oos_return,
      Equal_Weight_OOS_Return = equal_oos_return,
      Weights_Tangency = list(w_tangency)
    )
  }
)

oos_table <- oos_results %>%
  tidyr::unnest_wider(Weights_Tangency) %>%
  rename_with(~ paste0("Weight_", .x), all_of(asset_cols)) %>%
  mutate(
    Month_End = format(Month_End, "%Y-%m-%d"),
    In_Sample_Sharpe_Tangency = round(In_Sample_Sharpe_Tangency, 3),
    Tangency_OOS_Return = scales::percent(Tangency_OOS_Return, accuracy = 0.01),
    Equal_Weight_OOS_Return = scales::percent(Equal_Weight_OOS_Return, accuracy = 0.01),
    across(starts_with("Weight_"), ~ scales::percent(.x, accuracy = 0.01))
  )

knitr::kable(
  oos_table,
  caption = "Expanding-window out-of-sample results",
  align = "c"
) %>%
  kableExtra::kable_styling(
    full_width = FALSE,
    bootstrap_options = c("striped", "hover", "condensed", "responsive"),
    font_size = 12
  ) %>%
  kableExtra::scroll_box(width = "100%", height = "500px")
Expanding-window out-of-sample results
Month_End In_Sample_Sharpe_Tangency Tangency_OOS_Return Equal_Weight_OOS_Return Weight_EU_Large_XMEU_DE Weight_EU_Small_SMC_PA Weight_US_Large_SXR8_DE Weight_US_Small_ZPRR_DE Weight_EM_Large_SPYM Weight_EM_Small_SPYX
2023-02-28 0.645 0.32% 0.24% 0.00% 0.00% 61.92% 0.00% 0.00% 38.08%
2023-03-31 0.644 -0.48% -2.07% 0.00% 0.00% 68.99% 0.00% 0.00% 31.01%
2023-04-30 0.629 -0.45% -0.78% 0.00% 0.00% 79.88% 0.00% 0.00% 20.12%
2023-05-31 0.615 3.85% 0.59% 0.00% 0.00% 90.60% 0.00% 0.00% 9.40%
2023-06-30 0.673 3.77% 2.95% 0.00% 0.00% 89.23% 0.00% 0.00% 10.77%
2023-07-31 0.728 2.18% 3.42% 0.00% 0.00% 91.95% 0.00% 0.00% 8.05%
2023-08-31 0.758 0.10% -2.29% 0.00% 0.00% 83.90% 0.00% 0.00% 16.10%
2023-09-30 0.750 -2.00% -1.92% 0.00% 0.00% 83.24% 0.00% 0.00% 16.76%
2023-10-31 0.705 -3.94% -5.18% 0.00% 0.00% 73.96% 0.00% 0.51% 25.53%
2023-11-30 0.628 5.49% 5.80% 0.00% 0.00% 80.52% 0.00% 0.00% 19.48%
2023-12-31 0.702 3.39% 5.04% 0.00% 0.00% 79.56% 0.00% 0.24% 20.20%
2024-01-31 0.747 3.25% 0.11% 0.00% 0.00% 84.52% 0.00% 0.00% 15.48%
2024-02-29 0.791 4.10% 2.50% 0.00% 0.00% 100.00% 0.00% 0.00% 0.00%
2024-03-31 0.842 3.34% 2.99% 0.00% 0.00% 100.00% 0.00% 0.00% 0.00%
2024-04-30 0.882 -2.46% -1.36% 0.00% 0.00% 100.00% 0.00% 0.00% 0.00%
2024-05-31 0.833 0.74% 1.63% 0.00% 0.00% 93.01% 0.00% 0.00% 6.99%
2024-06-30 0.836 6.49% 1.98% 0.00% 0.00% 93.78% 0.00% 0.00% 6.22%
2024-07-31 0.911 -0.80% 1.87% 0.00% 0.00% 95.20% 0.00% 0.00% 4.80%
2024-08-31 0.890 -1.06% -1.72% 0.00% 0.00% 100.00% 0.00% 0.00% 0.00%
2024-09-30 0.866 1.43% 1.50% 0.00% 0.00% 100.00% 0.00% 0.00% 0.00%
2024-10-31 0.878 2.38% -1.27% 0.00% 0.00% 100.00% 0.00% 0.00% 0.00%
2024-11-30 0.902 8.19% 3.83% 0.00% 0.00% 100.00% 0.00% 0.00% 0.00%
2024-12-31 0.984 -1.10% -1.92% 0.00% 0.00% 100.00% 0.00% 0.00% 0.00%
2025-01-31 0.959 3.59% 3.16% 0.00% 0.00% 100.00% 0.00% 0.00% 0.00%
2025-02-28 0.996 -3.86% -1.62% 0.00% 0.00% 100.00% 0.00% 0.00% 0.00%
2025-03-31 0.928 -9.21% -5.85% 0.00% 0.00% 100.00% 0.00% 0.00% 0.00%
2025-04-30 0.770 -5.56% -3.14% 0.00% 0.00% 100.00% 0.00% 0.00% 0.00%
2025-05-31 0.686 6.70% 5.99% 0.00% 0.00% 100.00% 0.00% 0.00% 0.00%
2025-06-30 0.750 1.34% 1.26% 0.00% 0.00% 100.00% 0.00% 0.00% 0.00%
2025-07-31 0.760 5.93% 3.29% 0.00% 0.00% 100.00% 0.00% 0.00% 0.00%
2025-08-31 0.815 -1.27% 0.58% 0.00% 0.00% 100.00% 0.00% 0.00% 0.00%
2025-09-30 0.793 2.65% 2.27% 0.00% 0.00% 100.00% 0.00% 0.00% 0.00%
2025-10-31 0.816 4.58% 3.54% 0.00% 0.00% 100.00% 0.00% 0.00% 0.00%
2025-11-30 0.857 -3.69% -3.07% 0.00% 0.00% 100.00% 0.00% 0.00% 0.00%

3.4.3 Out-of-Sample Performance: Tangency Portfolio vs. 1/N Benchmark

# Summary table for the out-of-sample robustness taslk
oos_summary_table <- tibble(
  Portfolio = c(
    "Tangency",
    "1/N"
  ),
  `Mean monthly ER` = c(
    mean(oos_results$Tangency_OOS_Return, na.rm = TRUE),
    mean(oos_results$Equal_Weight_OOS_Return, na.rm = TRUE)
  ),
  `Monthly SD` = c(
    sd(oos_results$Tangency_OOS_Return, na.rm = TRUE),
    sd(oos_results$Equal_Weight_OOS_Return, na.rm = TRUE)
  )
) %>%
  mutate(
    `Ann. mean ER` = `Mean monthly ER` * 12,
    `Ann. SD` = `Monthly SD` * sqrt(12),
    `OOS Sharpe` = `Ann. mean ER` / `Ann. SD`,
    `Mean monthly ER` = scales::percent(`Mean monthly ER`, accuracy = 0.01),
    `Monthly SD` = scales::percent(`Monthly SD`, accuracy = 0.01),
    `Ann. mean ER` = scales::percent(`Ann. mean ER`, accuracy = 0.01),
    `Ann. SD` = scales::percent(`Ann. SD`, accuracy = 0.01),
    `OOS Sharpe` = round(`OOS Sharpe`, 3)
  )

knitr::kable(
  oos_summary_table,
  caption = "Out-of-sample performance of the tangency and 1/N portfolios",
  align = "c"
) %>%
  kableExtra::kable_styling(
    full_width = FALSE,
    bootstrap_options = c("striped", "hover", "condensed")
  )
Out-of-sample performance of the tangency and 1/N portfolios
Portfolio Mean monthly ER Monthly SD Ann. mean ER Ann. SD OOS Sharpe
Tangency 1.12% 3.84% 13.38% 13.30% 1.006
1/N 0.66% 2.97% 7.89% 10.28% 0.767

3.4.4 Concentration of the Tangency Portfolio Weights

# Summary of tangency portfolio weights across the out-of-sample period
weight_summary_table <- oos_results %>%
  tidyr::unnest_wider(Weights_Tangency) %>%
  select(all_of(asset_cols)) %>%
  summarise(
    across(
      everything(),
      list(
        Average = ~ mean(.x, na.rm = TRUE),
        Minimum = ~ min(.x, na.rm = TRUE),
        Maximum = ~ max(.x, na.rm = TRUE)
      )
    )
  ) %>%
  pivot_longer(
    cols = everything(),
    names_to = c("Asset", "Statistic"),
    names_pattern = "(.+)_(Average|Minimum|Maximum)",
    values_to = "Weight"
  ) %>%
  pivot_wider(
    names_from = Statistic,
    values_from = Weight
  ) %>%
  mutate(
    Average = scales::percent(Average, accuracy = 0.01),
    Minimum = scales::percent(Minimum, accuracy = 0.01),
    Maximum = scales::percent(Maximum, accuracy = 0.01)
  )

knitr::kable(
  weight_summary_table,
  caption = "Summary of expanding-window tangency portfolio weights",
  align = "c"
) %>%
  kableExtra::kable_styling(
    full_width = FALSE,
    bootstrap_options = c("striped", "hover", "condensed")
  )
Summary of expanding-window tangency portfolio weights
Asset Average Minimum Maximum
EU_Large_XMEU_DE 0.00% 0.00% 0.00%
EU_Small_SMC_PA 0.00% 0.00% 0.00%
US_Large_SXR8_DE 92.65% 61.92% 100.00%
US_Small_ZPRR_DE 0.00% 0.00% 0.00%
EM_Large_SPYM 0.02% 0.00% 0.51%
EM_Small_SPYX 7.32% 0.00% 38.08%

The expanding-window results show that the long-only tangency portfolio outperformed the 1/N benchmark out of sample in both average excess return and Sharpe ratio, although with higher volatility. However, the weight summary shows that this result was driven mainly by a very large allocation to the U.S. large-cap ETF. Therefore, the result should be interpreted cautiously: the tangency portfolio performed well in this sample, but it was highly concentrated and remains sensitive to estimation error and the specific market environment.

3.4.5 In-Sample to Out-of-Sample Sharpe Ratio Deterioration

# Out-of-sample Sharpe ratios

tangency_oos_sharpe <- mean(oos_results$Tangency_OOS_Return, na.rm = TRUE) * 12 /
  (sd(oos_results$Tangency_OOS_Return, na.rm = TRUE) * sqrt(12))

equal_oos_sharpe <- mean(oos_results$Equal_Weight_OOS_Return, na.rm = TRUE) * 12 /
  (sd(oos_results$Equal_Weight_OOS_Return, na.rm = TRUE) * sqrt(12))


# For the deterioration comparison, we used the average in-sample Sharpe ratio from the expanding-window optimization, not the full-sample tangency Sharpe.
average_in_sample_sharpe <- mean(
  oos_results$In_Sample_Sharpe_Tangency,
  na.rm = TRUE
)

deterioration_table <- tibble(
  Portfolio = "Long-only tangency",
  `Average expanding-window in-sample Sharpe` = average_in_sample_sharpe,
  `OOS Sharpe` = tangency_oos_sharpe,
  `Deterioration` = average_in_sample_sharpe - tangency_oos_sharpe,
  `Survival ratio` = tangency_oos_sharpe / average_in_sample_sharpe
) %>%
  mutate(
    `Average expanding-window in-sample Sharpe` = round(`Average expanding-window in-sample Sharpe`, 3),
    `OOS Sharpe` = round(`OOS Sharpe`, 3),
    `Deterioration` = round(`Deterioration`, 3),
    `Survival ratio` = scales::percent(`Survival ratio`, accuracy = 0.1)
  )

knitr::kable(
  deterioration_table,
  caption = "In-sample to out-of-sample Sharpe ratio deterioration",
  align = "c"
) %>%
  kableExtra::kable_styling(
    full_width = FALSE,
    bootstrap_options = c("striped", "hover", "condensed")
  )
In-sample to out-of-sample Sharpe ratio deterioration
Portfolio Average expanding-window in-sample Sharpe OOS Sharpe Deterioration Survival ratio
Long-only tangency 0.793 1.006 -0.213 126.8%

The deterioration table shows that the long-only tangency portfolio did not lose performance out of sample. Its average expanding-window in-sample Sharpe ratio was 0.793, while the realized out-of-sample Sharpe ratio was 1.006. The negative deterioration of -0.213 therefore indicates that out-of-sample performance was stronger than the average in-sample estimate, with 126.8% of the in-sample Sharpe ratio surviving. This result suggests that the evaluation period was favorable for the assets selected by the optimizer.

3.4.6 Test of Equal Out-of-Sample Sharpe Ratios

# Jobson-Korkie test with Memmel correction for equal Sharpe ratios
tangency_oos <- oos_results$Tangency_OOS_Return
equal_oos <- oos_results$Equal_Weight_OOS_Return

jk_data <- tibble(
  Tangency = tangency_oos,
  Equal_Weight = equal_oos
) %>%
  drop_na()

r1 <- jk_data$Tangency
r2 <- jk_data$Equal_Weight

T_oos <- length(r1)

mean_1 <- mean(r1)
mean_2 <- mean(r2)

sd_1 <- sd(r1)
sd_2 <- sd(r2)

sr_1 <- mean_1 / sd_1
sr_2 <- mean_2 / sd_2

rho_12 <- cor(r1, r2)

# Memmel (2003) correction to the Jobson-Korkie test
jk_memmel_stat <- (sr_1 - sr_2) / sqrt(
  (1 / T_oos) * (
    2 * (1 - rho_12) +
      0.5 * sr_1^2 +
      0.5 * sr_2^2 -
      sr_1 * sr_2 * rho_12^2
  )
)

jk_memmel_p_value <- 2 * (1 - pnorm(abs(jk_memmel_stat)))

sharpe_test_table <- tibble(
  Test = "Jobson-Korkie with Memmel correction",
  `Tangency OOS Sharpe` = sr_1 * sqrt(12),
  `1/N OOS Sharpe` = sr_2 * sqrt(12),
  `Test statistic` = jk_memmel_stat,
  `p-value` = jk_memmel_p_value
) %>%
  mutate(
    `Tangency OOS Sharpe` = round(`Tangency OOS Sharpe`, 3),
    `1/N OOS Sharpe` = round(`1/N OOS Sharpe`, 3),
    `Test statistic` = round(`Test statistic`, 3),
    `p-value` = round(`p-value`, 4)
  )

knitr::kable(
  sharpe_test_table,
  caption = "Test of equal out-of-sample Sharpe ratios",
  align = "c"
) %>%
  kableExtra::kable_styling(
    full_width = FALSE,
    bootstrap_options = c("striped", "hover", "condensed")
  )
Test of equal out-of-sample Sharpe ratios
Test Tangency OOS Sharpe 1/N OOS Sharpe Test statistic p-value
Jobson-Korkie with Memmel correction 1.006 0.767 0.741 0.4588

Although the tangency portfolio has a higher out-of-sample Sharpe ratio than the 1/N portfolio, the Jobson-Korkie test with Memmel correction shows that the difference is not statistically significant. The test statistic is 0.741 and the p-value is 0.4588, which is far above the 5% significance level. Therefore, we do not reject the null hypothesis that the two portfolios have equal out-of-sample Sharpe ratios. Economically, the tangency portfolio performed better in this sample, but statistically we cannot conclude that it reliably outperformed the simpler 1/N benchmark.

3.5 Structural considerations and final recommendation

3.5.1 Full-sample small-minus-large spread

# This section evaluates whether the small-cap premium appears stable over time.
# For each region, we compute the monthly small-minus-large spread:small-cap ETF excess return minus large-cap ETF excess return.
library(dplyr)
library(tidyr)
library(ggplot2)
library(scales)
library(knitr)
library(kableExtra)
library(zoo)

# Constructing regional small-minus-large spreads using real monthly dates
small_large_spreads <- tibble(
  Date = as.Date(excess_returns$Month_End),
  Europe = as.numeric(R_excess[, "EU_Small_SMC_PA"] - R_excess[, "EU_Large_XMEU_DE"]),
  `United States` = as.numeric(R_excess[, "US_Small_ZPRR_DE"] - R_excess[, "US_Large_SXR8_DE"]),
  `Emerging markets` = as.numeric(R_excess[, "EM_Small_SPYX"] - R_excess[, "EM_Large_SPYM"])
) %>%
  drop_na()

small_large_spreads_long <- small_large_spreads %>%
  pivot_longer(
    cols = -Date,
    names_to = "Region",
    values_to = "Small_Minus_Large"
  )

# Full-sample spread table
spread_summary_table <- small_large_spreads_long %>%
  group_by(Region) %>%
  summarise(
    `Annualised mean spread` = mean(Small_Minus_Large, na.rm = TRUE) * 12,
    `Annualised volatility` = sd(Small_Minus_Large, na.rm = TRUE) * sqrt(12),
    `Spread Sharpe ratio` = `Annualised mean spread` / `Annualised volatility`,
    `Positive months` = mean(Small_Minus_Large > 0, na.rm = TRUE),
    .groups = "drop"
  )

spread_summary_display <- spread_summary_table %>%
  mutate(
    `Annualised mean spread` = percent(`Annualised mean spread`, accuracy = 0.01),
    `Annualised volatility` = percent(`Annualised volatility`, accuracy = 0.01),
    `Spread Sharpe ratio` = round(`Spread Sharpe ratio`, 3),
    `Positive months` = percent(`Positive months`, accuracy = 0.1)
  )

kable(
  spread_summary_display,
  caption = "Full-sample small-minus-large return spread by region"
) %>%
  kable_styling(
    full_width = FALSE,
    bootstrap_options = c("striped", "hover", "condensed")
  )
Full-sample small-minus-large return spread by region
Region Annualised mean spread Annualised volatility Spread Sharpe ratio Positive months
Emerging markets 3.80% 9.15% 0.415 55.7%
Europe -2.49% 7.35% -0.339 47.1%
United States -4.75% 13.11% -0.363 41.4%
write.csv(
  spread_summary_table,
  file = file.path(output_dir, "step5_small_minus_large_full_sample.csv"),
  row.names = FALSE
)
# One-sample t-test for each region: H0 = small-minus-large spread is zero.
# The test is applied to monthly small-minus-large return spreads.
# The annualized mean is mean_monthly * 12.
# The standard error of the annualized mean is se_monthly * 12.
spread_ttest <- small_large_spreads_long %>%
  group_by(Region) %>%
  summarise(
    n_months        = n(),
    mean_monthly    = mean(Small_Minus_Large, na.rm = TRUE),
    sd_monthly      = sd(Small_Minus_Large, na.rm = TRUE),
    se_monthly      = sd_monthly / sqrt(n_months),
    t_stat          = mean_monthly / se_monthly,
    p_value         = 2 * pt(-abs(t_stat), df = n_months - 1),
    ann_mean        = mean_monthly * 12,
    ann_se          = se_monthly * 12,
    ci_lower_ann    = ann_mean - qt(0.975, df = n_months - 1) * ann_se,
    ci_upper_ann    = ann_mean + qt(0.975, df = n_months - 1) * ann_se,
    .groups = "drop"
  ) %>%
  mutate(
    Significance = case_when(
      p_value < 0.01 ~ "p < 0.01",
      p_value < 0.05 ~ "p < 0.05",
      p_value < 0.10 ~ "p < 0.10",
      TRUE           ~ "Not significant"
    )
  )

spread_ttest_display <- spread_ttest %>%
  transmute(
    Region,
    `Ann. mean spread`  = percent(ann_mean,     accuracy = 0.01),
    `Ann. SE`           = percent(ann_se,        accuracy = 0.01),
    `95% CI lower`      = percent(ci_lower_ann,  accuracy = 0.01),
    `95% CI upper`      = percent(ci_upper_ann,  accuracy = 0.01),
    `t-statistic`       = round(t_stat,  3),
    `p-value`           = round(p_value, 4),
    Significance
  )

kable(
  spread_ttest_display,
  caption = "One-sample t-test: annualized small-minus-large spread versus zero"
) %>%
  kable_styling(
    full_width = FALSE,
    bootstrap_options = c("striped", "hover", "condensed")
  )
One-sample t-test: annualized small-minus-large spread versus zero
Region Ann. mean spread Ann. SE 95% CI lower 95% CI upper t-statistic p-value Significance
Emerging markets 3.80% 3.79% -3.77% 11.36% 1.001 0.3201 Not significant
Europe -2.49% 3.04% -8.56% 3.58% -0.819 0.4158 Not significant
United States -4.75% 5.43% -15.58% 6.07% -0.876 0.3842 Not significant

The full-sample spread table shows mixed evidence for the small-cap premium. Emerging markets have a positive annualized small-minus-large spread of 3.80%, meaning that small caps outperformed large caps in this region. In contrast, Europe and the United States show negative spreads of -2.49% and -4.75%, respectively. However, the one-sample t-tests show that none of the regional spreads are statistically different from zero at conventional significance levels. Therefore, the evidence does not support a statistically robust small-cap premium across regions in our sample.

3.5.2 Subperiod evidence: Has the premium weakened?

split_date <- small_large_spreads$Date[ceiling(nrow(small_large_spreads) / 2)]

spread_subperiod_table <- small_large_spreads_long %>%
  mutate(
    Period = if_else(Date < split_date, "First half", "Second half")
  ) %>%
  group_by(Region, Period) %>%
  summarise(
    `Annualised mean spread` = mean(Small_Minus_Large, na.rm = TRUE) * 12,
    `Annualised volatility` = sd(Small_Minus_Large, na.rm = TRUE) * sqrt(12),
    `Spread Sharpe ratio` = `Annualised mean spread` / `Annualised volatility`,
    .groups = "drop"
  ) %>%
  pivot_wider(
    names_from = Period,
    values_from = c(
      `Annualised mean spread`,
      `Annualised volatility`,
      `Spread Sharpe ratio`
    )
  ) %>%
  mutate(
    `Change in mean spread` =
      `Annualised mean spread_Second half` - `Annualised mean spread_First half`,
    Interpretation = case_when(
  `Annualised mean spread_First half` > 0 &
    `Annualised mean spread_Second half` <= 0 ~ "Premium disappeared",
  
  `Annualised mean spread_First half` <= 0 &
    `Annualised mean spread_Second half` <= 0 &
    `Annualised mean spread_Second half` > `Annualised mean spread_First half` ~ "Still negative, slightly improved",
  
  `Annualised mean spread_First half` <= 0 &
    `Annualised mean spread_Second half` <= 0 &
    `Annualised mean spread_Second half` < `Annualised mean spread_First half` ~ "Still negative, weakened",
  
  `Annualised mean spread_Second half` < `Annualised mean spread_First half` ~ "Premium weakened",
  
  `Annualised mean spread_Second half` > `Annualised mean spread_First half` ~ "Premium strengthened",
  
  TRUE ~ "Mixed")
  )

spread_subperiod_display <- spread_subperiod_table %>%
  transmute(
    Region,
    `First-half mean spread` = percent(`Annualised mean spread_First half`, accuracy = 0.01),
    `Second-half mean spread` = percent(`Annualised mean spread_Second half`, accuracy = 0.01),
    `Change` = percent(`Change in mean spread`, accuracy = 0.01),
    `First-half Sharpe` = round(`Spread Sharpe ratio_First half`, 3),
    `Second-half Sharpe` = round(`Spread Sharpe ratio_Second half`, 3),
    Interpretation
  )

kable(
  spread_subperiod_display,
  caption = "Subperiod comparison of the small-minus-large spread"
) %>%
  kable_styling(
    full_width = FALSE,
    bootstrap_options = c("striped", "hover", "condensed")
  )
Subperiod comparison of the small-minus-large spread
Region First-half mean spread Second-half mean spread Change First-half Sharpe Second-half Sharpe Interpretation
Emerging markets 8.66% -0.80% -9.46% 0.832 -0.104 Premium disappeared
Europe -2.20% -2.77% -0.57% -0.272 -0.413 Still negative, weakened
United States -1.59% -7.75% -6.16% -0.120 -0.588 Still negative, weakened
write.csv(
  spread_subperiod_table,
  file = file.path(output_dir, "step5_small_minus_large_subperiods.csv"),
  row.names = FALSE
)

The subperiod comparison suggests that the small-cap premium was not stable over time. In emerging markets, the premium was positive in the first half but turned negative in the second half, meaning it disappeared. Europe remained negative in both periods and weakened slightly further in the second half. The United States also remained negative and weakened substantially in the second half.

3.5.3 Rolling evidence of small-cap outperformance

# Rolling 36-month small-minus-large spread
rolling_window <- 36

rolling_spreads <- small_large_spreads_long %>%
  arrange(Region, Date) %>%
  group_by(Region) %>%
  mutate(
    `Rolling annualised spread` = zoo::rollapply(
      Small_Minus_Large,
      width = rolling_window,
      FUN = function(x) mean(x, na.rm = TRUE) * 12,
      fill = NA,
      align = "right"
    )
  ) %>%
  ungroup() %>%
  drop_na(`Rolling annualised spread`)

rolling_spread_plot <- ggplot(
  rolling_spreads,
  aes(
    x = Date,
    y = `Rolling annualised spread`,
    color = Region
  )
) +
  geom_hline(yintercept = 0, linewidth = 0.4, color = "grey50") +
  geom_line(linewidth = 1) +
  scale_y_continuous(labels = percent_format(accuracy = 1)) +
  labs(
    title = "Rolling 36-month small-minus-large return spread",
    subtitle = "Positive values indicate small-cap outperformance relative to large-cap ETFs",
    x = NULL,
    y = "Annualised small-minus-large spread",
    color = "Region"
  ) +
  theme_minimal()

rolling_spread_plot

ggsave(
  filename = file.path(output_dir, "step5_rolling_small_minus_large_spread.png"),
  plot = rolling_spread_plot,
  width = 9,
  height = 5.5,
  dpi = 300
)

The rolling 36-month spread graph confirms that small-cap outperformance was unstable. Emerging markets show strong positive rolling spreads at the beginning, but the advantage declines and turns negative near the end of the sample. Europe and the United States remain mostly below zero, indicating persistent small-cap underperformance relative to large caps.

3.5.4 Cumulative relative performance

# Cumulative relative performance of small caps versus large caps
cumulative_relative <- returns_data %>%
  transmute(
    Date = Month_End,
    Europe = cumprod(1 + EU_Small_SMC_PA) / cumprod(1 + EU_Large_XMEU_DE) - 1,
    `United States` = cumprod(1 + US_Small_ZPRR_DE) / cumprod(1 + US_Large_SXR8_DE) - 1,
    `Emerging markets` = cumprod(1 + EM_Small_SPYX) / cumprod(1 + EM_Large_SPYM) - 1
  ) %>%
  pivot_longer(
    cols = -Date,
    names_to = "Region",
    values_to = "Cumulative_Relative_Performance"
  )

cumulative_spread_plot <- ggplot(
  cumulative_relative,
  aes(
    x = Date,
    y = Cumulative_Relative_Performance,
    color = Region
  )
) +
  geom_hline(yintercept = 0, linewidth = 0.4, color = "grey50") +
  geom_line(linewidth = 1) +
  scale_y_continuous(labels = percent_format(accuracy = 1)) +
  labs(
    title = "Cumulative small-cap performance relative to large-cap ETFs",
    subtitle = "The line rises when small-cap cumulative wealth exceeds large-cap cumulative wealth",
    x = NULL,
    y = "Cumulative relative performance",
    color = "Region"
  ) +
  theme_minimal()

cumulative_spread_plot

ggsave(
  filename = file.path(output_dir, "step5_cumulative_small_minus_large_spread.png"),
  plot = cumulative_spread_plot,
  width = 9,
  height = 5.5,
  dpi = 300
)

The cumulative relative performance graph shows the realized investor experience from holding small caps instead of large caps. Emerging-market small caps end above large caps, but their advantage falls after peaking during the sample. Europe and the United States end below zero, meaning small caps underperformed their large-cap benchmarks over the full period. Overall, the graph supports a cautious view of the small-cap premium.

3.5.5 Summary of evidence and final recommendation

recommendation_table <- tibble(
  Evidence = c(
    "Risk-return profile",
    "Direct small-minus-large comparison",
    "CAPM and Security Market Line",
    "Out-of-sample robustness",
    "Subperiod and rolling evidence",
    "Final investment implication"
  ),
  Result = c(
    "Small-cap ETFs did not consistently deliver higher risk-adjusted returns than their large-cap counterparts.",
    "Small caps did not outperform large caps consistently across regions.",
    "The small-cap ETFs do not show clear, broad evidence of abnormal performance relative to CAPM-implied returns.",
    "Mean-variance results should be interpreted cautiously because optimized portfolios are sensitive to estimation error.",
    "The premium appears unstable over time and differs across regions.",
    "A small allocation may be justified for diversification, but the evidence does not support small caps as a strong standalone return-enhancement strategy."
  )
)

kable(
  recommendation_table,
  caption = "Summary of evidence for the final recommendation"
) %>%
  kable_styling(
    full_width = FALSE,
    bootstrap_options = c("striped", "hover", "condensed")
  ) %>%
  column_spec(1, bold = TRUE)
Summary of evidence for the final recommendation
Evidence Result
Risk-return profile Small-cap ETFs did not consistently deliver higher risk-adjusted returns than their large-cap counterparts.
Direct small-minus-large comparison Small caps did not outperform large caps consistently across regions.
CAPM and Security Market Line The small-cap ETFs do not show clear, broad evidence of abnormal performance relative to CAPM-implied returns.
Out-of-sample robustness Mean-variance results should be interpreted cautiously because optimized portfolios are sensitive to estimation error.
Subperiod and rolling evidence The premium appears unstable over time and differs across regions.
Final investment implication A small allocation may be justified for diversification, but the evidence does not support small caps as a strong standalone return-enhancement strategy.