2. Descriptive statistics
# ============================================================
# 2. DESCRIPTIVE STATISTICS (THESIS-READY, SELF-CONTAINED)
# ============================================================
library(dplyr)
library(tidyr)
library(lubridate)
library(ggplot2)
library(scales)
library(zoo)
## Warning: package 'zoo' was built under R version 4.4.3
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
library(stringr)
library(patchwork)
library(ggrepel)
library(gt)
## Warning: package 'gt' was built under R version 4.4.3
# --- Optional for skew/kurtosis (clean + standard) ---
if (!requireNamespace("moments", quietly = TRUE)) install.packages("moments")
library(moments)
# -----------------------------
# 0) Local thesis theme (so this chunk runs standalone)
# -----------------------------
theme_thesis <- function() {
theme_minimal(base_size = 12) +
theme(
plot.title = element_text(face = "bold", size = 14),
plot.subtitle = element_text(size = 10.5, color = "grey30"),
legend.position = "bottom",
legend.title = element_blank(),
legend.text = element_text(size = 9.5),
legend.key.width = unit(14, "pt"),
legend.spacing.x = unit(6, "pt"),
legend.box.margin = margin(t = 8),
plot.margin = margin(t = 10, r = 10, b = 10, l = 10),
panel.grid.minor = element_blank()
)
}
# -----------------------------
# 1) Helpers
# -----------------------------
# Quarter label as YYYYQ#
to_qid <- function(d) paste0(year(d), "Q", quarter(d))
# Annualization from quarterly log returns
ann_ret_from_log <- function(rq) exp(4 * mean(rq, na.rm = TRUE)) - 1
ann_vol_from_log <- function(rq) sd(rq, na.rm = TRUE) * sqrt(4)
ann_rf_from_log <- function(rf_q_log) exp(4 * mean(rf_q_log, na.rm = TRUE)) - 1
# Lag-1 autocorrelation
acf1 <- function(x) {
x <- x[is.finite(x)]
if (length(x) < 3) return(NA_real_)
cor(x[-1], x[-length(x)], use = "complete.obs")
}
# Wealth index from log returns
wealth_index <- function(r_log) 100 * exp(cumsum(replace_na(r_log, 0)))
# Drawdown from wealth index
drawdown_from_wealth <- function(w) (w / cummax(w)) - 1
# Max drawdown + time-to-recovery (quarters from trough to first recovery to prior peak)
mdd_and_recovery <- function(date_q, r_log) {
ok <- is.finite(r_log) & is.finite(date_q)
date_q <- date_q[ok]
r_log <- r_log[ok]
if (length(r_log) < 3) return(tibble(mdd = NA_real_, ttr_q = NA_real_))
w <- wealth_index(r_log)
peak <- cummax(w)
dd <- drawdown_from_wealth(w)
i_trough <- which.min(dd)
mdd <- dd[i_trough]
peak_before <- peak[i_trough]
if (i_trough < length(w)) {
i_rec <- which(w[(i_trough + 1):length(w)] >= peak_before)
ttr_q <- if (length(i_rec) == 0) NA_real_ else i_rec[1]
} else {
ttr_q <- NA_real_
}
tibble(mdd = mdd, ttr_q = ttr_q)
}
# Formatting rules
fmt_pct1 <- function(x) ifelse(is.finite(x), percent(x, accuracy = 0.1), NA_character_)
fmt_num2 <- function(x) ifelse(is.finite(x), formatC(x, format = "f", digits = 2), NA_character_)
fmt_int <- function(x) ifelse(is.finite(x), as.character(as.integer(x)), NA_character_)
# -----------------------------
# 2) Build returns ONCE (nominal + real), inside this chunk
# This avoids the "df not found" problem when knitting.
# -----------------------------
y0_ann <- 0.04 # baseline gross annual rental yield (consistent with your later code)
df_desc <- master %>%
arrange(date_q) %>%
# keep only what we need (safer + clearer)
select(
date_q,
sbitop_close, dy_eq_ann,
hicp_all, hicp_rent,
hpi_si_eurostat,
hpi_si_surs_used,
hpi_lj_surs_used,
hpi_si_exlj_surs_used,
rf_log_q
)
# Robust anchor quarter for rent/price yield model
base_anchor <- df_desc %>%
filter(
!is.na(hicp_rent),
!is.na(hpi_si_eurostat),
!is.na(hpi_si_surs_used),
!is.na(hpi_lj_surs_used),
!is.na(hpi_si_exlj_surs_used)
) %>%
slice(1)
R0 <- base_anchor$hicp_rent
He0 <- base_anchor$hpi_si_eurostat
Hs0 <- base_anchor$hpi_si_surs_used
Hl0 <- base_anchor$hpi_lj_surs_used
Hx0 <- base_anchor$hpi_si_exlj_surs_used
df_desc <- df_desc %>%
mutate(
# Inflation (quarterly log change)
pi_log = log(hicp_all / lag(hicp_all)),
# Equity: nominal total log return = log price return + DY/4
dy_eq_q = dy_eq_ann / 4,
r_eq_price_log = log(sbitop_close / lag(sbitop_close)),
r_eq_total_log = r_eq_price_log + dy_eq_q,
# Real estate price log returns
r_re_eu_price_log = log(hpi_si_eurostat / lag(hpi_si_eurostat)),
r_re_si_price_log = log(hpi_si_surs_used / lag(hpi_si_surs_used)),
r_re_lj_price_log = log(hpi_lj_surs_used / lag(hpi_lj_surs_used)),
r_re_ex_price_log = log(hpi_si_exlj_surs_used / lag(hpi_si_exlj_surs_used)),
# Time-varying rental yields via rent/price ratio (anchored at base quarter)
y_eu_t = y0_ann * ((hicp_rent / R0) / (hpi_si_eurostat / He0)),
y_si_t = y0_ann * ((hicp_rent / R0) / (hpi_si_surs_used / Hs0)),
y_lj_t = y0_ann * ((hicp_rent / R0) / (hpi_lj_surs_used / Hl0)),
y_ex_t = y0_ann * ((hicp_rent / R0) / (hpi_si_exlj_surs_used / Hx0)),
# Quarterly rental income (lagged to avoid look-ahead)
r_re_eu_income_q = lag(y_eu_t) / 4,
r_re_si_income_q = lag(y_si_t) / 4,
r_re_lj_income_q = lag(y_lj_t) / 4,
r_re_ex_income_q = lag(y_ex_t) / 4,
# Real estate: nominal total log returns
r_re_eu_total_log = r_re_eu_price_log + r_re_eu_income_q,
r_re_si_total_log = r_re_si_price_log + r_re_si_income_q,
r_re_lj_total_log = r_re_lj_price_log + r_re_lj_income_q,
r_re_ex_total_log = r_re_ex_price_log + r_re_ex_income_q,
# Real total log returns (deflated)
r_eq_total_real_log = r_eq_total_log - pi_log,
r_re_eu_total_real_log = r_re_eu_total_log - pi_log,
r_re_si_total_real_log = r_re_si_total_log - pi_log,
r_re_lj_total_real_log = r_re_lj_total_log - pi_log,
r_re_ex_total_real_log = r_re_ex_total_log - pi_log
)
# -----------------------------
# 3) Long panels for stats
# -----------------------------
series_map <- tibble::tribble(
~key, ~label,
"r_eq_total_log", "Equities: SBITOP",
"r_re_eu_total_log", "Real estate: Slovenia (Eurostat)",
"r_re_si_total_log", "Real estate: Slovenia (SURS)",
"r_re_lj_total_log", "Real estate: Ljubljana (SURS)",
"r_re_ex_total_log", "Real estate: Slovenia ex-LJ (SURS)"
)
series_map_real <- tibble::tribble(
~key, ~label,
"r_eq_total_real_log", "Equities: SBITOP",
"r_re_eu_total_real_log", "Real estate: Slovenia (Eurostat)",
"r_re_si_total_real_log", "Real estate: Slovenia (SURS)",
"r_re_lj_total_real_log", "Real estate: Ljubljana (SURS)",
"r_re_ex_total_real_log", "Real estate: Slovenia ex-LJ (SURS)"
)
ret_nom_long <- df_desc %>%
select(date_q, rf_log_q, all_of(series_map$key)) %>%
pivot_longer(cols = all_of(series_map$key), names_to = "key", values_to = "r_log") %>%
left_join(series_map, by = "key") %>%
transmute(
date_q, series = label,
r_log,
r_simple = exp(r_log) - 1, # for percent-style quartiles/min/max
rf_log_q
) %>%
filter(is.finite(r_log), is.finite(rf_log_q), is.finite(r_simple))
ret_real_long <- df_desc %>%
select(date_q, rf_log_q, all_of(series_map_real$key)) %>%
pivot_longer(cols = all_of(series_map_real$key), names_to = "key", values_to = "r_log") %>%
left_join(series_map_real, by = "key") %>%
transmute(
date_q, series = label,
r_log,
r_simple = exp(r_log) - 1,
rf_log_q
) %>%
filter(is.finite(r_log), is.finite(rf_log_q), is.finite(r_simple))
# -----------------------------
# 4) TABLE 1 — Descriptive statistics
# Rules enforced:
# - Start/End shown as YYYYQ#
# - Sharpe (ann.) uses rf from rf_log_q (annualized)
# - Percent columns shown with 1 decimal
# - Skew/kurt with 2 decimals
# - Full borders + readable alignment
# - Subtitle clarifies excess kurtosis (k-3)
# -----------------------------
make_desc_block <- function(ret_long, block_name) {
core <- ret_long %>%
group_by(series) %>%
summarise(
n_q = n(),
start = min(date_q),
end = max(date_q),
# Tails / min / max on SIMPLE quarterly returns (matches percent interpretation)
p05_q = quantile(r_simple, 0.05, na.rm = TRUE, type = 7),
med_q = median(r_simple, na.rm = TRUE),
p95_q = quantile(r_simple, 0.95, na.rm = TRUE, type = 7),
min_q = min(r_simple, na.rm = TRUE),
max_q = max(r_simple, na.rm = TRUE),
# Distribution shape (use log returns; stable + standard)
skew = moments::skewness(r_log, na.rm = TRUE),
ex_kurt = moments::kurtosis(r_log, na.rm = TRUE) - 3,
# Annualized return/vol from LOG returns
ann_ret = ann_ret_from_log(r_log),
ann_vol = ann_vol_from_log(r_log),
# Annualized rf from LOG rf aligned to the same quarters
ann_rf = ann_rf_from_log(rf_log_q),
# Sharpe (annualized)
sharpe = (ann_ret - ann_rf) / ann_vol,
# Lag-1 autocorr on log returns
acf1_q = acf1(r_log),
.groups = "drop"
)
dd <- ret_long %>%
group_by(series) %>%
summarise(tmp = list(mdd_and_recovery(date_q, r_log)), .groups = "drop") %>%
tidyr::unnest(tmp)
core %>%
left_join(dd, by = "series") %>%
mutate(
block = block_name,
Start = to_qid(start),
End = to_qid(end)
) %>%
select(
block, Series = series,
n_q, Start, End,
ann_ret, ann_vol, ann_rf, sharpe,
p05_q, med_q, p95_q,
acf1_q, skew, ex_kurt,
mdd, ttr_q,
min_q, max_q
)
}
desc_nom <- make_desc_block(ret_nom_long, "Nominal")
desc_real <- make_desc_block(ret_real_long, "Real (inflation-adjusted)")
desc_all <- bind_rows(desc_nom, desc_real) %>%
mutate(
`N (quarters)` = n_q,
`Annualized return` = fmt_pct1(ann_ret),
`Annualized volatility` = fmt_pct1(ann_vol),
`Annualized rf` = fmt_pct1(ann_rf),
`Sharpe (ann.)` = fmt_num2(sharpe),
`P5 (qtr)` = fmt_pct1(p05_q),
`Median (qtr)` = fmt_pct1(med_q),
`P95 (qtr)` = fmt_pct1(p95_q),
`ACF(1)` = fmt_num2(acf1_q),
Skewness = fmt_num2(skew),
`Excess kurtosis` = fmt_num2(ex_kurt),
`Max drawdown` = fmt_pct1(mdd),
`Time to recovery (qtrs)` = fmt_int(ttr_q),
`Min (qtr)` = fmt_pct1(min_q),
`Max (qtr)` = fmt_pct1(max_q)
) %>%
select(
block, Series,
`N (quarters)`, Start, End,
`Annualized return`, `Annualized volatility`, `Annualized rf`, `Sharpe (ann.)`,
`P5 (qtr)`, `Median (qtr)`, `P95 (qtr)`,
`ACF(1)`, Skewness, `Excess kurtosis`,
`Max drawdown`, `Time to recovery (qtrs)`,
`Min (qtr)`, `Max (qtr)`
)
tab_2_1 <- desc_all %>%
gt(groupname_col = "block") %>%
cols_align(align = "left", columns = c(Series, Start, End)) %>%
cols_align(align = "center", columns = -c(Series, Start, End)) %>%
tab_style(
style = cell_text(weight = "bold"),
locations = cells_row_groups()
) %>%
# Full borders + clear grid
tab_options(
table.border.top.width = px(1),
table.border.bottom.width = px(1),
table.border.left.width = px(1),
table.border.right.width = px(1),
table_body.hlines.width = px(1),
table_body.vlines.width = px(1),
heading.border.bottom.width = px(1),
row_group.border.top.width = px(1),
row_group.border.bottom.width = px(1),
data_row.padding = px(6),
heading.title.font.size = px(14),
heading.subtitle.font.size = px(11)
)
tab_2_1
| Series |
N (quarters) |
Start |
End |
Annualized return |
Annualized volatility |
Annualized rf |
Sharpe (ann.) |
P5 (qtr) |
Median (qtr) |
P95 (qtr) |
ACF(1) |
Skewness |
Excess kurtosis |
Max drawdown |
Time to recovery (qtrs) |
Min (qtr) |
Max (qtr) |
| Nominal |
| Equities: SBITOP |
59 |
2010Q2 |
2024Q4 |
9.3% |
16.9% |
0.3% |
0.53 |
-12.4% |
3.2% |
16.0% |
0.06 |
-0.57 |
0.12 |
-37.0% |
8 |
-19.9% |
18.6% |
| Real estate: Ljubljana (SURS) |
59 |
2010Q2 |
2024Q4 |
8.0% |
5.2% |
0.3% |
1.48 |
-2.5% |
2.0% |
5.7% |
0.42 |
-0.83 |
0.91 |
-14.9% |
7 |
-6.6% |
6.3% |
| Real estate: Slovenia (Eurostat) |
59 |
2010Q2 |
2024Q4 |
8.3% |
4.3% |
0.3% |
1.86 |
-2.5% |
2.6% |
5.2% |
0.38 |
-1.03 |
1.09 |
-8.2% |
7 |
-4.8% |
5.7% |
| Real estate: Slovenia (SURS) |
59 |
2010Q2 |
2024Q4 |
8.6% |
4.4% |
0.3% |
1.89 |
-1.6% |
2.4% |
5.2% |
0.50 |
-0.59 |
0.16 |
-8.2% |
5 |
-4.2% |
6.4% |
| Real estate: Slovenia ex-LJ (SURS) |
59 |
2010Q2 |
2024Q4 |
8.9% |
4.8% |
0.3% |
1.80 |
-2.1% |
2.7% |
5.9% |
0.40 |
-0.56 |
0.23 |
-6.5% |
3 |
-4.2% |
7.0% |
| Real (inflation-adjusted) |
| Equities: SBITOP |
59 |
2010Q2 |
2024Q4 |
7.0% |
16.9% |
0.3% |
0.39 |
-12.9% |
2.8% |
14.6% |
0.10 |
-0.57 |
-0.11 |
-39.4% |
9 |
-19.3% |
17.9% |
| Real estate: Ljubljana (SURS) |
59 |
2010Q2 |
2024Q4 |
5.7% |
5.5% |
0.3% |
0.99 |
-3.0% |
1.4% |
5.3% |
0.30 |
-0.72 |
0.48 |
-19.5% |
9 |
-6.9% |
6.0% |
| Real estate: Slovenia (Eurostat) |
59 |
2010Q2 |
2024Q4 |
5.9% |
4.3% |
0.3% |
1.30 |
-2.7% |
1.7% |
3.9% |
0.22 |
-1.07 |
0.88 |
-13.4% |
9 |
-5.1% |
4.8% |
| Real estate: Slovenia (SURS) |
59 |
2010Q2 |
2024Q4 |
6.2% |
4.6% |
0.3% |
1.31 |
-2.6% |
1.9% |
4.3% |
0.33 |
-0.66 |
0.14 |
-12.9% |
7 |
-5.2% |
5.7% |
| Real estate: Slovenia ex-LJ (SURS) |
59 |
2010Q2 |
2024Q4 |
6.6% |
4.9% |
0.3% |
1.29 |
-2.8% |
1.9% |
5.1% |
0.26 |
-0.58 |
0.28 |
-8.9% |
6 |
-5.2% |
7.2% |
gtsave(tab_2_1, "Table_1_descriptive_stats.html")
# -----------------------------
# 5) TABLE 2 and 3 — Correlation matrices (nominal + real)
# - bold diagonal
# - light shading by magnitude
# -----------------------------
make_corr_table <- function(ret_long, table_title) {
wide <- ret_long %>%
select(date_q, series, r_log) %>%
pivot_wider(names_from = series, values_from = r_log) %>%
arrange(date_q)
corr <- cor(wide %>% select(-date_q), use = "pairwise.complete.obs")
corr_df <- as.data.frame(round(corr, 2)) %>% tibble::rownames_to_column("Series")
gt_tbl <- corr_df %>%
gt() %>%
cols_align(align = "left", columns = "Series") %>%
cols_align(align = "center", columns = -Series) %>%
fmt_number(columns = -Series, decimals = 2) %>%
tab_options(
table.border.top.width = px(1),
table.border.bottom.width = px(1),
table.border.left.width = px(1),
table.border.right.width = px(1),
table_body.hlines.width = px(1),
table_body.vlines.width = px(1),
heading.border.bottom.width = px(1),
data_row.padding = px(6)
) %>%
data_color(
columns = -Series,
colors = scales::col_numeric(
palette = c("#FFFFFF", "#F2F2F2"),
domain = c(0, 1)
),
alpha = 1
)
# Bold diagonal
series_names <- colnames(corr)
for (s in series_names) {
gt_tbl <- gt_tbl %>%
tab_style(
style = cell_text(weight = "bold"),
locations = cells_body(columns = s, rows = Series == s)
)
}
gt_tbl
}
tab_2_2 <- make_corr_table(ret_nom_long, "Table 2: Correlation matrix of nominal quarterly total returns")
## Warning: Since gt v0.9.0, the `colors` argument has been deprecated.
## • Please use the `fn` argument instead.
## This warning is displayed once every 8 hours.
tab_2_3 <- make_corr_table(ret_real_long, "Table 3: Correlation matrix of real quarterly total returns")
tab_2_2
| Series |
Equities: SBITOP |
Real estate: Slovenia (Eurostat) |
Real estate: Slovenia (SURS) |
Real estate: Ljubljana (SURS) |
Real estate: Slovenia ex-LJ (SURS) |
| Equities: SBITOP |
1.00 |
0.09 |
0.05 |
0.05 |
0.03 |
| Real estate: Slovenia (Eurostat) |
0.09 |
1.00 |
0.82 |
0.67 |
0.77 |
| Real estate: Slovenia (SURS) |
0.05 |
0.82 |
1.00 |
0.82 |
0.92 |
| Real estate: Ljubljana (SURS) |
0.05 |
0.67 |
0.82 |
1.00 |
0.54 |
| Real estate: Slovenia ex-LJ (SURS) |
0.03 |
0.77 |
0.92 |
0.54 |
1.00 |
tab_2_3
| Series |
Equities: SBITOP |
Real estate: Slovenia (Eurostat) |
Real estate: Slovenia (SURS) |
Real estate: Ljubljana (SURS) |
Real estate: Slovenia ex-LJ (SURS) |
| Equities: SBITOP |
1.00 |
0.09 |
0.05 |
0.06 |
0.03 |
| Real estate: Slovenia (Eurostat) |
0.09 |
1.00 |
0.83 |
0.70 |
0.77 |
| Real estate: Slovenia (SURS) |
0.05 |
0.83 |
1.00 |
0.84 |
0.92 |
| Real estate: Ljubljana (SURS) |
0.06 |
0.70 |
0.84 |
1.00 |
0.57 |
| Real estate: Slovenia ex-LJ (SURS) |
0.03 |
0.77 |
0.92 |
0.57 |
1.00 |
gtsave(tab_2_2, "Table_2_corr_matrix_nominal.html")
gtsave(tab_2_3, "Table_3_corr_matrix_real.html")
# -----------------------------
# 6) FIGURE 1 — Risk–return scatter (fixed scale) + Sharpe
# Label includes Return, Vol, Sharpe
# -----------------------------
riskret <- ret_nom_long %>%
group_by(series) %>%
summarise(
ann_ret = ann_ret_from_log(r_log),
ann_vol = ann_vol_from_log(r_log),
ann_rf = ann_rf_from_log(rf_log_q),
sharpe = (ann_ret - ann_rf) / ann_vol,
.groups = "drop"
) %>%
mutate(
series_short = case_when(
series == "Equities: SBITOP" ~ "EQ: SBITOP",
series == "Real estate: Slovenia (Eurostat)" ~ "RE: SI (Eurostat)",
series == "Real estate: Slovenia (SURS)" ~ "RE: SI (SURS)",
series == "Real estate: Ljubljana (SURS)" ~ "RE: LJ (SURS)",
series == "Real estate: Slovenia ex-LJ (SURS)" ~ "RE: ex-LJ (SURS)",
TRUE ~ series
)
)
# Axis limits with padding so equities fully visible + labels don’t clip
x_min <- min(riskret$ann_vol, na.rm = TRUE); x_max <- max(riskret$ann_vol, na.rm = TRUE)
y_min <- min(riskret$ann_ret, na.rm = TRUE); y_max <- max(riskret$ann_ret, na.rm = TRUE)
x_pad <- 0.12 * (x_max - x_min); y_pad <- 0.12 * (y_max - y_min)
x_lim <- c(x_min - x_pad, x_max + x_pad)
y_lim <- c(y_min - y_pad, y_max + y_pad)
riskret <- riskret %>%
mutate(
lab_A = paste0(
series_short, "\n",
fmt_pct1(ann_ret), ", ", fmt_pct1(ann_vol), ", S=", fmt_num2(sharpe)
)
)
p2_1_A <- ggplot(riskret, aes(x = ann_vol, y = ann_ret)) +
geom_point(size = 3.2, alpha = 0.9) +
ggrepel::geom_text_repel(
aes(label = lab_A),
size = 3.3,
min.segment.length = 0,
box.padding = 0.35,
point.padding = 0.25,
max.overlaps = Inf
) +
scale_x_continuous(labels = percent_format(accuracy = 1), limits = x_lim) +
scale_y_continuous(labels = percent_format(accuracy = 1), limits = y_lim) +
labs(
x = "Annualized volatility",
y = "Annualized return"
) +
theme_thesis() +
theme(legend.position = "none")
p2_1_A

ggsave("Figure_1_risk_return_scatter_labels.png", p2_1_A,
width = 12, height = 7.2, dpi = 300, limitsize = FALSE)
# -----------------------------
# 7) FIGURE 2 — Return distributions
# - identical x-limits across facets
# - mean + median markers
# -----------------------------
dist_df <- ret_nom_long %>%
select(date_q, series, r_simple) %>%
filter(is.finite(r_simple)) %>%
mutate(series = factor(series, levels = series_map$label))
x_rng <- range(dist_df$r_simple, na.rm = TRUE)
x_pad2 <- 0.06 * (x_rng[2] - x_rng[1])
x_lim2 <- c(x_rng[1] - x_pad2, x_rng[2] + x_pad2)
markers <- dist_df %>%
group_by(series) %>%
summarise(
mean_q = mean(r_simple, na.rm = TRUE),
med_q = median(r_simple, na.rm = TRUE),
.groups = "drop"
)
p2_2 <- ggplot(dist_df, aes(x = r_simple)) +
geom_density(linewidth = 1.0, fill = "grey80", color = "grey20", adjust = 1.05) +
geom_vline(xintercept = 0, linewidth = 0.4, color = "grey65") +
geom_vline(data = markers, aes(xintercept = mean_q),
linewidth = 0.55, linetype = "solid", color = "grey35") +
geom_vline(data = markers, aes(xintercept = med_q),
linewidth = 0.55, linetype = "dashed", color = "grey35") +
facet_wrap(~ series, ncol = 2, scales = "fixed") +
coord_cartesian(xlim = x_lim2) +
scale_x_continuous(labels = percent_format(accuracy = 1)) +
labs(
x = "Quarterly total return (simple, nominal)",
y = "Density"
) +
theme_thesis() +
theme(legend.position = "none",
strip.text = element_text(face = "bold"))
p2_2

ggsave("Figure_2_return_distributions_nominal_fixed_scale.png", p2_2,
width = 12, height = 8.2, dpi = 300, limitsize = FALSE)
3. Graphical analysis
theme_thesis <- function() {
theme_minimal(base_size = 12) +
theme(
plot.title = element_text(face = "bold", size = 14),
plot.subtitle = element_text(size = 10.5, color = "grey30"),
legend.position = "bottom",
legend.title = element_blank(),
legend.text = element_text(size = 9.5),
legend.key.width = unit(18, "pt"),
legend.spacing.x = unit(8, "pt"),
plot.margin = margin(t = 10, r = 10, b = 10, l = 10),
panel.grid.minor = element_blank()
)
}
# inside ggplot(...)
guides(
color = guide_legend(nrow = 3, byrow = TRUE),
linetype = "none"
)
## <Guides[2] ggproto object>
##
## colour : <GuideLegend>
## linetype : "none"
3.1. Figure 3: Index levels comparisson - capital appreciation
only
# ------------------------------------------------------------
# FIGURE 3: Capital appreciation (price-only) – indexed growth since base period
#
# What this figure represents:
# - Equities: SBITOP (quarter-end close level; price-only, no dividends)
# - Real estate price indices (price-only, no rental income):
# * Eurostat HPI Slovenia (broad overall market coverage / all dwellings)
# * SURS HPI "selected series" used in this thesis:
# - Slovenia total
# - Ljubljana
# - Slovenia ex-Ljubljana
#
# Why rebasing is needed:
# - SBITOP and HPI are both index levels but with different base/scales.
# - Rebasing to 100 at a common base quarter makes them comparable as "growth since start".
#
# Why the graph was being cut:
# - Legend labels were long and exceeded the plotting canvas.
# - Fix: wrap legend labels + give enough bottom space + save with larger height.
#
# Output:
# - Prints a clean figure in RStudio/Rmd
# - Saves a high-resolution PNG that will not clip the legend
# ------------------------------------------------------------
# 1) Load packages
library(dplyr)
library(tidyr)
library(ggplot2)
library(scales)
library(stringr)
# 2) Start from the LEVELS dataset (master) so the base quarter is included
# Ensure data are ordered by time.
levels_plot <- master %>% arrange(date_q)
# 3) Choose a robust base quarter:
# - First quarter where all required level series exist (Eurostat + SURS + SBITOP).
base <- levels_plot %>%
filter(
!is.na(sbitop_close),
!is.na(hpi_si_eurostat),
!is.na(hpi_si_surs_used),
!is.na(hpi_lj_surs_used),
!is.na(hpi_si_exlj_surs_used)
) %>%
slice(1)
# Base values for rebasing
P0 <- base$sbitop_close
He0 <- base$hpi_si_eurostat
Hs0 <- base$hpi_si_surs_used
Hl0 <- base$hpi_lj_surs_used
Hx0 <- base$hpi_si_exlj_surs_used
# 4) Rebase each index to 100 at the base quarter (capital appreciation only)
levels_plot <- levels_plot %>%
mutate(
idx_eq_price = 100 * sbitop_close / P0,
# Eurostat HPI: broad overall residential market coverage ("all dwellings")
idx_re_si_eu = 100 * hpi_si_eurostat / He0,
# SURS HPI "selected series": the SURS indices you chose to use in the thesis
idx_re_si_surs = 100 * hpi_si_surs_used / Hs0,
idx_re_lj_surs = 100 * hpi_lj_surs_used / Hl0,
idx_re_exlj = 100 * hpi_si_exlj_surs_used / Hx0
)
# 5) Convert to long format for ggplot and create reader-friendly labels
# IMPORTANT: Wrap labels to prevent legend clipping.
plot_1a_data <- levels_plot %>%
select(date_q, idx_eq_price, idx_re_si_eu, idx_re_si_surs, idx_re_lj_surs, idx_re_exlj) %>%
pivot_longer(-date_q, names_to = "series", values_to = "index") %>%
mutate(
series = recode(series,
idx_eq_price = "Equities: SBITOP (price-only)",
idx_re_si_eu = "Real estate: Slovenia (Eurostat HPI, all dwellings)",
idx_re_si_surs = "Real estate: Slovenia (SURS HPI, selected series)",
idx_re_lj_surs = "Real estate: Ljubljana (SURS HPI, selected series)",
idx_re_exlj = "Real estate: Slovenia ex-LJ (SURS HPI, selected series)"
),
# Wrap long legend labels so they fit on the canvas (prevents clipping)
series = str_wrap(series, width = 34)
)
# Set legend order AFTER wrapping (so ordering stays correct)
legend_levels <- str_wrap(c(
"Equities: SBITOP (price-only)",
"Real estate: Slovenia (Eurostat HPI, all dwellings)",
"Real estate: Slovenia (SURS HPI, selected series)",
"Real estate: Ljubljana (SURS HPI, selected series)",
"Real estate: Slovenia ex-LJ (SURS HPI, selected series)"
), width = 34)
plot_1a_data <- plot_1a_data %>%
mutate(series = factor(series, levels = legend_levels))
# 6) Optional event markers for narrative context (edit/remove if you want)
events <- tibble::tribble(
~date, ~label,
as.Date("2011-09-30"), "Euro area crisis",
as.Date("2020-03-31"), "COVID-19 shock",
as.Date("2022-03-31"), "Inflation & rate shock"
)
# Put event labels just below the top of the plot
y_top <- max(plot_1a_data$index, na.rm = TRUE)
# 7) Define a thesis-style theme:
# - smaller legend text
# - extra margin/spacing so legend never clips
theme_thesis <- function() {
theme_minimal(base_size = 12) +
theme(
plot.title = element_text(face = "bold", size = 14),
plot.subtitle = element_text(size = 10.5, color = "grey30"),
legend.position = "bottom",
legend.title = element_blank(),
legend.text = element_text(size = 9.5),
legend.key.width = unit(14, "pt"),
legend.spacing.x = unit(6, "pt"),
legend.box.margin = margin(t = 8),
# Keep margins modest; the real anti-clipping fix is label wrapping + save height
plot.margin = margin(t = 10, r = 10, b = 10, l = 10),
panel.grid.minor = element_blank()
)
}
# 8) Build the plot
# - Use both color + linetype for readability
# - Show only ONE legend (color) to avoid duplicated legend entries
p1a <- ggplot(plot_1a_data, aes(x = date_q, y = index, color = series, linetype = series)) +
geom_line(linewidth = 1.0) +
scale_y_continuous(labels = label_number(accuracy = 1)) +
scale_x_date(date_breaks = "2 years", date_labels = "%Y") +
guides(
color = guide_legend(ncol = 2, byrow = TRUE), # 2 columns helps fit wrapped labels cleanly
linetype = "none"
) +
labs(
x = NULL,
y = "Index level (base = 100 at start)"
) +
theme_thesis() +
geom_vline(
data = events,
aes(xintercept = date),
linewidth = 0.4,
linetype = 3,
color = "grey60",
inherit.aes = FALSE
) +
geom_text(
data = events,
aes(x = date, y = 0.96 * y_top, label = label),
vjust = -0.3,
hjust = 0,
size = 3.2,
color = "grey35",
inherit.aes = FALSE
)
# 9) Print in RStudio / in the knitted document
p1a

# 10) Save high-resolution image for thesis
ggsave(
filename = "Figure_3_price_only_indexed_growth.png",
plot = p1a,
width = 12,
height = 8.2,
dpi = 300,
limitsize = FALSE
)
Consistency instuctions
# ============================================================
# FIGURE TOOLKIT (use for ALL figures to keep style consistent)
# ============================================================
library(ggplot2)
library(scales)
library(stringr)
# --- Global event markers (reuse across all plots) ---
events <- tibble::tribble(
~date, ~label,
as.Date("2011-09-30"), "Euro area crisis",
as.Date("2020-03-31"), "COVID-19 shock",
as.Date("2022-03-31"), "Inflation & rate shock"
)
# --- Thesis theme: matches Figure 1A ---
theme_thesis <- function() {
theme_minimal(base_size = 12) +
theme(
plot.title = element_text(face = "bold", size = 14),
plot.subtitle = element_text(size = 10.5, color = "grey30"),
legend.position = "bottom",
legend.title = element_blank(),
legend.text = element_text(size = 9.5),
legend.key.width = unit(14, "pt"),
legend.spacing.x = unit(6, "pt"),
legend.box.margin = margin(t = 8),
plot.margin = margin(t = 10, r = 10, b = 10, l = 10),
panel.grid.minor = element_blank()
)
}
# --- Add event lines + labels (prevents aesthetic inheritance bugs) ---
add_events <- function(p, y_top, events_df = events) {
p +
geom_vline(
data = events_df,
aes(xintercept = date),
linewidth = 0.4,
linetype = 3,
color = "grey60",
inherit.aes = FALSE
) +
geom_text(
data = events_df,
aes(x = date, y = 0.96 * y_top, label = label),
vjust = -0.3,
hjust = 0,
size = 3.2,
color = "grey35",
inherit.aes = FALSE
)
}
# --- Save consistently (same size as Figure 1A) ---
save_figure <- function(plot_obj, filename, width = 12, height = 8.2, dpi = 300) {
ggsave(
filename = filename,
plot = plot_obj,
width = width,
height = height,
dpi = dpi,
limitsize = FALSE
)
}
3.2. Graph 4: Total retudn wealth indices
# ------------------------------------------------------------
# FIGURE 4: Total return wealth indices (investor experience) – indexed to 100
#
# What this figure represents:
# - Equities (SBITOP): price appreciation + dividend income (quarterly = dy_eq_ann/4)
# - Real estate (HPI): price appreciation + modeled rental income
# * Eurostat HPI Slovenia (broad overall market coverage / all dwellings)
# * SURS HPI "selected series" used in this thesis:
# - Slovenia total
# - Ljubljana
# - Slovenia ex-Ljubljana
#
# Why this figure is needed:
# - Figure 3 is price-only (capital appreciation) and excludes cash-flow returns.
# - Figure 4 adds income components (dividends + rents) to approximate investor experience.
#
# Important assumptions (baseline, later tested in sensitivity analysis):
# - Dividend income is allocated evenly across quarters: dy_eq_ann/4
# - Base gross annual rental yield y0_ann is calibrated as a constant starting yield
# - Rental yield evolves with rent/price ratio using HICP rents and HPI:
# y_t = y0_ann * [(hicp_rent_t / hicp_rent_0) / (HPI_t / HPI_0)]
# - Quarterly rental income is lagged (t-1) to avoid look-ahead: lag(y_t)/4
#
# Output:
# - Prints a clean figure in RStudio/Rmd (same style as Figure 1A)
# - Saves a high-resolution PNG that will not clip the legend
# ------------------------------------------------------------
library(dplyr)
library(tidyr)
library(ggplot2)
library(scales)
library(stringr)
# 1) Start from the master LEVELS dataset and ensure time order
levels_plot <- master %>% arrange(date_q)
# 2) Parameters for baseline rental income model
y0_ann <- 0.04 # base gross annual rental yield (baseline assumption)
# 3) Robust base quarter: first quarter where all required series exist
# (Needed for both rebasing and the rent/price yield model.)
base <- levels_plot %>%
filter(
!is.na(sbitop_close),
!is.na(dy_eq_ann),
!is.na(hicp_rent),
!is.na(hpi_si_eurostat),
!is.na(hpi_si_surs_used),
!is.na(hpi_lj_surs_used),
!is.na(hpi_si_exlj_surs_used)
) %>%
slice(1)
P0 <- base$sbitop_close
R0 <- base$hicp_rent
He0 <- base$hpi_si_eurostat
Hs0 <- base$hpi_si_surs_used
Hl0 <- base$hpi_lj_surs_used
Hx0 <- base$hpi_si_exlj_surs_used
# 4) Construct quarterly total log returns (price + income) for each series
# Equities: log price return + dividend yield/4
# Real estate: log price return + lagged rental yield/4
levels_plot <- levels_plot %>%
mutate(
# Equity quarterly income (baseline: evenly across quarters)
dy_eq_q = dy_eq_ann / 4,
# Equity log returns
r_eq_price_log = log(sbitop_close / lag(sbitop_close)),
r_eq_total_log = r_eq_price_log + dy_eq_q,
# Real estate price log returns
r_re_eu_price_log = log(hpi_si_eurostat / lag(hpi_si_eurostat)),
r_re_si_price_log = log(hpi_si_surs_used / lag(hpi_si_surs_used)),
r_re_lj_price_log = log(hpi_lj_surs_used / lag(hpi_lj_surs_used)),
r_re_exlj_price_log = log(hpi_si_exlj_surs_used / lag(hpi_si_exlj_surs_used)),
# Time-varying rental yields via rent/price ratio (anchored at base quarter)
y_eu_t = y0_ann * ((hicp_rent / R0) / (hpi_si_eurostat / He0)),
y_si_t = y0_ann * ((hicp_rent / R0) / (hpi_si_surs_used / Hs0)),
y_lj_t = y0_ann * ((hicp_rent / R0) / (hpi_lj_surs_used / Hl0)),
y_exlj_t = y0_ann * ((hicp_rent / R0) / (hpi_si_exlj_surs_used / Hx0)),
# Quarterly rental income (lagged yield to avoid look-ahead bias)
r_re_eu_income_q = lag(y_eu_t) / 4,
r_re_si_income_q = lag(y_si_t) / 4,
r_re_lj_income_q = lag(y_lj_t) / 4,
r_re_exlj_income_q = lag(y_exlj_t) / 4,
# Real estate total log returns
r_re_eu_total_log = r_re_eu_price_log + r_re_eu_income_q,
r_re_si_total_log = r_re_si_price_log + r_re_si_income_q,
r_re_lj_total_log = r_re_lj_price_log + r_re_lj_income_q,
r_re_exlj_total_log = r_re_exlj_price_log + r_re_exlj_income_q
)
# 5) Build wealth indices (start = 100) from total log returns
# replace_na(..., 0) ensures the first return is treated as 0 for cumulation.
levels_plot <- levels_plot %>%
mutate(
idx_eq_tr = 100 * exp(cumsum(replace_na(r_eq_total_log, 0))),
idx_re_eu_tr = 100 * exp(cumsum(replace_na(r_re_eu_total_log, 0))),
idx_re_si_tr = 100 * exp(cumsum(replace_na(r_re_si_total_log, 0))),
idx_re_lj_tr = 100 * exp(cumsum(replace_na(r_re_lj_total_log, 0))),
idx_re_exlj_tr = 100 * exp(cumsum(replace_na(r_re_exlj_total_log, 0)))
)
# 6) Convert to long format + readable labels (wrap to prevent legend clipping)
plot_1b_data <- levels_plot %>%
select(date_q, idx_eq_tr, idx_re_eu_tr, idx_re_si_tr, idx_re_lj_tr, idx_re_exlj_tr) %>%
pivot_longer(-date_q, names_to = "series", values_to = "wealth") %>%
mutate(
series = recode(series,
idx_eq_tr = "Equities: SBITOP (total return: price + dividends)",
idx_re_eu_tr = "Real estate: Slovenia (Eurostat HPI + rent model)",
idx_re_si_tr = "Real estate: Slovenia (SURS selected HPI + rent model)",
idx_re_lj_tr = "Real estate: Ljubljana (SURS selected HPI + rent model)",
idx_re_exlj_tr = "Real estate: Slovenia ex-LJ (SURS selected HPI + rent model)"
),
series = str_wrap(series, width = 34)
)
legend_levels_1b <- str_wrap(c(
"Equities: SBITOP (total return: price + dividends)",
"Real estate: Slovenia (Eurostat HPI + rent model)",
"Real estate: Slovenia (SURS selected HPI + rent model)",
"Real estate: Ljubljana (SURS selected HPI + rent model)",
"Real estate: Slovenia ex-LJ (SURS selected HPI + rent model)"
), width = 34)
plot_1b_data <- plot_1b_data %>%
mutate(series = factor(series, levels = legend_levels_1b))
# 7) Optional event markers for narrative context (reuse same dates as Figure 1A)
events <- tibble::tribble(
~date, ~label,
as.Date("2011-09-30"), "Euro area crisis",
as.Date("2020-03-31"), "COVID-19 shock",
as.Date("2022-03-31"), "Inflation & rate shock"
)
y_top <- max(plot_1b_data$wealth, na.rm = TRUE)
# 8) Reuse the same thesis theme as Figure 3 for visual consistency
theme_thesis <- function() {
theme_minimal(base_size = 12) +
theme(
plot.title = element_text(face = "bold", size = 14),
plot.subtitle = element_text(size = 10.5, color = "grey30"),
legend.position = "bottom",
legend.title = element_blank(),
legend.text = element_text(size = 9.5),
legend.key.width = unit(14, "pt"),
legend.spacing.x = unit(6, "pt"),
legend.box.margin = margin(t = 8),
plot.margin = margin(t = 10, r = 10, b = 10, l = 10),
panel.grid.minor = element_blank()
)
}
# 9) Plot Figure 4
p1b <- ggplot(plot_1b_data, aes(x = date_q, y = wealth, color = series, linetype = series)) +
geom_line(linewidth = 1.0) +
scale_y_continuous(labels = label_number(accuracy = 1)) +
scale_x_date(date_breaks = "2 years", date_labels = "%Y") +
guides(
color = guide_legend(ncol = 2, byrow = TRUE),
linetype = "none"
) +
labs(
x = NULL,
y = "Wealth index (base = 100 at start)"
) +
theme_thesis() +
geom_vline(
data = events,
aes(xintercept = date),
linewidth = 0.4,
linetype = 3,
color = "grey60",
inherit.aes = FALSE
) +
geom_text(
data = events,
aes(x = date, y = 0.96 * y_top, label = label),
vjust = -0.3,
hjust = 0,
size = 3.2,
color = "grey35",
inherit.aes = FALSE
)
# 10) Print and save (same canvas approach as Figure 3 to prevent clipping)
p1b

ggsave(
filename = "Figure_4_total_return_wealth_indices.png",
plot = p1b,
width = 12,
height = 8.2,
dpi = 300,
limitsize = FALSE
)
3.3. Graph 5: Real returns
# ------------------------------------------------------------
# FIGURE 5: REAL total return wealth indices (inflation-adjusted), indexed to 100
#
# What this figure represents:
# - Same total-return investor experience as Figure 1B (dividends + rent model),
# but expressed in REAL terms by deflating with HICP all-items inflation.
#
# Method (consistent with your methodology):
# 1) Compute nominal total log returns for each asset:
# - Equities: r_eq_total_log = log(P_t/P_{t-1}) + DY/4
# - Real estate: r_re_total_log = log(HPI_t/HPI_{t-1}) + lag(y_t)/4
# 2) Compute quarterly inflation log change:
# - pi_log = log(HICP_all_t / HICP_all_{t-1})
# 3) Compute real total log returns:
# - r_real_log = r_nominal_log - pi_log
# 4) Build real wealth indices:
# - idx_real = 100 * exp(cumsum(r_real_log))
#
# Output:
# - Prints a clean figure in RStudio/Rmd (same style as Figures 1A and 1B)
# - Saves a high-resolution PNG that will not clip the legend
# ------------------------------------------------------------
library(dplyr)
library(tidyr)
library(ggplot2)
library(scales)
library(stringr)
# 1) Start from the master LEVELS dataset and ensure time order
levels_plot <- master %>% arrange(date_q)
# 2) Parameters for baseline rental income model (same as Figure 1B)
y0_ann <- 0.04 # base gross annual rental yield (baseline)
# 3) Robust base quarter: first quarter where all required series exist
base <- levels_plot %>%
filter(
!is.na(sbitop_close),
!is.na(dy_eq_ann),
!is.na(hicp_rent),
!is.na(hicp_all),
!is.na(hpi_si_eurostat),
!is.na(hpi_si_surs_used),
!is.na(hpi_lj_surs_used),
!is.na(hpi_si_exlj_surs_used)
) %>%
slice(1)
R0 <- base$hicp_rent
He0 <- base$hpi_si_eurostat
Hs0 <- base$hpi_si_surs_used
Hl0 <- base$hpi_lj_surs_used
Hx0 <- base$hpi_si_exlj_surs_used
# 4) Inflation log change (quarterly)
# This is used to deflate nominal returns into real returns.
levels_plot <- levels_plot %>%
mutate(
pi_log = log(hicp_all / lag(hicp_all))
)
# 5) Construct nominal total log returns (same construction as Figure 4)
levels_plot <- levels_plot %>%
mutate(
# Equity income: annual dividend yield allocated evenly across quarters
dy_eq_q = dy_eq_ann / 4,
# Equity nominal total log return
r_eq_price_log = log(sbitop_close / lag(sbitop_close)),
r_eq_total_log = r_eq_price_log + dy_eq_q,
# Real estate price log returns
r_re_eu_price_log = log(hpi_si_eurostat / lag(hpi_si_eurostat)),
r_re_si_price_log = log(hpi_si_surs_used / lag(hpi_si_surs_used)),
r_re_lj_price_log = log(hpi_lj_surs_used / lag(hpi_lj_surs_used)),
r_re_exlj_price_log = log(hpi_si_exlj_surs_used / lag(hpi_si_exlj_surs_used)),
# Time-varying rental yields via rent/price ratio (anchored at base quarter)
y_eu_t = y0_ann * ((hicp_rent / R0) / (hpi_si_eurostat / He0)),
y_si_t = y0_ann * ((hicp_rent / R0) / (hpi_si_surs_used / Hs0)),
y_lj_t = y0_ann * ((hicp_rent / R0) / (hpi_lj_surs_used / Hl0)),
y_exlj_t = y0_ann * ((hicp_rent / R0) / (hpi_si_exlj_surs_used / Hx0)),
# Quarterly rental income (lagged yield to avoid look-ahead bias)
r_re_eu_income_q = lag(y_eu_t) / 4,
r_re_si_income_q = lag(y_si_t) / 4,
r_re_lj_income_q = lag(y_lj_t) / 4,
r_re_exlj_income_q = lag(y_exlj_t) / 4,
# Real estate nominal total log returns
r_re_eu_total_log = r_re_eu_price_log + r_re_eu_income_q,
r_re_si_total_log = r_re_si_price_log + r_re_si_income_q,
r_re_lj_total_log = r_re_lj_price_log + r_re_lj_income_q,
r_re_exlj_total_log = r_re_exlj_price_log + r_re_exlj_income_q
)
# 6) Convert nominal total log returns to REAL total log returns by subtracting inflation
levels_plot <- levels_plot %>%
mutate(
r_eq_total_real_log = r_eq_total_log - pi_log,
r_re_eu_total_real_log = r_re_eu_total_log - pi_log,
r_re_si_total_real_log = r_re_si_total_log - pi_log,
r_re_lj_total_real_log = r_re_lj_total_log - pi_log,
r_re_exlj_total_real_log = r_re_exlj_total_log - pi_log
)
# 7) Build REAL wealth indices from real log returns (start = 100)
levels_plot <- levels_plot %>%
mutate(
idx_eq_tr_real = 100 * exp(cumsum(replace_na(r_eq_total_real_log, 0))),
idx_re_eu_tr_real = 100 * exp(cumsum(replace_na(r_re_eu_total_real_log, 0))),
idx_re_si_tr_real = 100 * exp(cumsum(replace_na(r_re_si_total_real_log, 0))),
idx_re_lj_tr_real = 100 * exp(cumsum(replace_na(r_re_lj_total_real_log, 0))),
idx_re_exlj_tr_real = 100 * exp(cumsum(replace_na(r_re_exlj_total_real_log, 0)))
)
# 8) Convert to long format + readable labels (wrap to prevent legend clipping)
plot_1c_data <- levels_plot %>%
select(date_q, idx_eq_tr_real, idx_re_eu_tr_real, idx_re_si_tr_real, idx_re_lj_tr_real, idx_re_exlj_tr_real) %>%
pivot_longer(-date_q, names_to = "series", values_to = "wealth_real") %>%
mutate(
series = recode(series,
idx_eq_tr_real = "Equities: SBITOP (real total return)",
idx_re_eu_tr_real = "Real estate: Slovenia (Eurostat HPI + rent model, real)",
idx_re_si_tr_real = "Real estate: Slovenia (SURS selected HPI + rent model, real)",
idx_re_lj_tr_real = "Real estate: Ljubljana (SURS selected HPI + rent model, real)",
idx_re_exlj_tr_real = "Real estate: Slovenia ex-LJ (SURS selected HPI + rent model, real)"
),
series = str_wrap(series, width = 34)
)
legend_levels_1c <- str_wrap(c(
"Equities: SBITOP (real total return)",
"Real estate: Slovenia (Eurostat HPI + rent model, real)",
"Real estate: Slovenia (SURS selected HPI + rent model, real)",
"Real estate: Ljubljana (SURS selected HPI + rent model, real)",
"Real estate: Slovenia ex-LJ (SURS selected HPI + rent model, real)"
), width = 34)
plot_1c_data <- plot_1c_data %>%
mutate(series = factor(series, levels = legend_levels_1c))
# 9) Optional event markers (same as Figures 3 and 4)
events <- tibble::tribble(
~date, ~label,
as.Date("2011-09-30"), "Euro area crisis",
as.Date("2020-03-31"), "COVID-19 shock",
as.Date("2022-03-31"), "Inflation & rate shock"
)
y_top <- max(plot_1c_data$wealth_real, na.rm = TRUE)
# 10) Same thesis theme as Figures 3 and 4 (keep visual consistency)
theme_thesis <- function() {
theme_minimal(base_size = 12) +
theme(
plot.title = element_text(face = "bold", size = 14),
plot.subtitle = element_text(size = 10.5, color = "grey30"),
legend.position = "bottom",
legend.title = element_blank(),
legend.text = element_text(size = 9.5),
legend.key.width = unit(14, "pt"),
legend.spacing.x = unit(6, "pt"),
legend.box.margin = margin(t = 8),
plot.margin = margin(t = 10, r = 10, b = 10, l = 10),
panel.grid.minor = element_blank()
)
}
# 11) Plot Figure 5
p1c <- ggplot(plot_1c_data, aes(x = date_q, y = wealth_real, color = series, linetype = series)) +
geom_line(linewidth = 1.0) +
scale_y_continuous(labels = label_number(accuracy = 1)) +
scale_x_date(date_breaks = "2 years", date_labels = "%Y") +
guides(
color = guide_legend(ncol = 2, byrow = TRUE),
linetype = "none"
) +
labs(
x = NULL,
y = "Real wealth index (base = 100 at start)"
) +
theme_thesis() +
geom_vline(
data = events,
aes(xintercept = date),
linewidth = 0.4,
linetype = 3,
color = "grey60",
inherit.aes = FALSE
) +
geom_text(
data = events,
aes(x = date, y = 0.96 * y_top, label = label),
vjust = -0.3,
hjust = 0,
size = 3.2,
color = "grey35",
inherit.aes = FALSE
)
# 12) Print and save (same canvas as Figures 3 and 4)
p1c

ggsave(
filename = "Figure_5_real_total_return_wealth_indices.png",
plot = p1c,
width = 12,
height = 8.2,
dpi = 300,
limitsize = FALSE
)
3.3. Graph 6 & 7: Nominal & Real returns
# ============================================================
# FIGURE 6 & 7 (TWO-PANEL, TWO LEGENDS)
# Panel 1 (bigger): Equities vs Slovenia real estate (Eurostat)
# Panel 2 (smaller): Real estate comparison (Eurostat vs SURS regions)
# Each panel has its OWN legend directly below it.
# Colors/roles are consistent with Figures 3–5.
# ============================================================
library(dplyr)
library(tidyr)
library(lubridate)
library(ggplot2)
library(scales)
library(patchwork)
# -----------------------------
# 0) Thesis theme (consistent)
# -----------------------------
theme_thesis <- function() {
theme_minimal(base_size = 12) +
theme(
plot.title = element_text(face = "bold", size = 14),
plot.subtitle = element_text(size = 10.5, color = "grey30"),
legend.position = "bottom",
legend.title = element_blank(),
legend.text = element_text(size = 9.5),
legend.key.width = unit(14, "pt"),
legend.spacing.x = unit(6, "pt"),
legend.box.margin = margin(t = 6),
plot.margin = margin(t = 10, r = 10, b = 10, l = 10),
panel.grid.minor = element_blank()
)
}
# -----------------------------
# 1) Event markers (same dates)
# -----------------------------
events <- tibble::tribble(
~date, ~label,
as.Date("2011-09-30"), "Euro area crisis",
as.Date("2020-03-31"), "COVID-19 shock",
as.Date("2022-03-31"), "Inflation & rate shock"
)
add_events <- function(p, y_top) {
p +
geom_vline(
data = events,
aes(xintercept = date),
linewidth = 0.4,
linetype = 3,
color = "grey60",
inherit.aes = FALSE
) +
geom_text(
data = events,
aes(x = date, y = 0.96 * y_top, label = label),
vjust = -0.3,
hjust = 0,
size = 3.2,
color = "grey35",
inherit.aes = FALSE
)
}
# -----------------------------
# 2) Global labels (stable)
# -----------------------------
LAB_EQ <- "Equities: SBITOP"
LAB_EU <- "Real estate: Slovenia (Eurostat)"
LAB_SI <- "Real estate: Slovenia (SURS)"
LAB_LJ <- "Real estate: Ljubljana (SURS)"
LAB_EX <- "Real estate: Slovenia ex-LJ (SURS)"
# Legend order (global + per panel)
LEGEND_ALL <- c(LAB_EQ, LAB_EU, LAB_SI, LAB_LJ, LAB_EX)
LEGEND_P1 <- c(LAB_EQ, LAB_EU)
LEGEND_P2 <- c(LAB_EU, LAB_SI, LAB_LJ, LAB_EX)
# -----------------------------
# 3) Global colors/linetypes
# (match roles used in 3–5)
# -----------------------------
COLORS <- c(
`Equities: SBITOP` = "#F8766D", # red
`Real estate: Slovenia (Eurostat)` = "#B79F00", # mustard
`Real estate: Slovenia (SURS)` = "#00BFC4", # teal
`Real estate: Ljubljana (SURS)` = "#619CFF", # blue
`Real estate: Slovenia ex-LJ (SURS)` = "#C77CFF" # purple
)
LTYPES <- c(
`Equities: SBITOP` = "solid",
`Real estate: Slovenia (Eurostat)` = "dashed",
`Real estate: Slovenia (SURS)` = "dashed",
`Real estate: Ljubljana (SURS)` = "dashed",
`Real estate: Slovenia ex-LJ (SURS)` = "dotted"
)
# -----------------------------
# 4) Build return series ONCE
# (assumes master exists)
# -----------------------------
y0_ann <- 0.04
df <- master %>% arrange(date_q)
# Anchors for rent-yield model (robust)
R0 <- df %>% filter(!is.na(hicp_rent)) %>% slice(1) %>% pull(hicp_rent)
He0 <- df %>% filter(!is.na(hpi_si_eurostat)) %>% slice(1) %>% pull(hpi_si_eurostat)
Hs0 <- df %>% filter(!is.na(hpi_si_surs_used)) %>% slice(1) %>% pull(hpi_si_surs_used)
Hl0 <- df %>% filter(!is.na(hpi_lj_surs_used)) %>% slice(1) %>% pull(hpi_lj_surs_used)
Hx0 <- df %>% filter(!is.na(hpi_si_exlj_surs_used)) %>% slice(1) %>% pull(hpi_si_exlj_surs_used)
df <- df %>%
mutate(
# Inflation (quarterly log change)
pi_log = log(hicp_all / lag(hicp_all)),
# Equities: nominal total log return
dy_eq_q = dy_eq_ann / 4,
r_eq_price_log = log(sbitop_close / lag(sbitop_close)),
r_eq_total_log = r_eq_price_log + dy_eq_q,
# Real estate (Eurostat): nominal total log return
r_re_eu_price_log = log(hpi_si_eurostat / lag(hpi_si_eurostat)),
y_eu_t = y0_ann * ((hicp_rent / R0) / (hpi_si_eurostat / He0)),
r_re_eu_income_q = lag(y_eu_t) / 4,
r_re_eu_total_log = r_re_eu_price_log + r_re_eu_income_q,
# Real estate (SURS regions): nominal total log returns
r_re_si_price_log = log(hpi_si_surs_used / lag(hpi_si_surs_used)),
r_re_lj_price_log = log(hpi_lj_surs_used / lag(hpi_lj_surs_used)),
r_re_ex_price_log = log(hpi_si_exlj_surs_used / lag(hpi_si_exlj_surs_used)),
y_si_t = y0_ann * ((hicp_rent / R0) / (hpi_si_surs_used / Hs0)),
y_lj_t = y0_ann * ((hicp_rent / R0) / (hpi_lj_surs_used / Hl0)),
y_ex_t = y0_ann * ((hicp_rent / R0) / (hpi_si_exlj_surs_used / Hx0)),
r_re_si_income_q = lag(y_si_t) / 4,
r_re_lj_income_q = lag(y_lj_t) / 4,
r_re_ex_income_q = lag(y_ex_t) / 4,
r_re_si_total_log = r_re_si_price_log + r_re_si_income_q,
r_re_lj_total_log = r_re_lj_price_log + r_re_lj_income_q,
r_re_ex_total_log = r_re_ex_price_log + r_re_ex_income_q,
# Real (inflation-adjusted) total log returns
r_eq_total_real_log = r_eq_total_log - pi_log,
r_re_eu_total_real_log = r_re_eu_total_log - pi_log,
r_re_si_total_real_log = r_re_si_total_log - pi_log,
r_re_lj_total_real_log = r_re_lj_total_log - pi_log,
r_re_ex_total_real_log = r_re_ex_total_log - pi_log
)
# -----------------------------
# 5) Factory: 2-panel figure with 2 legends
# -----------------------------
make_two_panel_two_legends <- function(df,
eq_col, eu_col, si_col, lj_col, ex_col,
fig_subtitle, ylab,
out_file) {
# Panel 1 data: EQ vs Eurostat
p1_data <- df %>%
transmute(
date_q,
`Equities: SBITOP` = .data[[eq_col]],
`Real estate: Slovenia (Eurostat)` = .data[[eu_col]]
) %>%
pivot_longer(-date_q, names_to = "series", values_to = "ret") %>%
filter(!is.na(ret)) %>%
mutate(series = factor(series, levels = LEGEND_ALL))
# Panel 2 data: RE comparison only
p2_data <- df %>%
transmute(
date_q,
`Real estate: Slovenia (Eurostat)` = .data[[eu_col]],
`Real estate: Slovenia (SURS)` = .data[[si_col]],
`Real estate: Ljubljana (SURS)` = .data[[lj_col]],
`Real estate: Slovenia ex-LJ (SURS)` = .data[[ex_col]]
) %>%
pivot_longer(-date_q, names_to = "series", values_to = "ret") %>%
filter(!is.na(ret)) %>%
mutate(series = factor(series, levels = LEGEND_ALL))
y_top_p1 <- max(p1_data$ret, na.rm = TRUE)
y_top_p2 <- max(p2_data$ret, na.rm = TRUE)
# Panel 1 plot
panel1 <- ggplot(p1_data, aes(date_q, ret, color = series, linetype = series)) +
geom_hline(yintercept = 0, linewidth = 0.4, color = "grey70") +
geom_line(linewidth = 1.05) +
scale_color_manual(values = COLORS, breaks = LEGEND_P1, drop = FALSE) +
scale_linetype_manual(values = LTYPES, breaks = LEGEND_P1, drop = FALSE) +
scale_y_continuous(labels = percent_format(accuracy = 0.1)) +
scale_x_date(date_breaks = "2 years", date_labels = "%Y") +
labs(
subtitle = fig_subtitle,
x = NULL,
y = ylab
) +
theme_thesis() +
theme(
legend.position = "bottom",
plot.margin = margin(t = 10, r = 10, b = 6, l = 10)
) +
guides(
color = guide_legend(ncol = 2, byrow = TRUE),
linetype = "none"
)
panel1 <- add_events(panel1, y_top_p1)
# Panel 2 plot
panel2 <- ggplot(p2_data, aes(date_q, ret, color = series, linetype = series)) +
geom_hline(yintercept = 0, linewidth = 0.4, color = "grey70") +
geom_line(linewidth = 0.95) +
scale_color_manual(values = COLORS, breaks = LEGEND_P2, drop = FALSE) +
scale_linetype_manual(values = LTYPES, breaks = LEGEND_P2, drop = FALSE) +
scale_y_continuous(labels = percent_format(accuracy = 0.1)) +
scale_x_date(date_breaks = "2 years", date_labels = "%Y") +
labs(
subtitle = "Panel 2: Real estate comparison (Eurostat vs SURS regions)",
x = NULL, y = NULL
) +
theme_thesis() +
theme(
plot.subtitle = element_text(size = 10, color = "grey30", face = "bold"),
legend.position = "bottom",
plot.margin = margin(t = 0, r = 10, b = 10, l = 10)
) +
guides(
color = guide_legend(ncol = 2, byrow = TRUE),
linetype = "none"
)
panel2 <- add_events(panel2, y_top_p2)
# Combine (Panel 1 larger)
fig <- panel1 / panel2 + plot_layout(heights = c(2.2, 1))
print(fig)
ggsave(
filename = out_file,
plot = fig,
width = 12,
height = 10.4,
dpi = 300,
limitsize = FALSE
)
}
# -----------------------------
# 6) FIGURE 6 (Nominal)
# -----------------------------
make_two_panel_two_legends(
df = df,
eq_col = "r_eq_total_log",
eu_col = "r_re_eu_total_log",
si_col = "r_re_si_total_log",
lj_col = "r_re_lj_total_log",
ex_col = "r_re_ex_total_log",
fig_subtitle = "Panel 1: Equities vs Slovenia real estate (Eurostat).",
ylab = "Quarterly total return (log, nominal)",
out_file = "Figure_6_quarterly_nominal_total_returns.png"
)

# -----------------------------
# 7) FIGURE 7 (Real)
# -----------------------------
make_two_panel_two_legends(
df = df,
eq_col = "r_eq_total_real_log",
eu_col = "r_re_eu_total_real_log",
si_col = "r_re_si_total_real_log",
lj_col = "r_re_lj_total_real_log",
ex_col = "r_re_ex_total_real_log",
fig_subtitle = "Panel 1: Equities vs Eurostat Slovenia real estate.",
ylab = "Quarterly total return (log, real)",
out_file = "Figure_7_quarterly_real_total_returns.png"
)

3.4. Graph 8 & 9: Return distribution & QQ plot
# ============================================================
# Figures:
# - Figure 8: Return distribution (density) — equities vs Eurostat real estate (NOMINAL)
# - Figure 9: QQ plots vs Normal — equities and Eurostat real estate (NOMINAL)
#
# Scope (as requested): ONLY 2 asset classes
# 1) Equities: SBITOP (nominal total log return)
# 2) Real estate: Slovenia (Eurostat, nominal total log return)
#
# Notes:
# - Uses quarterly NOMINAL log total returns already computed earlier in your script:
# r_eq_total_log, r_re_eu_total_log
# - If you have not computed these columns yet, run the Figure 2A/2B data-prep chunk first.
# ============================================================
library(dplyr)
library(tidyr)
library(ggplot2)
library(scales)
# -----------------------------
# 1) Prepare the return dataset
# -----------------------------
ret2 <- df %>% # df from your previous chunk where returns are computed
select(date_q, r_eq_total_log, r_re_eu_total_log) %>%
pivot_longer(-date_q, names_to = "asset", values_to = "ret_log") %>%
filter(!is.na(ret_log)) %>%
mutate(
asset = recode(asset,
r_eq_total_log = "Equities: SBITOP",
r_re_eu_total_log = "Real estate: Slovenia (Eurostat)"
),
# keep the same color roles as earlier figures (SBITOP red, Eurostat mustard)
asset = factor(asset, levels = c("Equities: SBITOP", "Real estate: Slovenia (Eurostat)"))
)
# manual colors consistent with your earlier palette
COL_2ASSETS <- c(
"Equities: SBITOP" = "#F8766D",
"Real estate: Slovenia (Eurostat)" = "#B79F00"
)
# -----------------------------
# 2) Figure 8: Distribution plot
# -----------------------------
# Density is usually cleaner than histogram for thesis comparisons.
# Keep y-axis as density and x-axis in percent units.
p2c <- ggplot(ret2, aes(x = ret_log, color = asset)) +
geom_density(linewidth = 1.05, adjust = 1.05) +
geom_vline(xintercept = 0, linewidth = 0.35, color = "grey65") +
scale_color_manual(values = COL_2ASSETS) +
scale_x_continuous(labels = percent_format(accuracy = 1)) +
labs(
x = "Quarterly total return (log, nominal)",
y = "Density"
) +
theme_thesis() +
theme(
legend.position = "bottom",
legend.title = element_blank()
)
p2c

ggsave(
filename = "Figure_8_return_distribution_density_nominal.png",
plot = p2c,
width = 12,
height = 6.8,
dpi = 300,
limitsize = FALSE
)
# -----------------------------
# 3) Figure 9: QQ plots vs Normal
# -----------------------------
# Approach:
# - Build two QQ plots (one per asset) and stack them.
# - Use sample mean/sd implied by ggplot's stat_qq_line().
# - This visually shows tail fatness / deviations from normality.
# 1) Build a clean long dataset of nominal quarterly log total returns (same inputs as Fig 2C)
qq_df <- df %>%
select(date_q, r_eq_total_log, r_re_eu_total_log) %>%
pivot_longer(-date_q, names_to = "asset", values_to = "ret_log") %>%
filter(!is.na(ret_log)) %>%
mutate(
asset = recode(asset,
r_eq_total_log = "Equities: SBITOP",
r_re_eu_total_log = "Real estate: Slovenia (Eurostat)"
),
asset = factor(asset, levels = c("Equities: SBITOP", "Real estate: Slovenia (Eurostat)"))
)
# 2) Use the SAME y-scale in both panels:
# - set limits using the combined min/max across BOTH assets
y_lim <- range(qq_df$ret_log, na.rm = TRUE)
# 3) Use the SAME x-scale in both panels:
# - theoretical quantiles depend only on n; use the MAX sample size (conservative)
n_max <- qq_df %>% count(asset) %>% pull(n) %>% max()
x_lim <- range(qnorm(ppoints(n_max))) # same theoretical range for both panels
# 4) Colors consistent with your previous figures
COL_2ASSETS <- c(
"Equities: SBITOP" = "#F8766D",
"Real estate: Slovenia (Eurostat)" = "#B79F00"
)
# 5) Plot: separate QQ line per panel (normal line fitted to that asset),
# but identical axis limits so the comparison is apples-to-apples.
p2d <- ggplot(qq_df, aes(sample = ret_log, color = asset)) +
stat_qq(size = 1.6, alpha = 0.85) +
stat_qq_line(linewidth = 0.8, color = "grey35") +
facet_wrap(~ asset, ncol = 1) +
scale_color_manual(values = COL_2ASSETS, guide = "none") +
coord_cartesian(xlim = x_lim, ylim = y_lim) +
scale_x_continuous(labels = label_number(accuracy = 0.1)) +
scale_y_continuous(labels = percent_format(accuracy = 0.1)) +
labs(
x = "Theoretical quantiles (Normal)",
y = "Sample quantiles (quarterly log total return, nominal)"
) +
theme_thesis() +
theme(strip.text = element_text(face = "bold"))
p2d

ggsave(
filename = "Figure_9_QQplots_same_scale_nominal.png",
plot = p2d,
width = 12,
height = 8.2,
dpi = 300,
limitsize = FALSE
)
3.5. Graph 10: Rolling volatility
# ============================================================
# FIGURE 10: Rolling volatility (8-quarter rolling standard deviation)
# + Event markers (Euro area crisis, COVID-19 shock, Inflation & rate shock)
# Panel 1: SBITOP vs RE Slovenia (Eurostat)
# Panel 2: RE comparison (Eurostat vs SURS SI/LJ/ex-LJ)
# ============================================================
library(dplyr)
library(tidyr)
library(ggplot2)
library(scales)
library(zoo)
library(patchwork)
# --- Rolling window length (8 quarters = 2 years) ---
win_q <- 8
# --- Event markers (same as previous figures) ---
events <- tibble::tribble(
~date, ~label,
as.Date("2011-09-30"), "Euro area crisis",
as.Date("2020-03-31"), "COVID-19 shock",
as.Date("2022-03-31"), "Inflation & rate shock"
)
# --- Colors consistent with Figures 1A–2D ---
COL_5 <- c(
"Equities: SBITOP" = "#F8766D",
"Real estate: Slovenia (Eurostat)" = "#B79F00",
"Real estate: Slovenia (SURS)" = "#00BFC4",
"Real estate: Ljubljana (SURS)" = "#619CFF",
"Real estate: Slovenia ex-LJ (SURS)" = "#C77CFF"
)
# --- Linetypes: equities solid; real estate dashed (as in prior figures) ---
LT_5 <- c(
"Equities: SBITOP" = "solid",
"Real estate: Slovenia (Eurostat)" = "dashed",
"Real estate: Slovenia (SURS)" = "dashed",
"Real estate: Ljubljana (SURS)" = "dashed",
"Real estate: Slovenia ex-LJ (SURS)" = "dashed"
)
# --- Rolling SD helper ---
roll_sd <- function(x, k) {
zoo::rollapply(x, width = k, FUN = sd, fill = NA, align = "right", na.rm = TRUE)
}
# 1) Build df_vol (assumes df exists from your Figure 2A/2B chunk)
df_vol <- df %>%
arrange(date_q) %>%
select(
date_q,
r_eq_total_log,
r_re_eu_total_log,
r_re_si_total_log,
r_re_lj_total_log,
r_re_ex_total_log
) %>%
mutate(
vol_eq = roll_sd(r_eq_total_log, win_q),
vol_re_eu = roll_sd(r_re_eu_total_log, win_q),
vol_re_si = roll_sd(r_re_si_total_log, win_q),
vol_re_lj = roll_sd(r_re_lj_total_log, win_q),
vol_re_exlj = roll_sd(r_re_ex_total_log, win_q)
)
# 2) Panel 1 data (ONLY SBITOP + Eurostat)
p3a_p1_data <- df_vol %>%
select(date_q, vol_eq, vol_re_eu) %>%
pivot_longer(-date_q, names_to = "series", values_to = "vol") %>%
filter(!is.na(vol)) %>%
mutate(
series = recode(series,
vol_eq = "Equities: SBITOP",
vol_re_eu = "Real estate: Slovenia (Eurostat)"
),
series = factor(series, levels = c("Equities: SBITOP", "Real estate: Slovenia (Eurostat)"))
)
# 3) Panel 2 data (ONLY real estate series)
p3a_p2_data <- df_vol %>%
select(date_q, vol_re_eu, vol_re_si, vol_re_lj, vol_re_exlj) %>%
pivot_longer(-date_q, names_to = "series", values_to = "vol") %>%
filter(!is.na(vol)) %>%
mutate(
series = recode(series,
vol_re_eu = "Real estate: Slovenia (Eurostat)",
vol_re_si = "Real estate: Slovenia (SURS)",
vol_re_lj = "Real estate: Ljubljana (SURS)",
vol_re_exlj = "Real estate: Slovenia ex-LJ (SURS)"
),
series = factor(series, levels = c(
"Real estate: Slovenia (Eurostat)",
"Real estate: Slovenia (SURS)",
"Real estate: Ljubljana (SURS)",
"Real estate: Slovenia ex-LJ (SURS)"
))
)
# 4) Y-positions for event labels (panel-specific so labels sit near the top)
y_top_p1 <- max(p3a_p1_data$vol, na.rm = TRUE)
y_top_p2 <- max(p3a_p2_data$vol, na.rm = TRUE)
# 5) Panel 1 plot + events
p3a_panel1 <- ggplot(p3a_p1_data, aes(date_q, vol, color = series, linetype = series)) +
geom_line(linewidth = 1.05) +
scale_color_manual(values = COL_5, drop = FALSE) +
scale_linetype_manual(values = LT_5, drop = FALSE) +
scale_x_date(date_breaks = "2 years", date_labels = "%Y") +
scale_y_continuous(labels = percent_format(accuracy = 0.1)) +
labs(
subtitle = "Panel 1: SBITOP vs Slovenia real estate (Eurostat).",
x = NULL,
y = "Rolling SD of quarterly total returns (log, nominal)"
) +
theme_thesis() +
guides(color = guide_legend(ncol = 2, byrow = TRUE), linetype = "none") +
geom_vline(
data = events, aes(xintercept = date),
linewidth = 0.4, linetype = 3, color = "grey60", inherit.aes = FALSE
) +
geom_text(
data = events, aes(x = date, y = 0.96 * y_top_p1, label = label),
vjust = -0.3, hjust = 0, size = 3.2, color = "grey35", inherit.aes = FALSE
)
# 6) Panel 2 plot + events
p3a_panel2 <- ggplot(p3a_p2_data, aes(date_q, vol, color = series, linetype = series)) +
geom_line(linewidth = 0.95) +
scale_color_manual(values = COL_5, drop = FALSE) +
scale_linetype_manual(values = LT_5, drop = FALSE) +
scale_x_date(date_breaks = "2 years", date_labels = "%Y") +
scale_y_continuous(labels = percent_format(accuracy = 0.1)) +
labs(
subtitle = "Panel 2: Real estate volatility comparison (Eurostat vs SURS regions)",
x = NULL,
y = NULL
) +
theme_thesis() +
guides(color = guide_legend(ncol = 2, byrow = TRUE), linetype = "none") +
geom_vline(
data = events, aes(xintercept = date),
linewidth = 0.4, linetype = 3, color = "grey60", inherit.aes = FALSE
) +
geom_text(
data = events, aes(x = date, y = 0.96 * y_top_p2, label = label),
vjust = -0.3, hjust = 0, size = 3.2, color = "grey35", inherit.aes = FALSE
)
# 7) Combine (Panel 1 larger)
p3a <- p3a_panel1 / p3a_panel2 + patchwork::plot_layout(heights = c(2.2, 1))
p3a

ggsave(
filename = "Figure_10_rolling_volatility_8q.png",
plot = p3a,
width = 12,
height = 8.2,
dpi = 300,
limitsize = FALSE
)
3.6. Graph 11: Drawdowns
# ============================================================
# FIGURE 11: Drawdowns (underwater plot)
# - Panel 1 (larger): SBITOP vs Slovenia real estate (Eurostat)
# - Panel 2 (smaller): Real estate drawdowns (Eurostat vs SURS regions)
# - Separate legends per panel (only series shown)
# - Event labels visible in BOTH panels
# ============================================================
library(dplyr)
library(tidyr)
library(ggplot2)
library(scales)
library(patchwork)
# ---- Event markers (same as before) ----
events <- tibble::tribble(
~date, ~label,
as.Date("2011-09-30"), "Euro area crisis",
as.Date("2020-03-31"), "COVID-19 shock",
as.Date("2022-03-31"), "Inflation & rate shock"
)
# ---- Colours consistent with your previous figures ----
COLS <- c(
"Equities: SBITOP" = "#F8766D",
"Real estate: Slovenia (Eurostat)" = "#B79F00",
"Real estate: Slovenia (SURS)" = "#00BFC4",
"Real estate: Ljubljana (SURS)" = "#619CFF",
"Real estate: Slovenia ex-LJ (SURS)"= "#C77CFF"
)
# ---- Linetypes consistent with your convention ----
LTYPES <- c(
"Equities: SBITOP" = "solid",
"Real estate: Slovenia (Eurostat)" = "dashed",
"Real estate: Slovenia (SURS)" = "dashed",
"Real estate: Ljubljana (SURS)" = "dashed",
"Real estate: Slovenia ex-LJ (SURS)"= "dashed"
)
# ---- Helper: wealth index from log returns ----
to_wealth <- function(r_log) 100 * exp(cumsum(replace_na(r_log, 0)))
# ---- Helper: drawdown from wealth index (in decimals, <= 0) ----
to_drawdown <- function(w) (w / cummax(w)) - 1
# ---- IMPORTANT: this assumes your 'df' already exists from Figure 6/7
# and contains nominal total log returns:
# r_eq_total_log, r_re_eu_total_log, r_re_si_total_log, r_re_lj_total_log, r_re_exlj_total_log
# If not, create it first via your Figure 2A/2B return-construction chunk.
df_dd <- df %>%
arrange(date_q) %>%
select(date_q,
r_eq_total_log,
r_re_eu_total_log,
r_re_si_total_log,
r_re_lj_total_log,
r_re_ex_total_log) %>%
mutate(
w_eq = to_wealth(r_eq_total_log),
w_re_eu = to_wealth(r_re_eu_total_log),
w_re_si = to_wealth(r_re_si_total_log),
w_re_lj = to_wealth(r_re_lj_total_log),
w_re_ex = to_wealth(r_re_ex_total_log),
dd_eq = to_drawdown(w_eq),
dd_re_eu = to_drawdown(w_re_eu),
dd_re_si = to_drawdown(w_re_si),
dd_re_lj = to_drawdown(w_re_lj),
dd_re_ex = to_drawdown(w_re_ex)
)
# ------------------------------------------------------------
# Panel 1: Equity vs Eurostat RE
# ------------------------------------------------------------
p3b_p1 <- df_dd %>%
select(date_q, dd_eq, dd_re_eu) %>%
pivot_longer(-date_q, names_to = "series", values_to = "dd") %>%
mutate(
series = recode(series,
dd_eq = "Equities: SBITOP",
dd_re_eu = "Real estate: Slovenia (Eurostat)"
)
) %>%
filter(!is.na(dd))
# Place event labels slightly below 0 so they show on underwater plots
y_lab_p1 <- -0.02
p_panel1 <- ggplot(p3b_p1, aes(date_q, dd, color = series, linetype = series)) +
geom_hline(yintercept = 0, linewidth = 0.4, color = "grey60") +
geom_line(linewidth = 1.05) +
scale_color_manual(values = COLS, breaks = c("Equities: SBITOP", "Real estate: Slovenia (Eurostat)")) +
scale_linetype_manual(values = LTYPES, breaks = c("Equities: SBITOP", "Real estate: Slovenia (Eurostat)")) +
scale_x_date(date_breaks = "2 years", date_labels = "%Y") +
scale_y_continuous(labels = percent_format(accuracy = 1)) +
labs(
subtitle = "Panel 1: SBITOP vs Slovenia real estate (Eurostat).",
x = NULL,
y = "Drawdown from peak (%, nominal)"
) +
theme_thesis() +
theme(
legend.position = "bottom",
plot.margin = margin(t = 10, r = 10, b = 8, l = 10)
) +
geom_vline(
data = events, aes(xintercept = date),
linewidth = 0.4, linetype = 3, color = "grey60",
inherit.aes = FALSE
) +
geom_text(
data = events,
aes(x = date, y = y_lab_p1, label = label),
hjust = 0, vjust = -0.3, size = 3.2, color = "grey35",
inherit.aes = FALSE
) +
coord_cartesian(clip = "off")
# ------------------------------------------------------------
# Panel 2: Real estate drawdowns (Eurostat + SURS regions)
# ------------------------------------------------------------
p3b_p2 <- df_dd %>%
select(date_q, dd_re_eu, dd_re_si, dd_re_lj, dd_re_ex) %>%
pivot_longer(-date_q, names_to = "series", values_to = "dd") %>%
mutate(
series = recode(series,
dd_re_eu = "Real estate: Slovenia (Eurostat)",
dd_re_si = "Real estate: Slovenia (SURS)",
dd_re_lj = "Real estate: Ljubljana (SURS)",
dd_re_ex = "Real estate: Slovenia ex-LJ (SURS)"
)
) %>%
filter(!is.na(dd))
y_lab_p2 <- -0.01
p_panel2 <- ggplot(p3b_p2, aes(date_q, dd, color = series, linetype = series)) +
geom_hline(yintercept = 0, linewidth = 0.4, color = "grey60") +
geom_line(linewidth = 0.95) +
scale_color_manual(
values = COLS,
breaks = c(
"Real estate: Slovenia (Eurostat)",
"Real estate: Slovenia (SURS)",
"Real estate: Ljubljana (SURS)",
"Real estate: Slovenia ex-LJ (SURS)"
)
) +
scale_linetype_manual(
values = LTYPES,
breaks = c(
"Real estate: Slovenia (Eurostat)",
"Real estate: Slovenia (SURS)",
"Real estate: Ljubljana (SURS)",
"Real estate: Slovenia ex-LJ (SURS)"
)
) +
scale_x_date(date_breaks = "2 years", date_labels = "%Y") +
scale_y_continuous(labels = percent_format(accuracy = 1)) +
labs(
subtitle = "Panel 2: Real estate drawdowns (Eurostat vs SURS regions)",
x = NULL,
y = NULL
) +
theme_thesis() +
theme(
legend.position = "bottom",
plot.title = element_text(face = "bold", size = 12),
plot.margin = margin(t = 2, r = 10, b = 10, l = 10)
) +
geom_vline(
data = events, aes(xintercept = date),
linewidth = 0.4, linetype = 3, color = "grey60",
inherit.aes = FALSE
) +
geom_text(
data = events,
aes(x = date, y = y_lab_p2, label = label),
hjust = 0, vjust = -0.3, size = 3.0, color = "grey35",
inherit.aes = FALSE
) +
coord_cartesian(clip = "off")
# ------------------------------------------------------------
# Combine: Panel 1 emphasized (larger) + Panel 2 smaller
# ------------------------------------------------------------
p3b <- p_panel1 / p_panel2 +
plot_layout(heights = c(2.6, 1.0))
p3b

ggsave(
filename = "Figure_11_drawdowns_underwater_emphasized.png",
plot = p3b,
width = 13, height = 9.8, dpi = 300,
limitsize = FALSE
)
3.7. Graph 12: Extreme quarters
# ============================================================
# FIGURE 12: Tail events = worst 5 + best 5 quarters
# - Horizontal bar chart, labeled
# - Two assets: SBITOP vs Slovenia RE (Eurostat)
# - Uses nominal quarterly total returns (log)
# ============================================================
library(dplyr)
library(tidyr)
library(ggplot2)
library(scales)
library(lubridate)
library(patchwork)
# 1) Prepare the two asset return series (nominal total log returns)
ret_long <- df %>%
transmute(
date_q,
`Equities: SBITOP` = r_eq_total_log,
`Real estate: Slovenia (Eurostat)` = r_re_eu_total_log
) %>%
pivot_longer(-date_q, names_to = "asset", values_to = "ret_log") %>%
filter(!is.na(ret_log)) %>%
mutate(
q_label = paste0(year(date_q), " Q", quarter(date_q))
)
# 2) Select worst 5 and best 5 per asset
worst5 <- ret_long %>%
group_by(asset) %>%
slice_min(order_by = ret_log, n = 5, with_ties = FALSE) %>%
mutate(tail = "Worst 5")
best5 <- ret_long %>%
group_by(asset) %>%
slice_max(order_by = ret_log, n = 5, with_ties = FALSE) %>%
mutate(tail = "Best 5")
extremes <- bind_rows(worst5, best5) %>%
ungroup() %>%
mutate(
tail = factor(tail, levels = c("Worst 5", "Best 5")),
label = percent(ret_log, accuracy = 0.1)
)
# 3) Force chronological order for ONLY the selected quarters
# Oldest at TOP -> newest at BOTTOM
q_levels_used <- extremes %>%
distinct(date_q, q_label) %>%
arrange(date_q) %>%
pull(q_label)
extremes <- extremes %>%
mutate(q_label = factor(q_label, levels = rev(q_levels_used))) # rev => oldest top
# 4) Colours consistent with earlier figures
COL_2 <- c(
"Equities: SBITOP" = "#F8766D",
"Real estate: Slovenia (Eurostat)" = "#B79F00"
)
# 5) Make sure labels never clip: explicit x-limits with padding
xmin <- min(extremes$ret_log, na.rm = TRUE)
xmax <- max(extremes$ret_log, na.rm = TRUE)
pad_left <- 0.05 # extra room for negative labels (fixes -22.2%)
pad_right <- 0.03
# 6) Plot
p3c <- ggplot(extremes, aes(x = ret_log, y = q_label, fill = asset)) +
geom_vline(xintercept = 0, linewidth = 0.35, color = "grey65") +
geom_col(width = 0.72) +
geom_text(
aes(label = label),
hjust = ifelse(extremes$ret_log >= 0, -0.08, 1.18), # push negative labels further left
size = 3.6,
color = "grey20"
) +
facet_grid(asset ~ tail, scales = "free_y") +
scale_fill_manual(values = COL_2) +
scale_x_continuous(
limits = c(xmin - pad_left, xmax + pad_right),
labels = percent_format(accuracy = 1)
) +
labs(
x = "Quarterly total return (log, nominal)",
y = NULL
) +
theme_thesis() +
theme(
legend.position = "none",
strip.text = element_text(face = "bold"),
# extra margins prevent text clipping at left/right edges
plot.margin = margin(t = 10, r = 28, b = 12, l = 50)
)
p3c

# 7) Save: slightly wider so nothing is cropped
ggsave(
"Figure_12_extreme_quarters_timeline.png",
p3c,
width = 15.5, height = 8.6, dpi = 300,
limitsize = FALSE
)
3.8. Graph 13: Rolling correlation
# ============================================================
# FIGURE 13: Rolling correlation (diversification stability)
# - 12-quarter rolling correlation between equity and real estate returns
# - Uses nominal total log returns:
# * Equities: SBITOP (r_eq_total_log)
# * Real estate: Slovenia (Eurostat) (r_re_eu_total_log)
# - Same visual language as earlier figures (theme_thesis + event markers)
# ============================================================
library(dplyr)
library(ggplot2)
library(scales)
library(zoo)
# ---- Event markers (reuse the same dates as before) ----
events <- tibble::tribble(
~date, ~label,
as.Date("2011-09-30"), "Euro area crisis",
as.Date("2020-03-31"), "COVID-19 shock",
as.Date("2022-03-31"), "Inflation & rate shock"
)
# ---- Rolling window length (12 quarters = 3 years) ----
win_q <- 12
# ---- Rolling correlation helper (align right: uses previous 12 quarters) ----
roll_corr <- function(x, y, k) {
zoo::rollapply(
data = cbind(x, y),
width = k,
FUN = function(z) cor(z[, 1], z[, 2], use = "complete.obs"),
by.column = FALSE,
align = "right",
fill = NA
)
}
# ---- Build working df (assumes df exists from your Figure 2A/2B chunk) ----
# df must contain: date_q, r_eq_total_log, r_re_eu_total_log
df_corr <- df %>%
arrange(date_q) %>%
transmute(
date_q,
r_eq = r_eq_total_log,
r_re = r_re_eu_total_log
) %>%
mutate(
corr_12q = roll_corr(r_eq, r_re, win_q)
) %>%
filter(!is.na(corr_12q))
# ---- Y position for event labels (keep consistent placement) ----
y_top <- max(df_corr$corr_12q, na.rm = TRUE)
# ---- Plot ----
p4a <- ggplot(df_corr, aes(x = date_q, y = corr_12q)) +
geom_hline(yintercept = 0, linewidth = 0.4, color = "grey70") +
geom_line(linewidth = 1.05, color = "grey20") +
scale_x_date(date_breaks = "2 years", date_labels = "%Y") +
scale_y_continuous(limits = c(-1, 1), breaks = seq(-1, 1, 0.25)) +
labs(
x = NULL,
y = "Rolling correlation"
) +
theme_thesis() +
geom_vline(
data = events,
aes(xintercept = date),
linewidth = 0.4,
linetype = 3,
color = "grey60",
inherit.aes = FALSE
) +
geom_text(
data = events,
aes(x = date, y = 0.96 * y_top, label = label),
vjust = -0.3,
hjust = 0,
size = 3.2,
color = "grey35",
inherit.aes = FALSE
)
p4a

ggsave(
filename = "Figure_13_rolling_correlation_12q.png",
plot = p4a,
width = 12,
height = 8.2,
dpi = 300,
limitsize = FALSE
)
3.9. Graph 14: Scatter and conditional behavior
# ============================================================
# FIGURE 14: Scatter + conditional behaviour (RE vs EQ returns)
# - Uses quarterly nominal total log returns:
# * Equity: r_eq_total_log
# * Real estate: r_re_eu_total_log (Eurostat Slovenia)
# - Highlights crisis regimes (same event dates as earlier figures)
# ============================================================
library(dplyr)
library(ggplot2)
library(scales)
# ---- Toggle: one line overall vs separate lines by regime ----
by_regime <- TRUE # set FALSE if you want one fitted line only
# ---- Event dates (same as your vertical markers) ----
d_euro <- as.Date("2011-09-30")
d_covid <- as.Date("2020-03-31")
d_infl <- as.Date("2022-03-31")
# ---- Regime windows (keep simple + readable) ----
# You can adjust these, but don't over-engineer: the goal is visual intuition.
w_euro <- 2 # quarters on each side of event
w_covid <- 2
w_infl <- 2
# Helper to create +/- k quarter window in Date terms
add_q <- function(d, k) seq(d, by = "quarter", length.out = abs(k) + 1)[abs(k) + 1] # safe-ish
# More robust: use zoo/tsibble; but this works if date_q is quarter-end dates.
# ---- Colors consistent with your thesis palette ----
COL_EQ <- "#F8766D" # SBITOP
COL_RE <- "#B79F00" # Eurostat RE
COLS_REGIME <- c(
"Non-event quarters" = "grey",
"Euro area crisis quarters" = "green",
"COVID-19 shock quarters" = "red",
"Inflation & rate shock quarters" = "orange"
)
# ============================================================
# 1) Build scatter dataset (assumes df already exists from Fig 2A/2B)
# and contains: date_q, r_eq_total_log, r_re_eu_total_log
# ============================================================
scat <- df %>%
select(date_q, r_eq_total_log, r_re_eu_total_log) %>%
filter(!is.na(r_eq_total_log), !is.na(r_re_eu_total_log)) %>%
mutate(
# Label event windows as regimes; otherwise "Normal"
regime = case_when(
date_q >= (d_euro - 90*w_euro) & date_q <= (d_euro + 90*w_euro) ~ "Euro area crisis quarters",
date_q >= (d_covid - 90*w_covid) & date_q <= (d_covid + 90*w_covid) ~ "COVID-19 shock quarters",
date_q >= (d_infl - 90*w_infl) & date_q <= (d_infl + 90*w_infl) ~ "Inflation & rate shock quarters",
TRUE ~ "Non-event quarters"
),
regime = factor(regime, levels = c("Non-event quarters","Euro area crisis quarters","COVID-19 shock quarters","Inflation & rate shock quarters"))
)
# ============================================================
# 2) Plot
# ============================================================
p4b <- ggplot(scat, aes(x = r_eq_total_log, y = r_re_eu_total_log)) +
geom_hline(yintercept = 0, linewidth = 0.35, color = "grey70") +
geom_vline(xintercept = 0, linewidth = 0.35, color = "grey70") +
geom_point(aes(color = regime), size = 2.1, alpha = 0.85) +
scale_color_manual(values = COLS_REGIME) +
scale_x_continuous(labels = percent_format(accuracy = 1)) +
scale_y_continuous(labels = percent_format(accuracy = 1)) +
labs(
x = "Equities: SBITOP quarterly total return (log, nominal)",
y = "Real estate: Slovenia (Eurostat) quarterly total return (log, nominal)"
) +
theme_thesis() +
guides(color = guide_legend(ncol = 2, byrow = TRUE))
# ---- Add fitted line(s) ----
if (by_regime) {
# Separate OLS lines by regime (including Normal)
p4b <- p4b +
geom_smooth(aes(color = regime), method = "lm", se = FALSE, linewidth = 0.9)
} else {
# One overall OLS line
p4b <- p4b +
geom_smooth(color = "black", method = "lm", se = FALSE, linewidth = 0.9)
}
p4b
## `geom_smooth()` using formula = 'y ~ x'

ggsave(
filename = "Figure_14_scatter_RE_vs_EQ_regimes.png",
plot = p4b,
width = 12,
height = 7.0,
dpi = 300,
limitsize = FALSE
)
## `geom_smooth()` using formula = 'y ~ x'
3.10. Graph 15: Lead lag
# ============================================================
# FIGURE 15: Lead–lag / delayed adjustment (CCF)
# - Cross-correlation between equity and real estate returns
# - Uses quarterly NOMINAL total log returns:
# x_t = r_eq_total_log (SBITOP)
# y_t = r_re_eu_total_log (Slovenia RE, Eurostat)
# - Positive lags (k > 0): corr(x_{t-k}, y_t) => equities lead RE by k quarters
# - Negative lags (k < 0): corr(x_{t+|k|}, y_t) => RE leads equities by |k| quarters
# ============================================================
library(dplyr)
library(tidyr)
library(ggplot2)
library(scales)
# ---- Assumption: df already exists (from Figures 2A/2B chunks) with:
# df$date_q, df$r_eq_total_log, df$r_re_eu_total_log
# If not, create it: df <- master %>% arrange(date_q) %>% mutate(...) # as you did earlier
# ---- Parameters ----
max_lag_q <- 12 # show +/- 12 quarters (3 years)
alpha_sig <- 0.05
# ---- Prepare aligned returns (complete cases only) ----
ccf_df <- df %>%
select(date_q, r_eq_total_log, r_re_eu_total_log) %>%
filter(complete.cases(.)) %>%
arrange(date_q)
x <- ccf_df$r_eq_total_log
y <- ccf_df$r_re_eu_total_log
n <- length(x)
# ---- Compute CCF without plotting ----
cc <- stats::ccf(x, y, lag.max = max_lag_q, plot = FALSE, na.action = na.pass)
cc_plot <- tibble(
lag = as.integer(cc$lag),
corr = as.numeric(cc$acf)
)
# ---- Approx. significance bands (white-noise benchmark) ----
# For large n, approx SE(corr) ~ 1/sqrt(n). CI ~ +/- z * 1/sqrt(n)
z <- qnorm(1 - alpha_sig/2)
sig <- z / sqrt(n)
# ---- Event markers (same as earlier chapters; optional here) ----
events <- tibble::tribble(
~date, ~label,
as.Date("2011-09-30"), "Euro area crisis",
as.Date("2020-03-31"), "COVID-19 shock",
as.Date("2022-03-31"), "Inflation & rate shock"
)
# ---- Plot ----
p4c <- ggplot(cc_plot, aes(x = lag, y = corr)) +
geom_hline(yintercept = 0, linewidth = 0.4, color = "grey55") +
geom_hline(yintercept = c(-sig, sig), linetype = 3, linewidth = 0.4, color = "grey60") +
geom_col(width = 0.85, fill = "grey25") +
scale_x_continuous(
breaks = seq(-max_lag_q, max_lag_q, by = 2),
minor_breaks = NULL
) +
scale_y_continuous(limits = c(-1, 1), breaks = seq(-1, 1, by = 0.25)) +
labs(
x = "Lag (quarters) | k > 0: equities lead RE by k quarters",
y = "Cross-correlation"
) +
theme_thesis() +
theme(
legend.position = "none",
plot.margin = margin(t = 10, r = 10, b = 10, l = 10)
)
p4c

ggsave(
filename = "Figure_15_CCF_equity_vs_realestate_nominal.png",
plot = p4c,
width = 12,
height = 6.8,
dpi = 300,
limitsize = FALSE
)
3.11. Graph 16 & 17: Wealth and return divergence by region
# ============================================================
# FIGURE 16 & 17: Regional sensitivity within real estate
# 16: Small multiples (same scale) – wealth indices for SI, LJ, ex-LJ
# 17: Returns comparison by region – violin + boxplot
# Data: SURS regional HPI series + rent-income model (as in Ch. 3)
library(dplyr)
library(tidyr)
library(ggplot2)
library(scales)
library(lubridate)
library(stringr)
# -----------------------------
# Global events + theme (reuse)
# -----------------------------
events <- tibble::tribble(
~date, ~label,
as.Date("2011-09-30"), "Euro area crisis",
as.Date("2020-03-31"), "COVID-19 shock",
as.Date("2022-03-31"), "Inflation & rate shock"
)
theme_thesis <- function() {
theme_minimal(base_size = 12) +
theme(
plot.title = element_text(face = "bold", size = 14),
plot.subtitle = element_text(size = 10.5, color = "grey30"),
legend.position = "bottom",
legend.title = element_blank(),
legend.text = element_text(size = 9.5),
legend.key.width = unit(14, "pt"),
legend.spacing.x = unit(6, "pt"),
legend.box.margin = margin(t = 8),
plot.margin = margin(t = 10, r = 10, b = 10, l = 10),
panel.grid.minor = element_blank()
)
}
add_events <- function(p, y_top, events_df = events) {
p +
geom_vline(
data = events_df,
aes(xintercept = date),
linewidth = 0.4,
linetype = 3,
color = "grey60",
inherit.aes = FALSE
) +
geom_text(
data = events_df,
aes(x = date, y = 0.96 * y_top, label = label),
vjust = -0.3,
hjust = 0,
size = 3.2,
color = "grey35",
inherit.aes = FALSE
)
}
# -----------------------------
# Colors (match earlier figures)
# -----------------------------
COL_RE <- c(
"Real estate: Slovenia (SURS)" = "#00BFC4",
"Real estate: Ljubljana (SURS)" = "#619CFF",
"Real estate: Slovenia ex-LJ (SURS)" = "#C77CFF"
)
# Optional: keep RE style similar to earlier plots (dashed/dotted)
LT_RE <- c(
"Real estate: Slovenia (SURS)" = "solid",
"Real estate: Ljubljana (SURS)" = "solid",
"Real estate: Slovenia ex-LJ (SURS)" = "solid"
)
# ============================================================
# 1) Build SURS nominal total log returns (HPI + rent-income model)
# ============================================================
y0_ann <- 0.04 # baseline gross annual rental yield (same as earlier)
df_re <- master %>%
arrange(date_q) %>%
select(date_q, hicp_rent, hpi_si_surs_used, hpi_lj_surs_used, hpi_si_exlj_surs_used)
# Base quarter (first quarter with all inputs for anchoring rent/price yield model)
base <- df_re %>%
filter(
!is.na(hicp_rent),
!is.na(hpi_si_surs_used),
!is.na(hpi_lj_surs_used),
!is.na(hpi_si_exlj_surs_used)
) %>%
slice(1)
R0 <- base$hicp_rent
Hs0 <- base$hpi_si_surs_used
Hl0 <- base$hpi_lj_surs_used
Hx0 <- base$hpi_si_exlj_surs_used
df_re <- df_re %>%
mutate(
# price log returns
r_si_price_log = log(hpi_si_surs_used / lag(hpi_si_surs_used)),
r_lj_price_log = log(hpi_lj_surs_used / lag(hpi_lj_surs_used)),
r_ex_price_log = log(hpi_si_exlj_surs_used / lag(hpi_si_exlj_surs_used)),
# time-varying rental yields via rent/price ratio (anchored at base quarter)
y_si_t = y0_ann * ((hicp_rent / R0) / (hpi_si_surs_used / Hs0)),
y_lj_t = y0_ann * ((hicp_rent / R0) / (hpi_lj_surs_used / Hl0)),
y_ex_t = y0_ann * ((hicp_rent / R0) / (hpi_si_exlj_surs_used / Hx0)),
# quarterly rental income (lagged to avoid look-ahead)
r_si_income_q = lag(y_si_t) / 4,
r_lj_income_q = lag(y_lj_t) / 4,
r_ex_income_q = lag(y_ex_t) / 4,
# nominal total log returns
r_si_total_log = r_si_price_log + r_si_income_q,
r_lj_total_log = r_lj_price_log + r_lj_income_q,
r_ex_total_log = r_ex_price_log + r_ex_income_q
)
# ============================================================
# FIGURE 16: Wealth indices (3 lines, one panel, same scale)
# ============================================================
# Wealth indices from total log returns (start = 100 at beginning of series)
df_wealth <- df_re %>%
mutate(
idx_si = 100 * exp(cumsum(replace_na(r_si_total_log, 0))),
idx_lj = 100 * exp(cumsum(replace_na(r_lj_total_log, 0))),
idx_ex = 100 * exp(cumsum(replace_na(r_ex_total_log, 0)))
)
plot_5a <- df_wealth %>%
select(date_q, idx_si, idx_lj, idx_ex) %>%
pivot_longer(-date_q, names_to = "series", values_to = "wealth") %>%
mutate(
series = recode(series,
idx_si = "Real estate: Slovenia (SURS)",
idx_lj = "Real estate: Ljubljana (SURS)",
idx_ex = "Real estate: Slovenia ex-LJ (SURS)"
),
series = factor(series, levels = names(COL_RE))
)
y_top_5a <- max(plot_5a$wealth, na.rm = TRUE)
p5a <- ggplot(plot_5a, aes(date_q, wealth, color = series, linetype = series)) +
geom_line(linewidth = 1.05) +
scale_color_manual(values = COL_RE) +
scale_linetype_manual(values = LT_RE) +
scale_y_continuous(labels = label_number(accuracy = 1)) +
scale_x_date(date_breaks = "2 years", date_labels = "%Y") +
guides(
color = guide_legend(ncol = 2, byrow = TRUE),
linetype = "none"
) +
labs(
x = NULL,
y = "Wealth index (base = 100)"
) +
theme_thesis()
p5a <- add_events(p5a, y_top_5a)
p5a

ggsave(
filename = "Figure_16_regional_wealth_indices_one_panel.png",
plot = p5a,
width = 12,
height = 8.2,
dpi = 300,
limitsize = FALSE
)
# ============================================================
# FIGURE 17: Return distributions by region (violin + box)
# ============================================================
plot_5b <- df_re %>%
select(date_q, r_si_total_log, r_lj_total_log, r_ex_total_log) %>%
pivot_longer(-date_q, names_to = "series", values_to = "ret_log") %>%
filter(!is.na(ret_log)) %>%
mutate(
series = recode(series,
r_si_total_log = "Real estate: Slovenia (SURS)",
r_lj_total_log = "Real estate: Ljubljana (SURS)",
r_ex_total_log = "Real estate: Slovenia ex-LJ (SURS)"
),
series = factor(series, levels = names(COL_RE))
)
p5b <- ggplot(plot_5b, aes(x = series, y = ret_log, fill = series)) +
geom_hline(yintercept = 0, linewidth = 0.4, color = "grey70") +
geom_violin(trim = FALSE, linewidth = 0.7, alpha = 0.55) +
geom_boxplot(width = 0.18, outlier.size = 1.4, linewidth = 0.7) +
scale_fill_manual(values = COL_RE) +
scale_y_continuous(labels = percent_format(accuracy = 0.1)) +
labs(
x = NULL,
y = "Quarterly total return (log, nominal)"
) +
theme_thesis() +
theme(
legend.position = "none",
axis.text.x = element_text(size = 10)
)
p5b

ggsave(
filename = "Figure_17_regional_return_distributions_violin.png",
plot = p5b,
width = 12,
height = 7.2,
dpi = 300,
limitsize = FALSE
)
3.12. Graph 18: Drawdowns by region
# ============================================================
# FIGURE 18: Drawdowns by region (underwater plot, SURS)
# Goal: make "regional risk is different" visually obvious
# - Computes drawdowns from TOTAL RETURN wealth indices (nominal)
# - Regions: Slovenia (SURS), Ljubljana (SURS), Slovenia ex-LJ (SURS)
# ============================================================
library(dplyr)
library(tidyr)
library(ggplot2)
library(scales)
library(lubridate)
# ---- Event markers (reuse across Chapter 4/5) ----
events <- tibble::tribble(
~date, ~label,
as.Date("2011-09-30"), "Euro area crisis",
as.Date("2020-03-31"), "COVID-19 shock",
as.Date("2022-03-31"), "Inflation & rate shock"
)
# ---- Colors consistent with earlier figures ----
COL_REG <- c(
"Real estate: Slovenia (SURS)" = "#00BFC4",
"Real estate: Ljubljana (SURS)" = "#619CFF",
"Real estate: Slovenia ex-LJ (SURS)" = "#C77CFF"
)
# ---- Linetypes: keep RE dashed as in earlier figures ----
LT_REG <- c(
"Real estate: Slovenia (SURS)" = "solid",
"Real estate: Ljubljana (SURS)" = "solid",
"Real estate: Slovenia ex-LJ (SURS)" = "solid"
)
# 1) Build nominal total return wealth indices (base = 100)
# If you already built these indices earlier, you can skip this and just select them.
df_5c <- df %>%
arrange(date_q) %>%
transmute(
date_q,
idx_si = 100 * exp(cumsum(replace_na(r_re_si_total_log, 0))),
idx_lj = 100 * exp(cumsum(replace_na(r_re_lj_total_log, 0))),
idx_ex = 100 * exp(cumsum(replace_na(r_re_ex_total_log, 0)))
)
# 2) Compute drawdowns (underwater): DD_t = (Index_t / max_to_date(Index)) - 1
dd_from_index <- function(x) {
x / cummax(x) - 1
}
dd_long <- df_5c %>%
mutate(
dd_si = dd_from_index(idx_si),
dd_lj = dd_from_index(idx_lj),
dd_ex = dd_from_index(idx_ex)
) %>%
select(date_q, dd_si, dd_lj, dd_ex) %>%
pivot_longer(-date_q, names_to = "series", values_to = "dd") %>%
mutate(
series = recode(series,
dd_si = "Real estate: Slovenia (SURS)",
dd_lj = "Real estate: Ljubljana (SURS)",
dd_ex = "Real estate: Slovenia ex-LJ (SURS)"
),
series = factor(series, levels = names(COL_REG))
) %>%
filter(!is.na(dd))
# 3) Optional: identify and label max drawdown (MDD) point per region
mdd_pts <- dd_long %>%
group_by(series) %>%
slice_min(order_by = dd, n = 1, with_ties = FALSE) %>%
ungroup() %>%
mutate(mdd_lab = paste0("MDD: ", percent(dd, accuracy = 0.1)))
# 4) Build plot (overlay)
p5c <- ggplot(dd_long, aes(x = date_q, y = dd, color = series, linetype = series)) +
geom_hline(yintercept = 0, linewidth = 0.35, color = "grey65") +
geom_line(linewidth = 1.05) +
geom_point(
data = mdd_pts,
aes(x = date_q, y = dd),
inherit.aes = FALSE,
size = 2.0,
color = "grey25"
) +
geom_text(
data = mdd_pts,
aes(x = date_q, y = dd, label = mdd_lab),
inherit.aes = FALSE,
hjust = 0,
vjust = -0.5,
size = 3.2,
color = "grey25"
) +
geom_vline(
data = events,
aes(xintercept = date),
linewidth = 0.4,
linetype = 3,
color = "grey60",
inherit.aes = FALSE
) +
geom_text(
data = events,
aes(x = date, y = 0.02, label = label),
vjust = -0.3,
hjust = 0,
size = 3.2,
color = "grey35",
inherit.aes = FALSE
) +
scale_color_manual(values = COL_REG, drop = TRUE) +
scale_linetype_manual(values = LT_REG, drop = TRUE) +
scale_x_date(date_breaks = "2 years", date_labels = "%Y") +
scale_y_continuous(labels = percent_format(accuracy = 1)) +
guides(
color = guide_legend(ncol = 3, byrow = TRUE),
linetype = "none"
) +
labs(
x = NULL,
y = "Drawdown from peak (%, nominal)"
) +
theme_thesis()
p5c

ggsave(
filename = "Figure_18_drawdowns_by_region_SURS.png",
plot = p5c,
width = 12,
height = 7.8,
dpi = 300,
limitsize = FALSE
)
3.13. Graph 19: Holding period outcomes
# ============================================================
# FIGURE 19: Holding-period outcomes (net of costs)
# - Distribution of realized NET outcomes by holding period
# - Equity (SBITOP total return) vs Real estate (Eurostat SI, total return)
# - Shows timing risk + transaction-cost drag
#
# REQUIREMENTS / ASSUMPTIONS
# 1) You already have `df` from earlier (Figure 2A/2B construction), containing:
# date_q, r_eq_total_log, r_re_eu_total_log
# 2) You have loaded:
# - costs_base (from your RDS)
# - eq_cgt (capital gains tax schedule for equities, from your RDS)
# 3) You have `theme_thesis()` defined (as in your thesis toolkit).
#
# Notes:
# - Net outcome here is implemented as: log gross growth over horizon + log(1 - roundtrip_cost)
# and optionally minus capital-gains tax on the *price* gain component.
# - If your `costs_base` / `eq_cgt` objects use different column names, adjust the "PARAMS" block.
# ============================================================
library(dplyr)
library(tidyr)
library(ggplot2)
library(scales)
library(lubridate)
library(zoo)
# ---- Prereqs (must exist from earlier chunks) ----
# df with: date_q, r_eq_total_log, r_re_eu_total_log
# theme_thesis() defined
# --------------------------
# Holding periods (years)
# --------------------------
hp_years_vec <- c(1, 2, 3, 5, 7, 10, 15)
hp_q_vec <- hp_years_vec * 4
hp_levels <- paste0(hp_years_vec, "y") # UNIQUE levels -> fixes your duplicated factor issue
# --------------------------
# Costs and CGT schedules
# --------------------------
# Use your objects if they exist; otherwise safe defaults
eq_roundtrip_cost <- if (exists("costs_base")) {
if (is.data.frame(costs_base) && "eq_roundtrip_cost" %in% names(costs_base)) costs_base$eq_roundtrip_cost[1] else
if (is.list(costs_base) && "eq_roundtrip_cost" %in% names(costs_base)) costs_base$eq_roundtrip_cost else 0.002
} else 0.002
re_roundtrip_cost <- if (exists("costs_base")) {
if (is.data.frame(costs_base) && "re_roundtrip_cost" %in% names(costs_base)) costs_base$re_roundtrip_cost[1] else
if (is.list(costs_base) && "re_roundtrip_cost" %in% names(costs_base)) costs_base$re_roundtrip_cost else 0.08
} else 0.08
get_eq_cgt_rate <- function(years) {
if (!exists("eq_cgt")) return(0)
if (!is.data.frame(eq_cgt)) return(0)
y_col <- intersect(names(eq_cgt), c("years", "holding_years", "hp_years"))
r_col <- intersect(names(eq_cgt), c("rate", "tax_rate", "cgt_rate"))
if (length(y_col) != 1 || length(r_col) != 1) return(0)
tmp <- eq_cgt %>% arrange(.data[[y_col]]) %>% filter(.data[[y_col]] <= years)
if (nrow(tmp) == 0) {
return(eq_cgt %>% arrange(.data[[y_col]]) %>% slice(1) %>% pull(.data[[r_col]]))
}
tmp %>% slice_tail(n = 1) %>% pull(.data[[r_col]])
}
# Real estate CGT: set to 0 unless you explicitly model it elsewhere
get_re_cgt_rate <- function(years) 0
# --------------------------
# Long returns (two assets)
# --------------------------
ret_long <- df %>%
arrange(date_q) %>%
transmute(
date_q,
asset_eq = r_eq_total_log,
asset_re = r_re_eu_total_log
) %>%
pivot_longer(
cols = c(asset_eq, asset_re),
names_to = "asset_key",
values_to = "r_log"
) %>%
mutate(
asset = dplyr::recode(asset_key,
asset_eq = "Equities: SBITOP",
asset_re = "Real estate: Slovenia (Eurostat)"
)
) %>%
select(date_q, asset, r_log) %>%
filter(!is.na(r_log)) %>%
arrange(asset, date_q)
# --------------------------
# Build outcomes (robust loop; no group_modify pitfalls)
# --------------------------
roll_sum_future <- function(x, k) {
# sum_{i=1..k} x_{t+i} implemented as rollapplyr on lead(x,1)
zoo::rollapplyr(dplyr::lead(x, 1), width = k, FUN = sum, fill = NA, partial = FALSE)
}
assets <- unique(ret_long$asset)
outcomes <- bind_rows(lapply(assets, function(a) {
d <- ret_long %>% filter(asset == a) %>% arrange(date_q)
rt_cost <- if (a == "Equities: SBITOP") eq_roundtrip_cost else re_roundtrip_cost
bind_rows(lapply(seq_along(hp_q_vec), function(i) {
Hy <- hp_years_vec[i]
Hq <- hp_q_vec[i]
gross_log <- roll_sum_future(d$r_log, Hq)
gross_simple <- exp(gross_log) - 1
cgt_rate <- if (a == "Equities: SBITOP") get_eq_cgt_rate(Hy) else get_re_cgt_rate(Hy)
cgt_drag <- cgt_rate * pmax(gross_simple, 0)
net_simple <- (1 + gross_simple) * (1 - rt_cost) - 1 - cgt_drag
tibble(
date_q = d$date_q,
asset = a,
hp_years = Hy,
hp_label = factor(paste0(Hy, "y"), levels = hp_levels),
gross_log = gross_log,
gross_simple = gross_simple,
net_simple = net_simple
)
}))
})) %>%
filter(!is.na(net_simple))
# --------------------------
# Plot: violin + box by holding period, split by asset
# --------------------------
COL_2 <- c(
"Equities: SBITOP" = "#F8766D",
"Real estate: Slovenia (Eurostat)" = "#B79F00"
)
p6a <- ggplot(outcomes, aes(x = hp_label, y = net_simple, fill = asset)) +
geom_hline(yintercept = 0, linewidth = 0.35, color = "grey65") +
geom_violin(
trim = TRUE,
alpha = 0.45,
width = 0.95,
position = position_dodge(width = 0.82),
color = "grey25"
) +
geom_boxplot(
width = 0.22,
outlier.size = 1.1,
outlier.alpha = 0.65,
position = position_dodge(width = 0.82),
color = "grey20"
) +
scale_fill_manual(values = COL_2) +
scale_y_continuous(labels = percent_format(accuracy = 1)) +
labs(
x = "Holding period",
y = "Net realized outcome (simple return, %)"
) +
theme_thesis() +
guides(fill = guide_legend(ncol = 2, byrow = TRUE))
p6a

ggsave(
"Figure_19_holding_period_net_outcomes.png",
p6a, width = 12, height = 8.2, dpi = 300, limitsize = FALSE
)
3.14. Graph 20 & 21: Break-even yield and scenario
robustness
names(master)
## [1] "q_id" "date_q" "year"
## [4] "qtr" "hpi_si_eurostat" "hpi_si_surs_used"
## [7] "hpi_lj_surs_used" "hpi_si_exlj_surs_used" "hicp_rent"
## [10] "hicp_all" "sbitop_close" "dy_eq_ann"
## [13] "rf_log_q" "rf_simple_q" "rf_rate_ann_qavg_pct"
names(df)
## [1] "q_id" "date_q" "year"
## [4] "qtr" "hpi_si_eurostat" "hpi_si_surs_used"
## [7] "hpi_lj_surs_used" "hpi_si_exlj_surs_used" "hicp_rent"
## [10] "hicp_all" "sbitop_close" "dy_eq_ann"
## [13] "rf_log_q" "rf_simple_q" "rf_rate_ann_qavg_pct"
## [16] "pi_log" "dy_eq_q" "r_eq_price_log"
## [19] "r_eq_total_log" "r_re_eu_price_log" "y_eu_t"
## [22] "r_re_eu_income_q" "r_re_eu_total_log" "r_re_si_price_log"
## [25] "r_re_lj_price_log" "r_re_ex_price_log" "y_si_t"
## [28] "y_lj_t" "y_ex_t" "r_re_si_income_q"
## [31] "r_re_lj_income_q" "r_re_ex_income_q" "r_re_si_total_log"
## [34] "r_re_lj_total_log" "r_re_ex_total_log" "r_eq_total_real_log"
## [37] "r_re_eu_total_real_log" "r_re_si_total_real_log" "r_re_lj_total_real_log"
## [40] "r_re_ex_total_real_log"
str(costs_base)
## tibble [14 × 8] (S3: tbl_df/tbl/data.frame)
## $ asset : chr [1:14] "RE" "RE" "RE" "RE" ...
## $ cost_group: chr [1:14] "ONGOING" "ONGOING" "ONGOING" "ONGOING" ...
## $ cost_name : chr [1:14] "maintenance" "vacancy" "property_tax" "insurance" ...
## $ applies_to: chr [1:14] "property_value" "gross_rent" "property_value" "property_value" ...
## $ timing : chr [1:14] "ANNUAL" "ANNUAL" "ANNUAL" "ANNUAL" ...
## $ rate : num [1:14] 0.01 0.05 0.005 0.002 0.08 0.002 0.005 0.02 0.02 0.02 ...
## $ rate_unit : chr [1:14] "decimal" "decimal" "decimal" "decimal" ...
## $ note : chr [1:14] "1% of property value per year" "5% of time vacant (reduces rent)" "0.5% of property value per year" "0.2% of property value per year" ...
head(eq_cgt)
## # A tibble: 4 × 5
## holding_years_min holding_years_max tax_rate rate_unit note
## <dbl> <dbl> <dbl> <chr> <chr>
## 1 0 5 0.25 decimal 25% if held <5 years
## 2 5 10 0.2 decimal 20% if held 5–10 years
## 3 10 15 0.15 decimal 15% if held 10–15 years
## 4 15 999 0 decimal No tax if held >=15 ye…
# ============================================================
# FIGURE 20: Break-even yield map using TERMINAL WEALTH RATIO GAP
# gap_wr = median( (1+RE_net)/(1+EQ_net) - 1 )
# gap_wr > 0 => RE ends with more wealth than EQ
# gap_wr = 0 => break-even
# gap_wr < 0 => EQ ends with more wealth than RE
#
# FIGURE 21: Tornado sensitivity for the same metric
# includes legend explaining bars, dots, baseline
# ============================================================
library(dplyr)
library(tidyr)
library(ggplot2)
library(scales)
library(lubridate)
library(zoo)
library(stringr)
library(forcats)
# --------------------------
# 0) Helpers: robust params
# --------------------------
pull_cost <- function(costs_base, field, default) {
if (!exists("costs_base")) return(default)
if (is.data.frame(costs_base) && field %in% names(costs_base)) return(as.numeric(costs_base[[field]][1]))
if (is.list(costs_base) && field %in% names(costs_base)) return(as.numeric(costs_base[[field]]))
default
}
eq_roundtrip_cost_base <- pull_cost(costs_base, "eq_roundtrip_cost", 0.002)
re_roundtrip_cost_base <- pull_cost(costs_base, "re_roundtrip_cost", 0.08)
get_eq_cgt_rate <- function(years) {
if (!exists("eq_cgt")) return(0)
if (!is.data.frame(eq_cgt)) return(0)
y_col <- intersect(names(eq_cgt), c("years", "holding_years", "hp_years"))
r_col <- intersect(names(eq_cgt), c("rate", "tax_rate", "cgt_rate"))
if (length(y_col) != 1 || length(r_col) != 1) return(0)
tmp <- eq_cgt %>% arrange(.data[[y_col]]) %>% filter(.data[[y_col]] <= years)
if (nrow(tmp) == 0) {
return(eq_cgt %>% arrange(.data[[y_col]]) %>% slice(1) %>% pull(.data[[r_col]]))
}
tmp %>% slice_tail(n = 1) %>% pull(.data[[r_col]])
}
roll_sum_future <- function(x, k) {
zoo::rollapplyr(dplyr::lead(x, 1), width = k, FUN = sum, fill = NA, partial = FALSE)
}
# --------------------------
# 1) Build EQ and RE quarterly log total returns
# (Same construction as earlier, but self-contained here)
# --------------------------
df_base <- df %>%
arrange(date_q) %>%
select(date_q, sbitop_close, dy_eq_ann, hicp_rent, hpi_si_eurostat) %>%
filter(!is.na(date_q))
base_anchor <- df_base %>%
filter(!is.na(hicp_rent), !is.na(hpi_si_eurostat)) %>%
slice(1)
R0 <- base_anchor$hicp_rent
H0 <- base_anchor$hpi_si_eurostat
df_eq <- df_base %>%
mutate(
dy_eq_q = dy_eq_ann / 4,
r_eq_price_log = log(sbitop_close / lag(sbitop_close)),
r_eq_total_log = r_eq_price_log + dy_eq_q
) %>%
select(date_q, r_eq_total_log)
df_re_price <- df_base %>%
mutate(
r_re_price_log = log(hpi_si_eurostat / lag(hpi_si_eurostat)),
rent_price_ratio_rel = (hicp_rent / R0) / (hpi_si_eurostat / H0)
) %>%
select(date_q, r_re_price_log, rent_price_ratio_rel)
# --------------------------
# 2) Metric choices (pick one)
# --------------------------
# (A) Wealth ratio gap (recommended): (1+RE_net)/(1+EQ_net) - 1
gap_wealth_ratio <- function(net_eq_simple, net_re_simple) {
# drop cases where either side <= -100% (wealth <= 0) to avoid nonsense ratios
ok <- is.finite(net_eq_simple) & is.finite(net_re_simple) &
(1 + net_eq_simple) > 0 & (1 + net_re_simple) > 0
if (!any(ok)) return(NA_real_)
median((1 + net_re_simple[ok]) / (1 + net_eq_simple[ok]) - 1, na.rm = TRUE)
}
# (B) Log gap option: log(1+RE_net) - log(1+EQ_net)
gap_log <- function(net_eq_simple, net_re_simple) {
ok <- is.finite(net_eq_simple) & is.finite(net_re_simple) &
(1 + net_eq_simple) > 0 & (1 + net_re_simple) > 0
if (!any(ok)) return(NA_real_)
median(log(1 + net_re_simple[ok]) - log(1 + net_eq_simple[ok]), na.rm = TRUE)
}
# Choose which metric you want for 6B/6C:
GAP_METRIC <- "wealth_ratio" # "wealth_ratio" or "log_gap"
# --------------------------
# 3) Net-outcome simulator for a single (hp_years, y0, eq_cost, re_cost)
# --------------------------
calc_gap_metric <- function(hp_years, y0, eq_cost, re_cost) {
Hq <- hp_years * 4
re_series <- df_re_price %>%
mutate(
y_t = y0 * rent_price_ratio_rel,
r_re_total_log = r_re_price_log + (lag(y_t) / 4)
) %>%
select(date_q, r_re_total_log)
d <- df_eq %>%
left_join(re_series, by = "date_q") %>%
filter(!is.na(r_eq_total_log), !is.na(r_re_total_log)) %>%
arrange(date_q)
if (nrow(d) < (Hq + 2)) return(NA_real_)
gross_eq_log <- roll_sum_future(d$r_eq_total_log, Hq)
gross_re_log <- roll_sum_future(d$r_re_total_log, Hq)
gross_eq_simple <- exp(gross_eq_log) - 1
gross_re_simple <- exp(gross_re_log) - 1
# Equity CGT drag (apply to positive gains)
cgt_rate <- get_eq_cgt_rate(hp_years)
cgt_drag <- cgt_rate * pmax(gross_eq_simple, 0)
# Net simple outcomes
net_eq_simple <- (1 + gross_eq_simple) * (1 - eq_cost) - 1 - cgt_drag
net_re_simple <- (1 + gross_re_simple) * (1 - re_cost) - 1
# Metric
if (GAP_METRIC == "wealth_ratio") {
return(gap_wealth_ratio(net_eq_simple, net_re_simple))
} else if (GAP_METRIC == "log_gap") {
return(gap_log(net_eq_simple, net_re_simple))
} else {
stop("Unknown GAP_METRIC.")
}
}
# ============================================================
# FIGURE 20: Heatmap + break-even contour
# ============================================================
hp_years_vec <- c(1, 2, 3, 5, 7, 10)
hp_labels <- paste0(hp_years_vec, "y")
y0_grid <- seq(0.02, 0.0625, by = 0.0025) # 2.0% to 6.25%
grid_6b <- tidyr::expand_grid(
hp_years = hp_years_vec,
y0 = y0_grid
) %>%
mutate(
gap = mapply(calc_gap_metric, hp_years, y0,
MoreArgs = list(eq_cost = eq_roundtrip_cost_base, re_cost = re_roundtrip_cost_base)),
hp_label = factor(paste0(hp_years, "y"), levels = hp_labels)
) %>%
filter(is.finite(gap))
fill_title <- if (GAP_METRIC == "wealth_ratio") {
"Median wealth ratio gap\n(RE vs EQ)"
} else {
"Median log gap\nlog(RE) − log(EQ)"
}
xlab_gap <- if (GAP_METRIC == "wealth_ratio") {
"Median terminal wealth ratio: (1+RE)/(1+EQ) − 1"
} else {
"Median log wealth gap: log(1+RE) − log(1+EQ)"
}
p6b <- ggplot(grid_6b, aes(x = hp_label, y = y0, fill = gap)) +
geom_tile(color = "white", linewidth = 0.35) +
geom_contour(aes(z = gap), breaks = 0, color = "grey20", linewidth = 0.7) +
scale_y_continuous(labels = percent_format(accuracy = 0.1)) +
{
if (GAP_METRIC == "wealth_ratio") {
scale_fill_gradient2(
midpoint = 0,
labels = percent_format(accuracy = 1),
guide = guide_colorbar(
title = fill_title,
title.position = "top",
barwidth = unit(85, "pt"),
barheight = unit(12, "pt")
)
)
} else {
scale_fill_gradient2(
midpoint = 0,
labels = label_number(accuracy = 0.01),
guide = guide_colorbar(
title = fill_title,
title.position = "top",
barwidth = unit(85, "pt"),
barheight = unit(12, "pt")
)
)
}
} +
labs(
x = "Holding period",
y = "Base gross annual yield y0"
) +
theme_thesis() +
theme(
legend.position = "bottom",
legend.box.margin = margin(t = 10),
legend.text = element_text(size = 10),
plot.margin = margin(t = 10, r = 16, b = 24, l = 10)
)
p6b
## Warning: `stat_contour()`: Zero contours were generated
## Warning in min(x): no non-missing arguments to min; returning Inf
## Warning in max(x): no non-missing arguments to max; returning -Inf
## Warning: `stat_contour()`: Zero contours were generated
## Warning in min(x): no non-missing arguments to min; returning Inf
## Warning in max(x): no non-missing arguments to max; returning -Inf
## Warning: `stat_contour()`: Zero contours were generated
## Warning in min(x): no non-missing arguments to min; returning Inf
## Warning in max(x): no non-missing arguments to max; returning -Inf
## Warning: `stat_contour()`: Zero contours were generated
## Warning in min(x): no non-missing arguments to min; returning Inf
## Warning in max(x): no non-missing arguments to max; returning -Inf
## Warning: `stat_contour()`: Zero contours were generated
## Warning in min(x): no non-missing arguments to min; returning Inf
## Warning in max(x): no non-missing arguments to max; returning -Inf
## Warning: `stat_contour()`: Zero contours were generated
## Warning in min(x): no non-missing arguments to min; returning Inf
## Warning in max(x): no non-missing arguments to max; returning -Inf

ggsave(
filename = "Figure_20_break_even_yield_map.png",
plot = p6b,
width = 12.5,
height = 9.2,
dpi = 300,
limitsize = FALSE
)
## Warning: `stat_contour()`: Zero contours were generated
## Warning in min(x): no non-missing arguments to min; returning Inf
## Warning in max(x): no non-missing arguments to max; returning -Inf
## Warning: `stat_contour()`: Zero contours were generated
## Warning in min(x): no non-missing arguments to min; returning Inf
## Warning in max(x): no non-missing arguments to max; returning -Inf
## Warning: `stat_contour()`: Zero contours were generated
## Warning in min(x): no non-missing arguments to min; returning Inf
## Warning in max(x): no non-missing arguments to max; returning -Inf
## Warning: `stat_contour()`: Zero contours were generated
## Warning in min(x): no non-missing arguments to min; returning Inf
## Warning in max(x): no non-missing arguments to max; returning -Inf
## Warning: `stat_contour()`: Zero contours were generated
## Warning in min(x): no non-missing arguments to min; returning Inf
## Warning in max(x): no non-missing arguments to max; returning -Inf
## Warning: `stat_contour()`: Zero contours were generated
## Warning in min(x): no non-missing arguments to min; returning Inf
## Warning in max(x): no non-missing arguments to max; returning -Inf
# ============================================================
# FIGURE 21: Tornado sensitivity (same metric)
# ============================================================
hp_star <- 10
y0_star <- 0.04
lever_ranges <- tibble::tribble(
~lever, ~low, ~high,
"Base gross yield y0", 0.02, 0.06,
"RE roundtrip cost", 0.05, 0.12,
"Equity roundtrip cost", 0.0005, 0.01
)
gap_baseline <- calc_gap_metric(hp_star, y0_star, eq_roundtrip_cost_base, re_roundtrip_cost_base)
tornado_df <- lever_ranges %>%
rowwise() %>%
mutate(
gap_low = case_when(
lever == "Base gross yield y0" ~ calc_gap_metric(hp_star, low, eq_roundtrip_cost_base, re_roundtrip_cost_base),
lever == "RE roundtrip cost" ~ calc_gap_metric(hp_star, y0_star, eq_roundtrip_cost_base, low),
lever == "Equity roundtrip cost" ~ calc_gap_metric(hp_star, y0_star, low, re_roundtrip_cost_base),
TRUE ~ NA_real_
),
gap_high = case_when(
lever == "Base gross yield y0" ~ calc_gap_metric(hp_star, high, eq_roundtrip_cost_base, re_roundtrip_cost_base),
lever == "RE roundtrip cost" ~ calc_gap_metric(hp_star, y0_star, eq_roundtrip_cost_base, high),
lever == "Equity roundtrip cost" ~ calc_gap_metric(hp_star, y0_star, high, re_roundtrip_cost_base),
TRUE ~ NA_real_
)
) %>%
ungroup() %>%
mutate(
span = abs(gap_high - gap_low),
lo = pmin(gap_low, gap_high),
hi = pmax(gap_low, gap_high),
lever = fct_reorder(lever, span)
)
endpoints <- tornado_df %>%
select(lever, gap_low, gap_high) %>%
pivot_longer(cols = c(gap_low, gap_high), names_to = "endpoint", values_to = "gap") %>%
mutate(
endpoint = recode(endpoint, gap_low = "Low case", gap_high = "High case"),
endpoint = factor(endpoint, levels = c("Low case", "High case"))
)
x_scale <- if (GAP_METRIC == "wealth_ratio") {
scale_x_continuous(labels = percent_format(accuracy = 1))
} else {
scale_x_continuous(labels = label_number(accuracy = 0.01))
}
x_title <- if (GAP_METRIC == "wealth_ratio") {
"Median terminal wealth ratio gap: (1+RE)/(1+EQ) − 1"
} else {
"Median log wealth gap: log(1+RE) − log(1+EQ)"
}
p6c <- ggplot(tornado_df, aes(y = lever)) +
geom_vline(aes(xintercept = gap_baseline, linetype = "Baseline"), linewidth = 0.9, color = "grey15") +
geom_segment(
aes(x = lo, xend = hi, yend = lever, color = "Sensitivity range"),
linewidth = 10,
lineend = "butt",
alpha = 0.35
) +
geom_point(
data = endpoints,
aes(x = gap, y = lever, shape = endpoint),
size = 2.8,
color = "grey15",
inherit.aes = FALSE
) +
x_scale +
scale_color_manual(
values = c("Sensitivity range" = "grey40"),
name = NULL
) +
scale_linetype_manual(
values = c("Baseline" = "solid"),
name = NULL
) +
scale_shape_manual(
values = c("Low case" = 16, "High case" = 16),
name = NULL
) +
guides(
color = guide_legend(order = 1, override.aes = list(linewidth = 6, alpha = 0.35)),
shape = guide_legend(order = 2),
linetype = guide_legend(order = 3, override.aes = list(color = "grey15", linewidth = 0.9))
) +
labs(
x = x_title,
y = NULL
) +
theme_thesis() +
theme(
legend.position = "bottom",
legend.box = "vertical",
legend.spacing.y = unit(4, "pt"),
plot.margin = margin(t = 10, r = 16, b = 18, l = 10)
)
p6c

ggsave(
filename = "Figure_21_tornado_sensitivity.png",
plot = p6c,
width = 12.5,
height = 7.6,
dpi = 300,
limitsize = FALSE
)