1. Importing RAW data and adjusting to quarterly frequency
1.1. HICP Rentals (monthly to quarterly)
library(readxl)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(lubridate)
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
library(readr)
## Warning: package 'readr' was built under R version 4.4.3
library(tidyverse)
## Warning: package 'ggplot2' was built under R version 4.4.3
## Warning: package 'tidyr' was built under R version 4.4.3
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats 1.0.1 ✔ stringr 1.6.0
## ✔ ggplot2 4.0.1 ✔ tibble 3.3.0
## ✔ purrr 1.2.0 ✔ tidyr 1.3.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
pathHICPRental <- "/Users/lukavrtac/Faks - EF/IMB/Master thesis/Data/R ready/HICP rentals monthly RAW.xlsx"
rent_raw <- read_excel(pathHICPRental, sheet = 1)
names(rent_raw)
## [1] "date" "rent_index"
rent_m <- rent_raw %>%
transmute(
date = as.Date(date),
rent_index = parse_number(as.character(rent_index),
locale = locale(decimal_mark = ".", grouping_mark = ","))
) %>%
arrange(date)
# Convert to quarterly (quarter-end months: Mar/Jun/Sep/Dec)
rent_q <- rent_m %>%
mutate(
q_id = paste0(year(date), "Q", quarter(date)),
m = month(date)
) %>%
filter(m %in% c(3, 6, 9, 12)) %>%
group_by(q_id) %>%
slice_max(date, n = 1, with_ties = FALSE) %>%
ungroup() %>%
transmute(q_id, date_q = date, hicp_rent = rent_index)
head(rent_q)
## # A tibble: 6 × 3
## q_id date_q hicp_rent
## <chr> <date> <dbl>
## 1 2010Q1 2010-03-31 102.
## 2 2010Q2 2010-06-30 102.
## 3 2010Q3 2010-09-30 102.
## 4 2010Q4 2010-12-31 102.
## 5 2011Q1 2011-03-31 102.
## 6 2011Q2 2011-06-30 102.
tail(rent_q)
## # A tibble: 6 × 3
## q_id date_q hicp_rent
## <chr> <date> <dbl>
## 1 2023Q3 2023-09-30 168.
## 2 2023Q4 2023-12-31 164.
## 3 2024Q1 2024-03-31 165.
## 4 2024Q2 2024-06-30 171.
## 5 2024Q3 2024-09-30 177.
## 6 2024Q4 2024-12-31 173.
range(rent_q$date_q)
## [1] "2010-03-31" "2024-12-31"
anyDuplicated(rent_q$q_id)
## [1] 0
1.2. HICP All items (monthly to quarterly)
pathHICPall <- "/Users/lukavrtac/Faks - EF/IMB/Master thesis/Data/R ready/HICP monthly inflation RAW.xlsx"
hicp_all_raw <- read_excel(pathHICPall, sheet = 1)
names(hicp_all_raw)
## [1] "Date" "hicp_all"
hicp_all_m <- hicp_all_raw %>%
transmute(
date = as.Date(Date),
hicp_all = parse_number(as.character(hicp_all),
locale = locale(decimal_mark = ".", grouping_mark = ","))
) %>%
arrange(date)
# Convert to quarterly (quarter-end months: Mar/Jun/Sep/Dec)
hicp_all_q <- hicp_all_m %>%
mutate(
q_id = paste0(year(date), "Q", quarter(date)),
m = month(date)
) %>%
filter(m %in% c(3, 6, 9, 12)) %>%
group_by(q_id) %>%
slice_max(date, n = 1, with_ties = FALSE) %>%
ungroup() %>%
transmute(q_id, date_q = date, hicp_all = hicp_all)
head(hicp_all_q)
## # A tibble: 6 × 3
## q_id date_q hicp_all
## <chr> <date> <dbl>
## 1 2010Q1 2010-03-31 93.2
## 2 2010Q2 2010-06-30 94.8
## 3 2010Q3 2010-09-30 93.9
## 4 2010Q4 2010-12-31 94.5
## 5 2011Q1 2011-03-31 95.4
## 6 2011Q2 2011-06-30 96.2
tail(hicp_all_q)
## # A tibble: 6 × 3
## q_id date_q hicp_all
## <chr> <date> <dbl>
## 1 2023Q3 2023-09-30 128.
## 2 2023Q4 2023-12-31 126.
## 3 2024Q1 2024-03-31 127.
## 4 2024Q2 2024-06-30 128.
## 5 2024Q3 2024-09-30 128.
## 6 2024Q4 2024-12-31 129.
anyDuplicated(hicp_all_q$q_id)
## [1] 0
range(hicp_all_q$date_q)
## [1] "2010-03-31" "2024-12-31"
1.3. HPI Eurostat & SURS (Slovenia, Ljubljana, Slovenia
excluding Ljubljana)
path_hpi <- "/Users/lukavrtac/Faks - EF/IMB/Master thesis/Data/R ready/HPI index quarter eurostat+surs RAW.xlsx"
hpi_raw <- read_excel(path_hpi, sheet = 1)
names(hpi_raw)
## [1] "date_q" "hpi_si_eurostat" "hpi_si_surs_used"
## [4] "hpi_lj_surs_used" "hpi_si_exlj_surs_used"
hpi_q <- hpi_raw %>%
transmute(
date_q = as.Date(date_q),
hpi_si_eurostat = parse_number(as.character(hpi_si_eurostat),
locale = locale(decimal_mark = ".", grouping_mark = ",")),
hpi_si_surs_used = parse_number(as.character(hpi_si_surs_used),
locale = locale(decimal_mark = ".", grouping_mark = ",")),
hpi_lj_surs_used = parse_number(as.character(hpi_lj_surs_used),
locale = locale(decimal_mark = ".", grouping_mark = ",")),
hpi_si_exlj_surs_used = parse_number(as.character(hpi_si_exlj_surs_used),
locale = locale(decimal_mark = ".", grouping_mark = ","))
) %>%
mutate(
q_id = paste0(year(date_q), "Q", quarter(date_q))
) %>%
arrange(date_q) %>%
select(q_id, date_q,
hpi_si_eurostat, hpi_si_surs_used, hpi_lj_surs_used, hpi_si_exlj_surs_used)
head(hpi_q)
## # A tibble: 6 × 6
## q_id date_q hpi_si_eurostat hpi_si_surs_used hpi_lj_surs_used
## <chr> <date> <dbl> <dbl> <dbl>
## 1 2010Q1 2010-03-31 117. 118. 120.
## 2 2010Q2 2010-06-30 118. 118. 119.
## 3 2010Q3 2010-09-30 117. 117. 119.
## 4 2010Q4 2010-12-31 117. 116. 122.
## 5 2011Q1 2011-03-31 122. 119. 125.
## 6 2011Q2 2011-06-30 122. 120. 126.
## # ℹ 1 more variable: hpi_si_exlj_surs_used <dbl>
tail(hpi_q)
## # A tibble: 6 × 6
## q_id date_q hpi_si_eurostat hpi_si_surs_used hpi_lj_surs_used
## <chr> <date> <dbl> <dbl> <dbl>
## 1 2023Q3 2023-09-30 186. 197. 187.
## 2 2023Q4 2023-12-31 191. 205. 194.
## 3 2024Q1 2024-03-31 195. 211. 199.
## 4 2024Q2 2024-06-30 198. 213. 201.
## 5 2024Q3 2024-09-30 202. 215. 202.
## 6 2024Q4 2024-12-31 206. 219. 204.
## # ℹ 1 more variable: hpi_si_exlj_surs_used <dbl>
anyDuplicated(hpi_q$q_id)
## [1] 0
range(hpi_q$date_q)
## [1] "2010-03-31" "2024-12-31"
colSums(is.na(hpi_q))
## q_id date_q hpi_si_eurostat
## 0 0 0
## hpi_si_surs_used hpi_lj_surs_used hpi_si_exlj_surs_used
## 0 0 0
1.4. Risk-free Rate (EONIA & ESTR daily)
path_rf <- "/Users/lukavrtac/Faks - EF/IMB/Master thesis/Data/R ready/Risk free rate RAW.xlsx"
eonia_raw <- read_excel(path_rf, sheet = 1)
estr_raw <- read_excel(path_rf, sheet = 2)
names(eonia_raw)
## [1] "month_end_date" "eonia_rate_pct"
names(estr_raw)
## [1] "date" "estr_rate_pct"
to_q_id <- function(d) paste0(year(d), "Q", quarter(d))
eonia_m <- eonia_raw %>%
transmute(
date = as.Date(month_end_date),
rate_pct = parse_number(as.character(eonia_rate_pct),
locale = locale(decimal_mark = ".", grouping_mark = ","))
) %>%
arrange(date)
estr_d <- estr_raw %>%
transmute(
date = as.Date(date),
rate_pct = parse_number(as.character(estr_rate_pct),
locale = locale(decimal_mark = ".", grouping_mark = ","))
) %>%
arrange(date)
# 1) EONIA monthly -> quarterly (compound monthly accruals by days-in-month)
rf_q_eonia <- eonia_m %>%
mutate(
q_id = to_q_id(date),
r = rate_pct / 100, # convert to decimal
dim = days_in_month(date), # days in month
log_accrual = log(1 + r * (dim/360))# ACT/360 monthly accrual
) %>%
group_by(q_id) %>%
summarise(rf_log_q = sum(log_accrual, na.rm = TRUE),
rf_simple_q = exp(rf_log_q) - 1,
.groups = "drop") %>%
arrange(q_id)
# 2) €STR daily -> quarterly (compound daily accruals)
rf_q_estr <- estr_d %>%
mutate(
q_id = to_q_id(date),
r = rate_pct / 100,
log_accrual = log(1 + r * (1/360)) # ACT/360 daily accrual
) %>%
group_by(q_id) %>%
summarise(rf_log_q = sum(log_accrual, na.rm = TRUE),
rf_simple_q = exp(rf_log_q) - 1,
.groups = "drop") %>%
arrange(q_id)
# Stitch into one quarterly series (EONIA < 2022Q1, €STR >= 2022Q1)
rf_q <- rf_q_eonia %>%
filter(q_id < "2022Q1") %>%
bind_rows(rf_q_estr %>% filter(q_id >= "2022Q1")) %>%
arrange(q_id)
rf_q <- rf_q %>%
mutate(
year = as.integer(substr(q_id, 1, 4)),
qtr = as.integer(substr(q_id, 6, 6)),
date_q = as.Date(NA)
) %>%
mutate(
date_q = case_when(
qtr == 1 ~ as.Date(paste0(year, "-03-31")),
qtr == 2 ~ as.Date(paste0(year, "-06-30")),
qtr == 3 ~ as.Date(paste0(year, "-09-30")),
qtr == 4 ~ as.Date(paste0(year, "-12-31"))
)
) %>%
select(q_id, date_q, rf_log_q, rf_simple_q)
# Adding quarterly average annualized rate (% p.a.)
# EONIA monthly: simple average of monthly rates within quarter
rf_rate_q_eonia <- eonia_m %>%
mutate(q_id = paste0(year(date), "Q", quarter(date))) %>%
group_by(q_id) %>%
summarise(rf_rate_ann_qavg_pct = mean(rate_pct, na.rm = TRUE), .groups = "drop")
# €STR daily: simple average of daily rates within quarter
rf_rate_q_estr <- estr_d %>%
mutate(q_id = paste0(year(date), "Q", quarter(date))) %>%
group_by(q_id) %>%
summarise(rf_rate_ann_qavg_pct = mean(rate_pct, na.rm = TRUE), .groups = "drop")
# Stitch rates the same way as returns
rf_rate_q <- rf_rate_q_eonia %>%
filter(q_id < "2022Q1") %>%
bind_rows(rf_rate_q_estr %>% filter(q_id >= "2022Q1")) %>%
arrange(q_id)
# Merge into existing table
rf_q <- rf_q %>%
left_join(rf_rate_q, by = "q_id") %>%
select(q_id, date_q, rf_rate_ann_qavg_pct, rf_log_q, rf_simple_q)
# Quick check
head(rf_q)
## # A tibble: 6 × 5
## q_id date_q rf_rate_ann_qavg_pct rf_log_q rf_simple_q
## <chr> <date> <dbl> <dbl> <dbl>
## 1 2010Q1 2010-03-31 0.344 0.000860 0.000861
## 2 2010Q2 2010-06-30 0.350 0.000886 0.000886
## 3 2010Q3 2010-09-30 0.453 0.00116 0.00116
## 4 2010Q4 2010-12-31 0.597 0.00153 0.00153
## 5 2011Q1 2011-03-31 0.675 0.00168 0.00169
## 6 2011Q2 2011-06-30 1.04 0.00263 0.00263
tail(rf_q)
## # A tibble: 6 × 5
## q_id date_q rf_rate_ann_qavg_pct rf_log_q rf_simple_q
## <chr> <date> <dbl> <dbl> <dbl>
## 1 2023Q3 2023-09-30 3.45 0.00614 0.00616
## 2 2023Q4 2023-12-31 3.63 0.00626 0.00628
## 3 2024Q1 2024-03-31 3.82 0.00647 0.00649
## 4 2024Q2 2024-06-30 3.76 0.00690 0.00692
## 5 2024Q3 2024-09-30 3.64 0.00648 0.00650
## 6 2024Q4 2024-12-31 3.37 0.00608 0.00610
anyDuplicated(rf_q$q_id)
## [1] 0
range(rf_q$date_q)
## [1] "2010-03-31" "2024-12-31"
summary(rf_q$rf_simple_q)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -1.248e-03 -9.151e-04 -7.721e-05 6.838e-04 9.867e-04 6.920e-03
1.5. SBITOP daily close & annual dividend yield
path_sbitop <- "/Users/lukavrtac/Faks - EF/IMB/Master thesis/Data/R ready/SBITOP daily close - div y annual RAW.xlsx"
sbitop_daily_raw <- read_excel(path_sbitop, sheet = 1)
sbitop_divy_raw <- read_excel(path_sbitop, sheet = 2)
names(sbitop_daily_raw)
## [1] "date" "close"
names(sbitop_divy_raw)
## [1] "Year" "div_yield_pct"
sbitop_daily <- sbitop_daily_raw %>%
transmute(
date = as.Date(date),
close = parse_number(as.character(close),
locale = locale(decimal_mark = ".", grouping_mark = ","))
) %>%
arrange(date)
# Daily -> quarterly
sbitop_q <- sbitop_daily %>%
mutate(q_id = paste0(year(date), "Q", quarter(date))) %>%
group_by(q_id) %>%
slice_max(date, n = 1, with_ties = FALSE) %>%
ungroup() %>%
transmute(q_id, date_q = date, sbitop_close = close) %>%
arrange(date_q)
divy_y <- sbitop_divy_raw %>%
transmute(
year = as.integer(Year),
dy_eq_ann = parse_number(as.character(div_yield_pct),
locale = locale(decimal_mark = ".", grouping_mark = ",")) / 100
) %>%
arrange(year)
# Annual -> quarterly (repeat within year)
divy_q <- expand_grid(year = divy_y$year, qtr = 1:4) %>%
left_join(divy_y, by = "year") %>%
mutate(q_id = paste0(year, "Q", qtr)) %>%
select(q_id, dy_eq_ann)
head(sbitop_q); tail(sbitop_q)
## # A tibble: 6 × 3
## q_id date_q sbitop_close
## <chr> <date> <dbl>
## 1 2010Q1 2010-03-31 966.
## 2 2010Q2 2010-06-30 880.
## 3 2010Q3 2010-09-30 831.
## 4 2010Q4 2010-12-30 850.
## 5 2011Q1 2011-03-31 832.
## 6 2011Q2 2011-06-30 742.
## # A tibble: 6 × 3
## q_id date_q sbitop_close
## <chr> <date> <dbl>
## 1 2023Q3 2023-09-29 1174.
## 2 2023Q4 2023-12-29 1253.
## 3 2024Q1 2024-03-28 1462.
## 4 2024Q2 2024-06-28 1578.
## 5 2024Q3 2024-09-30 1604.
## 6 2024Q4 2024-12-30 1667.
anyDuplicated(sbitop_q$q_id)
## [1] 0
range(sbitop_q$date_q)
## [1] "2010-03-31" "2024-12-30"
head(divy_q); tail(divy_q)
## # A tibble: 6 × 2
## q_id dy_eq_ann
## <chr> <dbl>
## 1 2010Q1 0.0258
## 2 2010Q2 0.0258
## 3 2010Q3 0.0258
## 4 2010Q4 0.0258
## 5 2011Q1 0.0323
## 6 2011Q2 0.0323
## # A tibble: 6 × 2
## q_id dy_eq_ann
## <chr> <dbl>
## 1 2023Q3 0.0653
## 2 2023Q4 0.0653
## 3 2024Q1 0.0684
## 4 2024Q2 0.0684
## 5 2024Q3 0.0684
## 6 2024Q4 0.0684
anyDuplicated(divy_q$q_id)
## [1] 0
range(divy_y$year)
## [1] 2010 2024
1.6. Transaction costs (parameter files for scenario analysis - Cost
base & Equity capital gains tax schedule)
path_costs <- "/Users/lukavrtac/Faks - EF/IMB/Master thesis/Data/R ready/Transaction costs RAW.xlsx"
costs_base <- read_excel(path_costs, sheet = "COSTS_BASE") %>%
transmute(
asset = as.character(asset),
cost_group = as.character(cost_group),
cost_name = as.character(cost_name),
applies_to = as.character(applies_to),
timing = as.character(timing),
rate = as.numeric(rate),
rate_unit = as.character(rate_unit),
note = as.character(note)
)
eq_cgt_schedule <- read_excel(path_costs, sheet = "EQ_CGT_SCHEDULE") %>%
transmute(
holding_years_min = as.numeric(holding_years_min),
holding_years_max = as.numeric(holding_years_max),
tax_rate = as.numeric(tax_rate),
rate_unit = as.character(rate_unit),
note = as.character(note)
)
2. Master file generation & analysis ready files
cal_q <- tibble(
q_id = sprintf("%dQ%d",
rep(2010:2024, each = 4),
rep(1:4, times = 15))
) %>%
mutate(
year = as.integer(str_sub(q_id, 1, 4)),
qtr = as.integer(str_sub(q_id, 6, 6)),
date_q = case_when(
qtr == 1 ~ as.Date(paste0(year, "-03-31")),
qtr == 2 ~ as.Date(paste0(year, "-06-30")),
qtr == 3 ~ as.Date(paste0(year, "-09-30")),
qtr == 4 ~ as.Date(paste0(year, "-12-31"))
)
) %>%
select(q_id, date_q, year, qtr)
stopifnot(!anyDuplicated(rent_q$q_id))
stopifnot(!anyDuplicated(hicp_all_q$q_id))
stopifnot(!anyDuplicated(hpi_q$q_id))
stopifnot(!anyDuplicated(rf_q$q_id))
stopifnot(!anyDuplicated(sbitop_q$q_id))
stopifnot(!anyDuplicated(divy_q$q_id))
# Merge into one master quarterly LEVELS dataset
master_q_levels <- cal_q %>%
left_join(hpi_q %>% select(q_id, hpi_si_eurostat, hpi_si_surs_used, hpi_lj_surs_used, hpi_si_exlj_surs_used),
by = "q_id") %>%
left_join(rent_q %>% select(q_id, hicp_rent),
by = "q_id") %>%
left_join(hicp_all_q %>% select(q_id, hicp_all),
by = "q_id") %>%
left_join(sbitop_q %>% select(q_id, sbitop_close),
by = "q_id") %>%
left_join(divy_q %>% select(q_id, dy_eq_ann),
by = "q_id") %>%
left_join(rf_q %>% select(q_id, rf_log_q, rf_simple_q, rf_rate_ann_qavg_pct),
by = "q_id")
qa_missing <- master_q_levels %>%
summarise(across(everything(), ~sum(is.na(.)))) %>%
tidyr::pivot_longer(everything(), names_to = "col", values_to = "na_count") %>%
arrange(desc(na_count))
print(qa_missing)
## # A tibble: 15 × 2
## col na_count
## <chr> <int>
## 1 q_id 0
## 2 date_q 0
## 3 year 0
## 4 qtr 0
## 5 hpi_si_eurostat 0
## 6 hpi_si_surs_used 0
## 7 hpi_lj_surs_used 0
## 8 hpi_si_exlj_surs_used 0
## 9 hicp_rent 0
## 10 hicp_all 0
## 11 sbitop_close 0
## 12 dy_eq_ann 0
## 13 rf_log_q 0
## 14 rf_simple_q 0
## 15 rf_rate_ann_qavg_pct 0
table(master_q_levels$qtr) # should be equal counts per quarter
##
## 1 2 3 4
## 15 15 15 15
anyDuplicated(master_q_levels$q_id) # should be 0
## [1] 0
head(master_q_levels, 10)
## # A tibble: 10 × 15
## q_id date_q year qtr hpi_si_eurostat hpi_si_surs_used
## <chr> <date> <int> <int> <dbl> <dbl>
## 1 2010Q1 2010-03-31 2010 1 117. 118.
## 2 2010Q2 2010-06-30 2010 2 118. 118.
## 3 2010Q3 2010-09-30 2010 3 117. 117.
## 4 2010Q4 2010-12-31 2010 4 117. 116.
## 5 2011Q1 2011-03-31 2011 1 122. 119.
## 6 2011Q2 2011-06-30 2011 2 122. 120.
## 7 2011Q3 2011-09-30 2011 3 119. 119.
## 8 2011Q4 2011-12-31 2011 4 118. 116.
## 9 2012Q1 2012-03-31 2012 1 114. 113.
## 10 2012Q2 2012-06-30 2012 2 115. 115.
## # ℹ 9 more variables: hpi_lj_surs_used <dbl>, hpi_si_exlj_surs_used <dbl>,
## # hicp_rent <dbl>, hicp_all <dbl>, sbitop_close <dbl>, dy_eq_ann <dbl>,
## # rf_log_q <dbl>, rf_simple_q <dbl>, rf_rate_ann_qavg_pct <dbl>